日別アーカイブ: 2021年11月10日

jetson nanoでcv2(cuda有効化)

ソースからインストール

python3でcv2をimportすると、クラッシュしてコアダンプが発生する。

問題の解決:export OPENBLAS_CORETYPE=ARMV8

こちらを参照して解決:「Jetson NanoのPython3環境でIllegal instruction (core dumped)」

処理速度の比較測定:GPUはCPUの約3倍高速(下記の実測では約2.6〜2.8倍)

$ sudo nvpmodel -m 0
$ sudo jetson_clocks
$ python3 opencv_cuda.py
CPU = 2.7655137538909913[msec]
GPU = 1.0501614570617677[msec]
1
$ python3 opencv_cuda.py
CPU = 2.7816075325012206[msec]
GPU = 0.9869620561599731[msec]
1

opencv_cuda.py

import sys
import time
import cv2

### VALUES
NUM_REPEAT = 10000      # Number of resize calls averaged per measurement
DST_SIZE = (300, 300)   # Output size passed to both the CPU and the GPU resize

### Read source image
img_src = cv2.imread("resource/lena.jpg")
if img_src is None:
    # cv2.imread() reports an unreadable/missing file by returning None rather
    # than raising, so stop here with a clear message instead of failing later
    # inside imshow()/resize().
    raise SystemExit("Could not read resource/lena.jpg")
cv2.imshow('img_src', img_src)


### Run with CPU
# perf_counter() is the clock intended for measuring short intervals.
time_start = time.perf_counter()
for _ in range(NUM_REPEAT):
    img_dst = cv2.resize(img_src, DST_SIZE)
time_end = time.perf_counter()
# Average time per resize call, in milliseconds.
print("CPU = {0}[msec]".format((time_end - time_start) * 1000 / NUM_REPEAT))
cv2.imshow('CPU', img_dst)


### Run with GPU
# Create the device-side matrices once, outside the timed loop, as memory
# allocation seems to take time.
img_gpu_src = cv2.cuda_GpuMat()
img_gpu_dst = cv2.cuda_GpuMat()
time_start = time.perf_counter()
for _ in range(NUM_REPEAT):
    # The timed region deliberately includes the host->device upload and the
    # device->host download, not only the resize itself.
    img_gpu_src.upload(img_src)
    # Hand the previous destination back in as `dst` so it can be reused
    # instead of being replaced by a fresh GpuMat on every iteration. The
    # return value is still bound in case the binding returns a new object
    # (e.g. on the first pass, when the destination is still empty).
    img_gpu_dst = cv2.cuda.resize(img_gpu_src, DST_SIZE, img_gpu_dst)
    img_dst = img_gpu_dst.download()
time_end = time.perf_counter()
# Average time per upload + resize + download round trip, in milliseconds.
print("GPU = {0}[msec]".format((time_end - time_start) * 1000 / NUM_REPEAT))
cv2.imshow('GPU', img_dst)


# Keep the three windows open until any key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()

# Number of CUDA-capable devices OpenCV can see.
print(cv2.cuda.getCudaEnabledDeviceCount())

cudaで利用できる機能を表示してみる

import cv2
# Entered at an interactive prompt, this echoes the installed OpenCV version.
cv2.__version__
# List every name exposed by the cv2.cuda submodule (the output follows below).
dir(cv2.cuda)
['ALPHA_ATOP',
 'ALPHA_ATOP_PREMUL',
 'ALPHA_IN',
 'ALPHA_IN_PREMUL',
 'ALPHA_OUT',
 'ALPHA_OUT_PREMUL',
 'ALPHA_OVER',
 'ALPHA_OVER_PREMUL',
 'ALPHA_PLUS',
 'ALPHA_PLUS_PREMUL',
 'ALPHA_PREMUL',
 'ALPHA_XOR',
 'ALPHA_XOR_PREMUL',
 'BroxOpticalFlow_create',
 'COLOR_BAYER_BG2BGR_MHT',
 'COLOR_BAYER_BG2GRAY_MHT',
 'COLOR_BAYER_BG2RGB_MHT',
 'COLOR_BAYER_GB2BGR_MHT',
 'COLOR_BAYER_GB2GRAY_MHT',
 'COLOR_BAYER_GB2RGB_MHT',
 'COLOR_BAYER_GR2BGR_MHT',
 'COLOR_BAYER_GR2GRAY_MHT',
 'COLOR_BAYER_GR2RGB_MHT',
 'COLOR_BAYER_RG2BGR_MHT',
 'COLOR_BAYER_RG2GRAY_MHT',
 'COLOR_BAYER_RG2RGB_MHT',
 'COLOR_BayerBG2BGR_MHT',
 'COLOR_BayerBG2GRAY_MHT',
 'COLOR_BayerBG2RGB_MHT',
 'COLOR_BayerGB2BGR_MHT',
 'COLOR_BayerGB2GRAY_MHT',
 'COLOR_BayerGB2RGB_MHT',
 'COLOR_BayerGR2BGR_MHT',
 'COLOR_BayerGR2GRAY_MHT',
 'COLOR_BayerGR2RGB_MHT',
 'COLOR_BayerRG2BGR_MHT',
 'COLOR_BayerRG2GRAY_MHT',
 'COLOR_BayerRG2RGB_MHT',
 'CascadeClassifier_create',
 'DEVICE_INFO_COMPUTE_MODE_DEFAULT',
 'DEVICE_INFO_COMPUTE_MODE_EXCLUSIVE',
 'DEVICE_INFO_COMPUTE_MODE_EXCLUSIVE_PROCESS',
 'DEVICE_INFO_COMPUTE_MODE_PROHIBITED',
 'DYNAMIC_PARALLELISM',
 'DensePyrLKOpticalFlow_create',
 'DescriptorMatcher_createBFMatcher',
 'DeviceInfo_ComputeModeDefault',
 'DeviceInfo_ComputeModeExclusive',
 'DeviceInfo_ComputeModeExclusiveProcess',
 'DeviceInfo_ComputeModeProhibited',
 'EVENT_BLOCKING_SYNC',
 'EVENT_DEFAULT',
 'EVENT_DISABLE_TIMING',
 'EVENT_INTERPROCESS',
 'Event_BLOCKING_SYNC',
 'Event_DEFAULT',
 'Event_DISABLE_TIMING',
 'Event_INTERPROCESS',
 'Event_elapsedTime',
 'FEATURE_SET_COMPUTE_10',
 'FEATURE_SET_COMPUTE_11',
 'FEATURE_SET_COMPUTE_12',
 'FEATURE_SET_COMPUTE_13',
 'FEATURE_SET_COMPUTE_20',
 'FEATURE_SET_COMPUTE_21',
 'FEATURE_SET_COMPUTE_30',
 'FEATURE_SET_COMPUTE_32',
 'FEATURE_SET_COMPUTE_35',
 'FEATURE_SET_COMPUTE_50',
 'FarnebackOpticalFlow_create',
 'FastFeatureDetector_create',
 'GLOBAL_ATOMICS',
 'GpuMat_defaultAllocator',
 'GpuMat_setDefaultAllocator',
 'HOG_create',
 'HOST_MEM_PAGE_LOCKED',
 'HOST_MEM_SHARED',
 'HOST_MEM_WRITE_COMBINED',
 'HostMem_PAGE_LOCKED',
 'HostMem_SHARED',
 'HostMem_WRITE_COMBINED',
 'NATIVE_DOUBLE',
 'NVIDIA_OPTICAL_FLOW_1_0_NV_OF_PERF_LEVEL_FAST',
 'NVIDIA_OPTICAL_FLOW_1_0_NV_OF_PERF_LEVEL_MAX',
 'NVIDIA_OPTICAL_FLOW_1_0_NV_OF_PERF_LEVEL_MEDIUM',
 'NVIDIA_OPTICAL_FLOW_1_0_NV_OF_PERF_LEVEL_SLOW',
 'NVIDIA_OPTICAL_FLOW_1_0_NV_OF_PERF_LEVEL_UNDEFINED',
 'NvidiaOpticalFlow_1_0_NV_OF_PERF_LEVEL_FAST',
 'NvidiaOpticalFlow_1_0_NV_OF_PERF_LEVEL_MAX',
 'NvidiaOpticalFlow_1_0_NV_OF_PERF_LEVEL_MEDIUM',
 'NvidiaOpticalFlow_1_0_NV_OF_PERF_LEVEL_SLOW',
 'NvidiaOpticalFlow_1_0_NV_OF_PERF_LEVEL_UNDEFINED',
 'NvidiaOpticalFlow_1_0_create',
 'ORB_create',
 'OpticalFlowDual_TVL1_create',
 'SHARED_ATOMICS',
 'SURF_CUDA_ANGLE_ROW',
 'SURF_CUDA_HESSIAN_ROW',
 'SURF_CUDA_LAPLACIAN_ROW',
 'SURF_CUDA_OCTAVE_ROW',
 'SURF_CUDA_ROWS_COUNT',
 'SURF_CUDA_SIZE_ROW',
 'SURF_CUDA_X_ROW',
 'SURF_CUDA_Y_ROW',
 'SparsePyrLKOpticalFlow_create',
 'StereoBeliefPropagation_estimateRecommendedParams',
 'StereoConstantSpaceBP_estimateRecommendedParams',
 'Stream_Null',
 'TargetArchs_has',
 'TargetArchs_hasBin',
 'TargetArchs_hasEqualOrGreater',
 'TargetArchs_hasEqualOrGreaterBin',
 'TargetArchs_hasEqualOrGreaterPtx',
 'TargetArchs_hasEqualOrLessPtx',
 'TargetArchs_hasPtx',
 'WARP_SHUFFLE_FUNCTIONS',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'abs',
 'absSum',
 'absdiff',
 'add',
 'addWeighted',
 'alphaComp',
 'bilateralFilter',
 'bitwise_and',
 'bitwise_not',
 'bitwise_or',
 'bitwise_xor',
 'blendLinear',
 'buildWarpAffineMaps',
 'buildWarpPerspectiveMaps',
 'calcAbsSum',
 'calcHist',
 'calcNorm',
 'calcNormDiff',
 'calcSqrSum',
 'calcSum',
 'cartToPolar',
 'compare',
 'copyMakeBorder',
 'countNonZero',
 'createBackgroundSubtractorMOG',
 'createBackgroundSubtractorMOG2',
 'createBoxFilter',
 'createBoxMaxFilter',
 'createBoxMinFilter',
 'createCLAHE',
 'createCannyEdgeDetector',
 'createColumnSumFilter',
 'createContinuous',
 'createConvolution',
 'createDFT',
 'createDerivFilter',
 'createDisparityBilateralFilter',
 'createGaussianFilter',
 'createGeneralizedHoughBallard',
 'createGeneralizedHoughGuil',
 'createGoodFeaturesToTrackDetector',
 'createHarrisCorner',
 'createHoughCirclesDetector',
 'createHoughLinesDetector',
 'createHoughSegmentDetector',
 'createLaplacianFilter',
 'createLinearFilter',
 'createLookUpTable',
 'createMedianFilter',
 'createMinEigenValCorner',
 'createMorphologyFilter',
 'createRowSumFilter',
 'createScharrFilter',
 'createSeparableLinearFilter',
 'createSobelFilter',
 'createStereoBM',
 'createStereoBeliefPropagation',
 'createStereoConstantSpaceBP',
 'createTemplateMatching',
 'cvtColor',
 'demosaicing',
 'dft',
 'divide',
 'drawColorDisp',
 'ensureSizeIsEnough',
 'equalizeHist',
 'evenLevels',
 'exp',
 'findMinMax',
 'findMinMaxLoc',
 'flip',
 'gammaCorrection',
 'gemm',
 'getCudaEnabledDeviceCount',
 'getDevice',
 'histEven',
 'histRange',
 'integral',
 'log',
 'magnitude',
 'magnitudeSqr',
 'max',
 'meanShiftFiltering',
 'meanShiftProc',
 'meanShiftSegmentation',
 'meanStdDev',
 'merge',
 'min',
 'minMax',
 'minMaxLoc',
 'mulAndScaleSpectrums',
 'mulSpectrums',
 'multiply',
 'norm',
 'normalize',
 'phase',
 'polarToCart',
 'pow',
 'printCudaDeviceInfo',
 'printShortCudaDeviceInfo',
 'pyrDown',
 'pyrUp',
 'rectStdDev',
 'reduce',
 'registerPageLocked',
 'remap',
 'reprojectImageTo3D',
 'resetDevice',
 'resize',
 'rotate',
 'setBufferPoolConfig',
 'setBufferPoolUsage',
 'setDevice',
 'split',
 'sqr',
 'sqrIntegral',
 'sqrSum',
 'sqrt',
 'subtract',
 'sum',
 'threshold',
 'transpose',
 'unregisterPageLocked',
 'warpAffine',
 'warpPerspective']