diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 8e4aba31..45d5c859 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -8,9 +8,31 @@
 from models import MODELS
 from utils import METRICS, DATALOADERS
 
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
 parser.add_argument('--cfg', '-c', type=str, help='Benchmarking on the given config.')
+parser.add_argument('--cfg_overwrite_backend_target', type=int, default=-1,
+                    help='''Choose one of the backend-target pairs to run this benchmark:
+                        others: (default) use the one from config,
+                        {:d}: OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument("--fp32", action="store_true", help="Runs models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Runs models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Runs models of int8 precision only.")
@@ -56,6 +78,8 @@ def __init__(self, **kwargs):
             opencv=cv.dnn.DNN_BACKEND_OPENCV,
             # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
             cuda=cv.dnn.DNN_BACKEND_CUDA,
+            timvx=cv.dnn.DNN_BACKEND_TIMVX,
+            cann=cv.dnn.DNN_BACKEND_CANN,
         )
 
         target_id = kwargs.pop('target', 'cpu')
@@ -69,28 +93,20 @@ def __init__(self, **kwargs):
             cuda=cv.dnn.DNN_TARGET_CUDA,
             cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
             # hddl=cv.dnn.DNN_TARGET_HDDL,
+            npu=cv.dnn.DNN_TARGET_NPU,
         )
 
-        # add extra backends & targets
-        try:
-            available_backends['timvx'] = cv.dnn.DNN_BACKEND_TIMVX
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with TIM-VX backend enbaled. See https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more details on how to enable TIM-VX backend.')
-        try:
-            available_backends['cann'] = cv.dnn.DNN_BACKEND_CANN
-            available_targets['npu'] = cv.dnn.DNN_TARGET_NPU
-        except:
-            print('OpenCV is not compiled with CANN backend enabled. See https://github.com/opencv/opencv/wiki/Huawei-CANN-Backend for more details on how to enable CANN backend.')
-
         self._backend = available_backends[backend_id]
         self._target = available_targets[target_id]
 
         self._benchmark_results = dict()
 
+    def setBackendAndTarget(self, backend_id, target_id):
+        self._backend = backend_id
+        self._target = target_id
+
     def run(self, model):
-        model.setBackend(self._backend)
-        model.setTarget(self._target)
+        model.setBackendAndTarget(self._backend, self._target)
 
         for idx, data in enumerate(self._dataloader):
             filename, input_data = data[:2]
@@ -118,6 +134,11 @@ def printResults(self):
     # Instantiate benchmark
     benchmark = Benchmark(**cfg['Benchmark'])
 
+    if args.cfg_overwrite_backend_target >= 0:
+        backend_id = backend_target_pairs[args.cfg_overwrite_backend_target][0]
+        target_id = backend_target_pairs[args.cfg_overwrite_backend_target][1]
+        benchmark.setBackendAndTarget(backend_id, target_id)
+
     # Instantiate model
     model_config = cfg['Model']
     model_handler, model_paths = MODELS.get(model_config.pop('name'))
diff --git a/models/face_detection_yunet/demo.py b/models/face_detection_yunet/demo.py
index 00b099e0..cfcc1925 100644
--- a/models/face_detection_yunet/demo.py
+++ b/models/face_detection_yunet/demo.py
@@ -11,36 +11,42 @@
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a face, defauts to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
-parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx',
+                    help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a face, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -70,14 +76,17 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YuNet
     model = YuNet(modelPath=args.model,
                   inputSize=[320, 320],
                   confThreshold=args.conf_threshold,
                   nmsThreshold=args.nms_threshold,
                   topK=args.top_k,
-                  backendId=args.backend,
-                  targetId=args.target)
+                  backendId=backend_id,
+                  targetId=target_id)
 
     # If input is an image
     if args.input is not None:
@@ -134,4 +143,3 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps
             cv.imshow('YuNet Demo', frame)
 
             tm.reset()
-
diff --git a/models/face_detection_yunet/yunet.py b/models/face_detection_yunet/yunet.py
index 85e64e01..5b7369bb 100644
--- a/models/face_detection_yunet/yunet.py
+++ b/models/face_detection_yunet/yunet.py
@@ -33,19 +33,8 @@ def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThresh
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceDetectorYN.create(
-            model=self._modelPath,
-            config="",
-            input_size=self._inputSize,
-            score_threshold=self._confThreshold,
-            nms_threshold=self._nmsThreshold,
-            top_k=self._topK,
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceDetectorYN.create(
             model=self._modelPath,
@@ -64,4 +53,3 @@ def infer(self, image):
         # Forward
         faces = self._model.detect(image)
         return faces[1]
-
diff --git a/models/face_recognition_sface/demo.py b/models/face_recognition_sface/demo.py
index 76fa100f..735e76cc 100644
--- a/models/face_recognition_sface/demo.py
+++ b/models/face_recognition_sface/demo.py
@@ -15,49 +15,55 @@
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA \n Usage: Set backend DNN model, defaults to cv.dnn.DNN_BACKEND_OPENCV (int = 0). Based on your OpenCV version, it may or may not support cv.dnn.DNN_BACKEND_TIMVX. More details: [https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f]"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(
     description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
-parser.add_argument('--input1', '-i1', type=str, help='Usage: Set path to the input image 1 (original face).')
-parser.add_argument('--input2', '-i2', type=str, help='Usage: Set path to the input image 2 (comparison face).')
-parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input1', '-i1', type=str,
+                    help='Usage: Set path to the input image 1 (original face).')
+parser.add_argument('--input2', '-i2', type=str,
+                    help='Usage: Set path to the input image 2 (comparison face).')
+parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx',
+                    help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0,
+                    help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
+    recognizer = SFace(modelPath=args.model,
+                       disType=args.dis_type,
+                       backendId=backend_id,
+                       targetId=target_id)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
                      topK=5000,
-                     backendId=args.backend,
-                     targetId=args.target)
+                     backendId=backend_id,
+                     targetId=target_id)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -73,4 +79,3 @@ def str2bool(v):
     # Match
     result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
     print('Result: {}.'.format('same identity' if result else 'different identities'))
-
diff --git a/models/face_recognition_sface/sface.py b/models/face_recognition_sface/sface.py
index 98acf612..91ca7361 100644
--- a/models/face_recognition_sface/sface.py
+++ b/models/face_recognition_sface/sface.py
@@ -7,8 +7,6 @@
 import numpy as np
 import cv2 as cv
 
-from _testcapi import FLT_MIN
-
 class SFace:
     def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
         self._modelPath = modelPath
@@ -30,15 +28,8 @@ def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self._backendId = backendId
-        self._model = cv.FaceRecognizerSF.create(
-            model=self._modelPath,
-            config="",
-            backend_id=self._backendId,
-            target_id=self._targetId)
-
-    def setTarget(self, targetId):
         self._targetId = targetId
         self._model = cv.FaceRecognizerSF.create(
             model=self._modelPath,
@@ -70,4 +61,3 @@ def match(self, image1, face1, image2, face2):
         else: # NORM_L2
             norml2_distance = self._model.match(feature1, feature2, self._disType)
             return 1 if norml2_distance <= self._threshold_norml2 else 0
-
diff --git a/models/facial_expression_recognition/demo.py b/models/facial_expression_recognition/demo.py
index e12885e8..f518909f 100644
--- a/models/facial_expression_recognition/demo.py
+++ b/models/facial_expression_recognition/demo.py
@@ -11,38 +11,38 @@
 sys.path.append('../face_detection_yunet')
 from yunet import YuNet
 
-
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Facial Expression Recognition')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx',
+                    help='Path to the facial expression recognition model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
-
 def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
     print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))
@@ -83,11 +83,14 @@ def process(detect_model, fer_model, frame):
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
 
     fer_model = FacialExpressionRecog(modelPath=args.model,
-                                      backendId=args.backend,
-                                      targetId=args.target)
+                                      backendId=backend_id,
+                                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:
diff --git a/models/facial_expression_recognition/facial_fer_model.py b/models/facial_expression_recognition/facial_fer_model.py
index e0a739dd..307af559 100644
--- a/models/facial_expression_recognition/facial_fer_model.py
+++ b/models/facial_expression_recognition/facial_fer_model.py
@@ -29,12 +29,10 @@ def __init__(self, modelPath, backendId=0, targetId=0):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image, bbox):
diff --git a/models/handpose_estimation_mediapipe/demo.py b/models/handpose_estimation_mediapipe/demo.py
index c78d8215..7777a53c 100644
--- a/models/handpose_estimation_mediapipe/demo.py
+++ b/models/handpose_estimation_mediapipe/demo.py
@@ -9,34 +9,38 @@
 sys.path.append('../palm_detection_mediapipe')
 from mp_palmdet import MPPalmDet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Hand Pose Estimation from MediaPipe')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. 
Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx', help='Path to the model.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out hands of confidence < conf_threshold.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./handpose_estimation_mediapipe_2023feb.onnx',
+                    help='Path to the model.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Filter out hands of confidence < conf_threshold.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Specify to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Specify to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
 
@@ -147,17 +151,19 @@ def draw_lines(image, landmarks, is_draw_point=True, thickness=2):
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # palm detector
     palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.6,
-                              backendId=args.backend,
-                              targetId=args.target)
+                              backendId=backend_id,
+                              targetId=target_id)
     # handpose detector
     handpose_detector = MPHandPose(modelPath=args.model,
                                    confThreshold=args.conf_threshold,
-                                   backendId=args.backend,
-                                   targetId=args.target)
+                                   backendId=backend_id,
+                                   targetId=target_id)
 
     # If input is an image
     if args.input is not None:
diff --git a/models/handpose_estimation_mediapipe/mp_handpose.py b/models/handpose_estimation_mediapipe/mp_handpose.py
index e286e9d6..101e509c 100644
--- a/models/handpose_estimation_mediapipe/mp_handpose.py
+++ b/models/handpose_estimation_mediapipe/mp_handpose.py
@@ -1,7 +1,6 @@
 import numpy as np
 import cv2 as cv
 
-
 class MPHandPose:
     def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0):
         self.model_path = modelPath
@@ -28,12 +27,10 @@ def __init__(self, modelPath, confThreshold=0.8, backendId=0, targetId=0):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
diff --git a/models/human_segmentation_pphumanseg/demo.py b/models/human_segmentation_pphumanseg/demo.py
index 9b4a7e59..81e6bf77 100644
--- a/models/human_segmentation_pphumanseg/demo.py
+++ b/models/human_segmentation_pphumanseg/demo.py
@@ -11,33 +11,36 @@
 from pphumanseg import PPHumanSeg
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx', help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2023mar.onnx',
+                    help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def get_color_map_list(num_classes):
@@ -97,8 +100,10 @@ def visualize(image, result, weight=0.6, fps=None):
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate PPHumanSeg
-    model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPHumanSeg(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     if args.input is not None:
         # Read image and resize to 192x192
diff --git a/models/human_segmentation_pphumanseg/pphumanseg.py b/models/human_segmentation_pphumanseg/pphumanseg.py
index 2f38159e..be41351a 100644
--- a/models/human_segmentation_pphumanseg/pphumanseg.py
+++ b/models/human_segmentation_pphumanseg/pphumanseg.py
@@ -28,12 +28,10 @@ def __init__(self, modelPath, backendId=0, targetId=0):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
@@ -69,4 +67,3 @@ def _postprocess(self, outputBlob):
         result = np.argmax(outputBlob, axis=1).astype(np.uint8)
 
         return result
-
diff --git a/models/image_classification_mobilenet/demo.py b/models/image_classification_mobilenet/demo.py
index 948a2470..d4b1dae6 100644
--- a/models/image_classification_mobilenet/demo.py
+++ b/models/image_classification_mobilenet/demo.py
@@ -5,43 +5,39 @@
 from mobilenet import MobileNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
-
-all_mobilenets = [
-    'image_classification_mobilenetv1_2022apr.onnx',
-    'image_classification_mobilenetv2_2022apr.onnx',
-    'image_classification_mobilenetv1_2022apr-int8-quantized.onnx',
-    'image_classification_mobilenetv2_2022apr-int8-quantized.onnx'
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
 ]
 
 parser = argparse.ArgumentParser(description='Demo for MobileNet V1 & V2.')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, choices=all_mobilenets, default=all_mobilenets[0], help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_mobilenetv1_2022apr.onnx',
+                    help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate MobileNet
-    model = MobileNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = MobileNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
diff --git a/models/image_classification_mobilenet/mobilenet.py b/models/image_classification_mobilenet/mobilenet.py
index 7047a755..6aad9bec 100644
--- a/models/image_classification_mobilenet/mobilenet.py
+++ b/models/image_classification_mobilenet/mobilenet.py
@@ -33,12 +33,10 @@ def _load_labels(self):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
 
     def _preprocess(self, image):
diff --git a/models/image_classification_ppresnet/demo.py b/models/image_classification_ppresnet/demo.py
index f78ac6bf..16fc7dac 100644
--- a/models/image_classification_ppresnet/demo.py
+++ b/models/image_classification_ppresnet/demo.py
@@ -11,36 +11,39 @@
 from ppresnet import PPResNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.')
-parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set input path to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx',
+                    help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 args = parser.parse_args()
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
     # Instantiate ResNet
-    model = PPResNet(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    model = PPResNet(modelPath=args.model, backendId=backend_id, targetId=target_id)
 
     # Read image and get a 224x224 crop from a 256x256 resized
     image = cv.imread(args.input)
diff --git a/models/image_classification_ppresnet/ppresnet.py b/models/image_classification_ppresnet/ppresnet.py
index be36dd32..79cd4995 100644
--- a/models/image_classification_ppresnet/ppresnet.py
+++ b/models/image_classification_ppresnet/ppresnet.py
@@ -36,12 +36,10 @@ def _load_labels(self):
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)
 
     def _preprocess(self, image):
diff --git a/models/license_plate_detection_yunet/demo.py b/models/license_plate_detection_yunet/demo.py
index daa2c87f..7344e80d 100644
--- a/models/license_plate_detection_yunet/demo.py
+++ b/models/license_plate_detection_yunet/demo.py
@@ -5,37 +5,44 @@
 from lpd_yunet import LPD_YuNet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2022may.onnx', help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2022may.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3. 
Suppress bounding boxes of iou >= nms_threshold.')
-parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
-parser.add_argument('--keep_top_k', type=int, default=750, help='Usage: Keep keep_top_k bounding boxes after NMS.')
-parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2023mar.onnx',
+                    help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2023mar.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--conf_threshold', type=float, default=0.9,
+                    help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out plates of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000,
+                    help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--keep_top_k', type=int, default=750,
+                    help='Usage: Keep keep_top_k bounding boxes after NMS.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
@@ -57,14 +64,17 @@ def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=N
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate LPD-YuNet
     model = LPD_YuNet(modelPath=args.model,
                       confThreshold=args.conf_threshold,
                       nmsThreshold=args.nms_threshold,
                       topK=args.top_k,
                       keepTopK=args.keep_top_k,
-                      backendId=args.backend,
-                      targetId=args.target)
+                      backendId=backend_id,
+                      targetId=target_id)
 
     # If input is an image
     if args.input is not None:
@@ -117,4 +127,3 @@ def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=N
             cv.imshow('LPD-YuNet Demo', frame)
 
             tm.reset()
-
diff --git a/models/license_plate_detection_yunet/lpd_yunet.py b/models/license_plate_detection_yunet/lpd_yunet.py
index ec4d7d09..36d89613 100644
--- a/models/license_plate_detection_yunet/lpd_yunet.py
+++ b/models/license_plate_detection_yunet/lpd_yunet.py
@@ -28,12 +28,10 @@ def __init__(self, modelPath, inputSize=[320, 240], confThreshold=0.8, nmsThresh
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self.backend_id = backendId
-        self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
         self.target_id = targetId
+        self.model.setPreferableBackend(self.backend_id)
         self.model.setPreferableTarget(self.target_id)
 
     def setInputSize(self, inputSize):
diff --git a/models/object_detection_nanodet/demo.py b/models/object_detection_nanodet/demo.py
index b04f3319..c20cdf8d 100644
--- a/models/object_detection_nanodet/demo.py
+++ b/models/object_detection_nanodet/demo.py
@@ -1,29 +1,21 @@
 import numpy as np
-import cv2
+import cv2 as cv
 import argparse
 
 from nanodet import NanoDet
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
-targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
-try:
-    backends += [cv2.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv2.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
 
 classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -48,16 +40,16 @@ def letterbox(srcimg, target_size=(416, 416)):
         hw_scale = img.shape[0] / img.shape[1]
         if hw_scale > 1:
             newh, neww = target_size[0], int(target_size[1] / hw_scale)
-            img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
+            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
             left = int((target_size[1] - neww) * 0.5)
-            img = cv2.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv2.BORDER_CONSTANT, value=0)  # add border
+            img = cv.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv.BORDER_CONSTANT, value=0)  # add border
         else:
             newh, neww = int(target_size[0] * hw_scale), target_size[1]
-            img = cv2.resize(img, (neww, newh), interpolation=cv2.INTER_AREA)
+            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
             top = int((target_size[0] - newh) * 0.5)
-            img = cv2.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
+            img = cv.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv.BORDER_CONSTANT, value=0)
     else:
-        img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
+        img = cv.resize(img, target_size, interpolation=cv.INTER_AREA)
 
     letterbox_scale = [top, left, newh, neww]
     return img, letterbox_scale
@@ -87,7 +79,7 @@ def vis(preds, res_img, letterbox_scale, fps=None):
     # draw FPS
     if fps is not None:
         fps_label = "FPS: %.2f" % fps
-        cv2.putText(ret, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        cv.putText(ret, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
 
     # draw bboxes and labels
     for pred in preds:
@@ -97,37 +89,52 @@ def vis(preds, res_img, letterbox_scale, fps=None):
 
         # bbox
         xmin, ymin, xmax, ymax = unletterbox(bbox, ret.shape[:2], letterbox_scale)
-        cv2.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
+        cv.rectangle(ret, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
 
         # label
         label = "{:s}: {:.2f}".format(classes[classid], conf)
-        cv2.putText(ret, label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
+        cv.putText(ret, label, (xmin, ymin - 10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
 
     return ret
 
 if __name__=='__main__':
     parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
-    parser.add_argument('--input', '-i', type=str, help='Path to the input image. 
Omit for using default camera.')
-    parser.add_argument('--model', '-m', type=str, default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
-    parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-    parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-    parser.add_argument('--confidence', default=0.35, type=float, help='Class confidence')
-    parser.add_argument('--nms', default=0.6, type=float, help='Enter nms IOU threshold')
-    parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-    parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+    parser.add_argument('--input', '-i', type=str,
+                        help='Path to the input image. Omit for using default camera.')
+    parser.add_argument('--model', '-m', type=str,
+                        default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
+    parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                        help='''Choose one of the backend-target pairs to run this demo:
+                            {:d}: (default) OpenCV implementation + CPU,
+                            {:d}: CUDA + GPU (CUDA),
+                            {:d}: CUDA + GPU (CUDA FP16),
+                            {:d}: TIM-VX + NPU,
+                            {:d}: CANN + NPU
+                        '''.format(*[x for x in range(len(backend_target_pairs))]))
+    parser.add_argument('--confidence', default=0.35, type=float,
+                        help='Class confidence')
+    parser.add_argument('--nms', default=0.6, type=float,
+                        help='Enter nms IOU threshold')
+    parser.add_argument('--save', '-s', action='store_true',
+                        help='Specify to save results. This flag is invalid when using camera.')
+    parser.add_argument('--vis', '-v', action='store_true',
+                        help='Specify to open a window for result visualization. This flag is invalid when using camera.')
     args = parser.parse_args()
 
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     model = NanoDet(modelPath= args.model,
                     prob_threshold=args.confidence,
                     iou_threshold=args.nms,
-                    backend_id=args.backend,
-                    target_id=args.target)
+                    backend_id=backend_id,
+                    target_id=target_id)
 
-    tm = cv2.TickMeter()
+    tm = cv.TickMeter()
     tm.reset()
     if args.input is not None:
-        image = cv2.imread(args.input)
-        input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        image = cv.imread(args.input)
+        input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
 
         # Letterbox transformation
         input_blob, letterbox_scale = letterbox(input_blob)
@@ -142,25 +149,25 @@ def vis(preds, res_img, letterbox_scale, fps=None):
 
         if args.save:
             print('Resutls saved to result.jpg\n')
-            cv2.imwrite('result.jpg', img)
+            cv.imwrite('result.jpg', img)
 
         if args.vis:
-            cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
-            cv2.imshow(args.input, img)
-            cv2.waitKey(0)
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, img)
+            cv.waitKey(0)
 
     else:
         print("Press any key to stop video capture")
 
         deviceId = 0
-        cap = cv2.VideoCapture(deviceId)
+        cap = cv.VideoCapture(deviceId)
 
-        while cv2.waitKey(1) < 0:
+        while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
 
-            input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
             input_blob, letterbox_scale = letterbox(input_blob)
 
             # Inference
             tm.start()
@@ -169,6 +176,6 @@ def vis(preds, res_img, letterbox_scale, fps=None):
 
             img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())
 
-            cv2.imshow("NanoDet Demo", img)
+            cv.imshow("NanoDet Demo", img)
 
             tm.reset()
diff --git a/models/object_detection_nanodet/nanodet.py b/models/object_detection_nanodet/nanodet.py
index 8937b522..7d8affd1 100644
--- a/models/object_detection_nanodet/nanodet.py
+++ b/models/object_detection_nanodet/nanodet.py
@@ -37,12 +37,10 @@ def __init__(self, modelPath, prob_threshold=0.35, iou_threshold=0.6, backend_id
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backendId):
-        self.backend_id = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backend_id = backendId
+        self.target_id = targetId
         self.net.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
-        self.target_id = targetId
         self.net.setPreferableTarget(self.target_id)
 
     def pre_process(self, img):
diff --git a/models/object_detection_yolox/demo.py b/models/object_detection_yolox/demo.py
index ed31f1f2..0ff961db 100644
--- a/models/object_detection_yolox/demo.py
+++ b/models/object_detection_yolox/demo.py
@@ -1,29 +1,21 @@
 import numpy as np
-import cv2
+import cv2 as cv
 import argparse
 
 from yolox import YoloX
 
-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
-targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install 
diff --git a/models/object_detection_yolox/demo.py b/models/object_detection_yolox/demo.py
index ed31f1f2..0ff961db 100644
--- a/models/object_detection_yolox/demo.py
+++ b/models/object_detection_yolox/demo.py
@@ -1,29 +1,21 @@
 import numpy as np
-import cv2
+import cv2 as cv
 import argparse

 from yolox import YoloX

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_BACKEND_CUDA]
-targets = [cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_CUDA, cv2.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv2.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv2.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
@@ -43,8 +35,8 @@ def str2bool(v):
 def letterbox(srcimg, target_size=(640, 640)):
     padded_img = np.ones((target_size[0], target_size[1], 3)) * 114.0
     ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
-    resized_img = cv2.resize(
-        srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv2.INTER_LINEAR
+    resized_img = cv.resize(
+        srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR
     ).astype(np.float32)
     padded_img[: int(srcimg.shape[0] * ratio), : int(srcimg.shape[1] * ratio)] = resized_img

@@ -58,7 +50,7 @@ def vis(dets, srcimg, letterbox_scale, fps=None):

     if fps is not None:
         fps_label = "FPS: %.2f" % fps
-        cv2.putText(res_img, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        cv.putText(res_img, fps_label, (10, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

     for det in dets:
         box = unletterbox(det[:4], letterbox_scale).astype(np.int32)
@@ -68,39 +60,55 @@ def vis(dets, srcimg, letterbox_scale, fps=None):
         x0, y0, x1, y1 = box

         text = '{}:{:.1f}%'.format(classes[cls_id], score * 100)
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
-        cv2.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
-        cv2.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
-        cv2.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)
+        font = cv.FONT_HERSHEY_SIMPLEX
+        txt_size = cv.getTextSize(text, font, 0.4, 1)[0]
+        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
+        cv.rectangle(res_img, (x0, y0 + 1), (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), (255, 255, 255), -1)
+        cv.putText(res_img, text, (x0, y0 + txt_size[1]), font, 0.4, (0, 0, 0), thickness=1)

     return res_img

 if __name__=='__main__':
     parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV an contribution by Sri Siddarth Chakaravarthy part of GSOC_2022')
-    parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-    parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx', help="Path to the model")
-    parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-    parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-    parser.add_argument('--confidence', default=0.5, type=float, help='Class confidence')
-    parser.add_argument('--nms', default=0.5, type=float, help='Enter nms IOU threshold')
-    parser.add_argument('--obj', default=0.5, type=float, help='Enter object threshold')
-    parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-    parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+    parser.add_argument('--input', '-i', type=str,
+                        help='Path to the input image. Omit for using default camera.')
+    parser.add_argument('--model', '-m', type=str, default='object_detection_yolox_2022nov.onnx',
+                        help="Path to the model")
+    parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                        help='''Choose one of the backend-target pairs to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                        '''.format(*[x for x in range(len(backend_target_pairs))]))
+    parser.add_argument('--confidence', default=0.5, type=float,
+                        help='Class confidence threshold')
+    parser.add_argument('--nms', default=0.5, type=float,
+                        help='NMS IoU threshold')
+    parser.add_argument('--obj', default=0.5, type=float,
+                        help='Object score threshold')
+    parser.add_argument('--save', '-s', action='store_true',
+                        help='Specify to save results. This flag is invalid when using camera.')
+    parser.add_argument('--vis', '-v', action='store_true',
+                        help='Specify to open a window for result visualization. This flag is invalid when using camera.')
     args = parser.parse_args()

+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     model_net = YoloX(modelPath= args.model,
                       confThreshold=args.confidence,
                       nmsThreshold=args.nms,
                       objThreshold=args.obj,
-                      backendId=args.backend,
-                      targetId=args.target)
+                      backendId=backend_id,
+                      targetId=target_id)

-    tm = cv2.TickMeter()
+    tm = cv.TickMeter()
     tm.reset()
     if args.input is not None:
-        image = cv2.imread(args.input)
-        input_blob = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        image = cv.imread(args.input)
+        input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)
         input_blob, letterbox_scale = letterbox(input_blob)

         # Inference
@@ -113,25 +121,25 @@ def vis(dets, srcimg, letterbox_scale, fps=None):

         if args.save:
             print('Resutls saved to result.jpg\n')
-            cv2.imwrite('result.jpg', img)
+            cv.imwrite('result.jpg', img)

         if args.vis:
-            cv2.namedWindow(args.input, cv2.WINDOW_AUTOSIZE)
-            cv2.imshow(args.input, img)
-            cv2.waitKey(0)
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, img)
+            cv.waitKey(0)
     else:
         print("Press any key to stop video capture")
         deviceId = 0
-        cap = cv2.VideoCapture(deviceId)
+        cap = cv.VideoCapture(deviceId)

-        while cv2.waitKey(1) < 0:
+        while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break

-            input_blob = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
             input_blob, letterbox_scale = letterbox(input_blob)

             # Inference
@@ -141,6 +149,6 @@ def vis(dets, srcimg, letterbox_scale, fps=None):

             img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

-            cv2.imshow("YoloX Demo", img)
+            cv.imshow("YoloX Demo", img)

             tm.reset()
diff --git a/models/object_detection_yolox/yolox.py b/models/object_detection_yolox/yolox.py
index 617aa986..6d781523 100644
--- a/models/object_detection_yolox/yolox.py
+++ b/models/object_detection_yolox/yolox.py
@@ -23,12 +23,10 @@ def __init__(self, modelPath, confThreshold=0.35, nmsThreshold=0.5, objThreshold
     def name(self):
         return self.__class__.__name__

-    def setBackend(self, backendId):
-        self.backendId = backendId
+    def setBackendAndTarget(self, backendId, targetId):
+        self.backendId = backendId
+        self.targetId = targetId
         self.net.setPreferableBackend(self.backendId)
-
-    def setTarget(self, targetId):
-        self.targetId = targetId
         self.net.setPreferableTarget(self.targetId)

     def preprocess(self, img):
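One caveat about the `assert cv.__version__ >= "4.7.0"` guard that now appears in every demo: it compares version strings lexicographically, which happens to hold for the 4.7/4.8 series but would misorder a hypothetical "4.10.0" (which sorts before "4.7.0" as a string). A more robust variant, offered only as a sketch and not as part of this patch, compares numeric tuples:

```python
# Hedged sketch: numeric version comparison instead of string comparison.
import cv2 as cv

def opencv_at_least(required="4.7.0"):
    # Split "4.7.0" -> (4, 7, 0); tolerate suffixes like "4.7.0-dev".
    def to_tuple(version):
        parts = []
        for piece in version.split("."):
            digits = "".join(ch for ch in piece if ch.isdigit())
            parts.append(int(digits) if digits else 0)
        return tuple(parts)
    return to_tuple(cv.__version__) >= to_tuple(required)

assert opencv_at_least("4.7.0"), \
    "Please install latest opencv-python: python3 -m pip install --upgrade opencv-python"
```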
diff --git a/models/object_tracking_dasiamrpn/dasiamrpn.py b/models/object_tracking_dasiamrpn/dasiamrpn.py
index edf8c7a4..03a97130 100644
--- a/models/object_tracking_dasiamrpn/dasiamrpn.py
+++ b/models/object_tracking_dasiamrpn/dasiamrpn.py
@@ -27,18 +27,10 @@ def __init__(self, kernel_cls1_path, kernel_r1_path, model_path, backend_id=0, t
     def name(self):
         return self.__class__.__name__

-    def setBackend(self, backend_id):
-        self._backend_id = backend_id
-        self._param = cv.TrackerDaSiamRPN_Params()
-        self._param.model = self._model_path
-        self._param.kernel_cls1 = self._kernel_cls1_path
-        self._param.kernel_r1 = self._kernel_r1_path
-        self._param.backend = self._backend_id
-        self._param.target = self._target_id
-        self._model = cv.TrackerDaSiamRPN.create(self._param)
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backend_id = backendId
+        self._target_id = targetId

-    def setTarget(self, target_id):
-        self._target_id = target_id
         self._param = cv.TrackerDaSiamRPN_Params()
         self._param.model = self._model_path
         self._param.kernel_cls1 = self._kernel_cls1_path
@@ -53,4 +45,4 @@ def init(self, image, roi):
     def infer(self, image):
         isLocated, bbox = self._model.update(image)
         score = self._model.getTrackingScore()
-        return isLocated, bbox, score
\ No newline at end of file
+        return isLocated, bbox, score
diff --git a/models/object_tracking_dasiamrpn/demo.py b/models/object_tracking_dasiamrpn/demo.py
index 237d95a0..b6793979 100644
--- a/models/object_tracking_dasiamrpn/demo.py
+++ b/models/object_tracking_dasiamrpn/demo.py
@@ -11,22 +11,41 @@
 from dasiamrpn import DaSiamRPN

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)")
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input video. Omit for using default camera.')
-parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx', help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
-parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
-parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
-parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input video. Omit for using default camera.')
+parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx',
+                    help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.')
+parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx',
+                    help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.')
+parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx',
+                    help='Usage: Set path to dasiamrpn_kernel_r1.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
@@ -50,12 +69,16 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DaSiamRPN
     model = DaSiamRPN(
         kernel_cls1_path=args.kernel_cls1_path,
         kernel_r1_path=args.kernel_r1_path,
         model_path=args.model_path,
-    )
+        backend_id=backend_id,
+        target_id=target_id)

     # Read from args.input
     _input = args.input
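The hand-rolled `str2bool` converter is dropped throughout in favor of argparse's built-in `store_true` action, which also changes the CLI contract: a flag's presence alone enables it, no value is passed, and `--vis` now defaults to off instead of on, so visualization must be requested explicitly. A small illustrative sketch (the `prog` name is hypothetical):

```python
# Sketch of the new flag style; prog name is illustrative only.
import argparse

parser = argparse.ArgumentParser(prog='demo.py')
parser.add_argument('--save', '-s', action='store_true',
                    help='Specify to save results.')

# Old style: `demo.py --save True`; new style: presence alone enables it.
print(parser.parse_args(['--save']).save)  # True
print(parser.parse_args([]).save)          # False
```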
diff --git a/models/palm_detection_mediapipe/demo.py b/models/palm_detection_mediapipe/demo.py
index d338d813..e8ea9b9f 100644
--- a/models/palm_detection_mediapipe/demo.py
+++ b/models/palm_detection_mediapipe/demo.py
@@ -5,35 +5,40 @@
 from mp_palmdet import MPPalmDet

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--score_threshold', type=float, default=0.8, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx',
+                    help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--score_threshold', type=float, default=0.8,
+                    help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out palms of confidence < score_threshold. An empirical score threshold for the quantized model is 0.49.')
+parser.add_argument('--nms_threshold', type=float, default=0.3,
+                    help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, results, print_results=False, fps=None):
@@ -71,12 +76,15 @@ def visualize(image, results, print_results=False, fps=None):
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate MPPalmDet
     model = MPPalmDet(modelPath=args.model,
                       nmsThreshold=args.nms_threshold,
                       scoreThreshold=args.score_threshold,
-                      backendId=args.backend,
-                      targetId=args.target)
+                      backendId=backend_id,
+                      targetId=target_id)

     # If input is an image
     if args.input is not None:
@@ -123,4 +131,3 @@ def visualize(image, results, print_results=False, fps=None):
         cv.imshow('MPPalmDet Demo', frame)

         tm.reset()
-
diff --git a/models/palm_detection_mediapipe/mp_palmdet.py b/models/palm_detection_mediapipe/mp_palmdet.py
index ec8cd718..fd1f3bad 100644
--- a/models/palm_detection_mediapipe/mp_palmdet.py
+++ b/models/palm_detection_mediapipe/mp_palmdet.py
@@ -22,12 +22,10 @@ def __init__(self, modelPath, nmsThreshold=0.3, scoreThreshold=0.5, topK=5000, b
     def name(self):
         return self.__class__.__name__

-    def setBackend(self, backendId):
+    def setBackendAndTarget(self, backendId, targetId):
         self.backend_id = backendId
-        self.model.setPreferableBackend(self.backend_id)
-
-    def setTarget(self, targetId):
         self.target_id = targetId
+        self.model.setPreferableBackend(self.backend_id)
         self.model.setPreferableTarget(self.target_id)

     def _preprocess(self, image):
@@ -35,7 +33,7 @@ def _preprocess(self, image):
         ratio = min(self.input_size / image.shape[:2])
         if image.shape[0] != self.input_size[0] or image.shape[1] != self.input_size[1]:
             # keep aspect ratio when resize
-            ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int)
+            ratio_size = (np.array(image.shape[:2]) * ratio).astype(np.int32)
             image = cv.resize(image, (ratio_size[1], ratio_size[0]))
             pad_h = self.input_size[0] - ratio_size[0]
             pad_w = self.input_size[1] - ratio_size[1]
@@ -46,7 +44,7 @@ def _preprocess(self, image):
         image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
         image = image.astype(np.float32) / 255.0 # norm
-        pad_bias = (pad_bias / ratio).astype(np.int)
+        pad_bias = (pad_bias / ratio).astype(np.int32)
         return image[np.newaxis, :, :, :], pad_bias # hwc -> nhwc

     def infer(self, image):
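The `np.int` → `np.int32` replacements here track NumPy's removal of the deprecated alias: `np.int` was only ever Python's built-in `int`, was deprecated in NumPy 1.20 and removed in 1.24, so casts through it now raise `AttributeError`. A version-safe equivalent of the affected cast:

```python
# Sketch: np.int was removed in NumPy 1.24; use an explicit dtype instead.
import numpy as np

ratio = 0.5
shape = np.array([480, 640])

# Old (raises AttributeError on NumPy >= 1.24):
#   (shape * ratio).astype(np.int)
ratio_size = (shape * ratio).astype(np.int32)  # explicit fixed-width dtype
print(ratio_size)  # [240 320]
```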
"; {:d}: NPU" -except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') +# Check OpenCV version +assert cv.__version__ >= "4.7.0", \ + "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python" + +# Valid combinations of backends and targets +backend_target_pairs = [ + [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU], + [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA], + [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16], + [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU], + [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU] +] parser = argparse.ArgumentParser( description="ReID baseline models from Tencent Youtu Lab") -parser.add_argument('--query_dir', '-q', type=str, help='Query directory.') -parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.') -parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) -parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) -parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.') -parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.') -parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--query_dir', '-q', type=str, + help='Query directory.') +parser.add_argument('--gallery_dir', '-g', type=str, + help='Gallery directory.') +parser.add_argument('--backend_target', '-bt', type=int, default=0, + help='''Choose one of the backend-target pair to run this demo: + {:d}: (default) OpenCV implementation + CPU, + {:d}: CUDA + GPU (CUDA), + {:d}: CUDA + GPU (CUDA FP16), + {:d}: TIM-VX + NPU, + {:d}: CANN + NPU + '''.format(*[x for x in range(len(backend_target_pairs))])) +parser.add_argument('--topk', type=int, default=10, + help='Top-K closest from gallery for each query.') +parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', + help='Path to the model.') +parser.add_argument('--save', '-s', type=str2bool, default=False, + help='Set true to save results. This flag is invalid when using camera.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, + help='Set true to open a window for result visualization. 
 args = parser.parse_args()

 def readImageFromDirectory(img_dir, w=128, h=256):
@@ -78,8 +83,11 @@ def addBorder(img, color, borderSize=5):
     return results_vis

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate YoutuReID for person ReID
-    net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
+    net = YoutuReID(modelPath=args.model, backendId=backend_id, targetId=target_id)

     # Read images from dir
     query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
diff --git a/models/person_reid_youtureid/youtureid.py b/models/person_reid_youtureid/youtureid.py
index fa24483d..b2fafe16 100644
--- a/models/person_reid_youtureid/youtureid.py
+++ b/models/person_reid_youtureid/youtureid.py
@@ -26,12 +26,10 @@ def __init__(self, modelPath, backendId=0, targetId=0):
     def name(self):
         return self.__class__.__name__

-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)

     def _preprocess(self, image):
@@ -67,4 +65,3 @@ def query(self, query_img_list, gallery_img_list, topK=5):
         dist = np.matmul(query_arr, gallery_arr.T)
         idx = np.argsort(-dist, axis=1)
         return [i[0:topK] for i in idx]
-
diff --git a/models/qrcode_wechatqrcode/demo.py b/models/qrcode_wechatqrcode/demo.py
index 47952d11..1f4821db 100644
--- a/models/qrcode_wechatqrcode/demo.py
+++ b/models/qrcode_wechatqrcode/demo.py
@@ -11,23 +11,43 @@
 from wechatqrcode import WeChatQRCode

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)")
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt', help='Usage: Set path to detect.prototxt.')
-parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel', help='Usage: Set path to detect.caffemodel.')
-parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt', help='Usage: Set path to sr.prototxt.')
-parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel', help='Usage: Set path to sr.caffemodel.')
-parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt',
+                    help='Usage: Set path to detect.prototxt.')
+parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel',
+                    help='Usage: Set path to detect.caffemodel.')
+parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt',
+                    help='Usage: Set path to sr.prototxt.')
+parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel',
+                    help='Usage: Set path to sr.caffemodel.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
@@ -56,11 +76,16 @@ def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255,

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate WeChatQRCode
     model = WeChatQRCode(args.detect_prototxt_path,
                          args.detect_model_path,
                          args.sr_prototxt_path,
-                         args.sr_model_path)
+                         args.sr_model_path,
+                         backendId=backend_id,
+                         targetId=target_id)

     # If input is an image:
     if args.input is not None:
@@ -107,4 +132,4 @@ def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255,

         # Visualize results in a new window
         cv.imshow('WeChatQRCode Demo', frame)

-        tm.reset()
\ No newline at end of file
+        tm.reset()
diff --git a/models/qrcode_wechatqrcode/wechatqrcode.py b/models/qrcode_wechatqrcode/wechatqrcode.py
index 937a03e3..3b0432e9 100644
--- a/models/qrcode_wechatqrcode/wechatqrcode.py
+++ b/models/qrcode_wechatqrcode/wechatqrcode.py
@@ -8,27 +8,27 @@ import cv2 as cv # needs to have cv.wechat_qrcode_WeChatQRCode, which requires compile from source with opencv_contrib/modules/wechat_qrcode

 class WeChatQRCode:
-    def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path):
+    def __init__(self, detect_prototxt_path, detect_model_path, sr_prototxt_path, sr_model_path, backendId=0, targetId=0):
         self._model = cv.wechat_qrcode_WeChatQRCode(
             detect_prototxt_path,
             detect_model_path,
             sr_prototxt_path,
             sr_model_path
         )
+        if backendId not in [cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_OPENCV]:
+            raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
+        if targetId != cv.dnn.DNN_TARGET_CPU:
+            raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(targetId))

     @property
     def name(self):
         return self.__class__.__name__
-    def setBackend(self, backend_id):
-        # self._model.setPreferableBackend(backend_id)
-        if backend_id != 0:
-            raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
-
-    def setTarget(self, target_id):
-        # self._model.setPreferableTarget(target_id)
-        if target_id != 0:
-            raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()")
+    def setBackendAndTarget(self, backendId, targetId):
+        if backendId not in [cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_OPENCV]:
+            raise NotImplementedError("Backend {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(backendId))
+        if targetId != cv.dnn.DNN_TARGET_CPU:
+            raise NotImplementedError("Target {} is not supported by cv.wechat_qrcode_WeChatQRCode()".format(targetId))

     def infer(self, image):
-        return self._model.detectAndDecode(image)
\ No newline at end of file
+        return self._model.detectAndDecode(image)
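`cv.wechat_qrcode_WeChatQRCode` drives its own fixed CPU pipeline and exposes no backend/target knobs, so the wrapper validates the requested pair up front and raises for anything beyond the default OpenCV/CPU combination (the guards compare against the `cv.dnn` constants because `cv.dnn.DNN_BACKEND_OPENCV` is not 0, and the demos pass it by default). A short usage sketch, assuming an OpenCV build with the opencv_contrib wechat_qrcode module and the four model files from the demo; `qrcode.jpg` is a hypothetical input:

```python
# Sketch: WeChatQRCode only supports the default OpenCV/CPU pair.
import cv2 as cv
from wechatqrcode import WeChatQRCode

model = WeChatQRCode('detect_2021sep.prototxt', 'detect_2021sep.caffemodel',
                     'sr_2021sep.prototxt', 'sr_2021sep.caffemodel',
                     backendId=cv.dnn.DNN_BACKEND_OPENCV,
                     targetId=cv.dnn.DNN_TARGET_CPU)

image = cv.imread('qrcode.jpg')   # hypothetical input image
res, points = model.infer(image)  # decoded strings and QR corner points
for text in res:
    print(text)
```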
diff --git a/models/text_detection_db/db.py b/models/text_detection_db/db.py
index 9f24899f..518c97f7 100644
--- a/models/text_detection_db/db.py
+++ b/models/text_detection_db/db.py
@@ -38,12 +38,10 @@ def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygon
     def name(self):
         return self.__class__.__name__

-    def setBackend(self, backend):
-        self._backendId = backend
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target):
-        self._targetId = target
         self._model.setPreferableTarget(self._targetId)

     def setInputSize(self, input_size):
@@ -55,4 +53,3 @@ def infer(self, image):
         assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])

         return self._model.detect(image)
-
diff --git a/models/text_detection_db/demo.py b/models/text_detection_db/demo.py
index dbee8310..fd02bc5a 100644
--- a/models/text_detection_db/demo.py
+++ b/models/text_detection_db/demo.py
@@ -11,41 +11,48 @@
 from db import DB

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
+                    help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument('--width', type=int, default=736, help='Usage: Resize input image to certain width, default = 736. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736, help='Usage: Resize input image to certain height, default = 736. It should be multiple by 32.')
-parser.add_argument('--binary_threshold', type=float, default=0.3, help='Usage: Threshold of the binary map, default = 0.3.')
-parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Usage: Threshold of polygons, default = 0.5.')
-parser.add_argument('--max_candidates', type=int, default=200, help='Usage: Set maximum number of polygon candidates, default = 200.')
-parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--binary_threshold', type=float, default=0.3,
+                    help='Usage: Threshold of the binary map, default = 0.3.')
+parser.add_argument('--polygon_threshold', type=float, default=0.5,
+                    help='Usage: Threshold of polygons, default = 0.5.')
+parser.add_argument('--max_candidates', type=int, default=200,
+                    help='Usage: Set maximum number of polygon candidates, default = 200.')
+parser.add_argument('--unclip_ratio', type=np.float64, default=2.0,
+                    help='Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
@@ -60,6 +67,9 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
     return output

 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DB
     model = DB(modelPath=args.model,
                inputSize=[args.width, args.height],
@@ -67,9 +77,8 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
                polygonThreshold=args.polygon_threshold,
                maxCandidates=args.max_candidates,
                unclipRatio=args.unclip_ratio,
-               backendId=args.backend,
-               targetId=args.target
-               )
+               backendId=backend_id,
+               targetId=target_id)

     # If input is an image
     if args.input is not None:
@@ -143,4 +152,3 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isC
         cv.imshow('{} Demo'.format(model.name), original_image)

         tm.reset()
-
diff --git a/models/text_recognition_crnn/crnn.py b/models/text_recognition_crnn/crnn.py
index 74c821e2..20860c5f 100644
--- a/models/text_recognition_crnn/crnn.py
+++ b/models/text_recognition_crnn/crnn.py
@@ -43,12 +43,10 @@ def name(self):

     def _load_charset(self, charset):
         return ''.join(charset.splitlines())

-    def setBackend(self, backend_id):
-        self._backendId = backend_id
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
         self._model.setPreferableBackend(self._backendId)
-
-    def setTarget(self, target_id):
-        self._targetId = target_id
         self._model.setPreferableTarget(self._targetId)

     def _preprocess(self, image, rbbox):
diff --git a/models/text_recognition_crnn/demo.py b/models/text_recognition_crnn/demo.py
index 66b5bea0..cddbd1e0 100644
--- a/models/text_recognition_crnn/demo.py
+++ b/models/text_recognition_crnn/demo.py
@@ -15,38 +15,41 @@
 sys.path.append('../text_detection_db')
 from db import DB

-def str2bool(v):
-    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
-        return True
-    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
-        return False
-    else:
-        raise NotImplementedError
-
-backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
-targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
-help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
-try:
-    backends += [cv.dnn.DNN_BACKEND_TIMVX]
-    targets += [cv.dnn.DNN_TARGET_NPU]
-    help_msg_backends += "; {:d}: TIMVX"
-    help_msg_targets += "; {:d}: NPU"
-except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+# Check OpenCV version
+assert cv.__version__ >= "4.7.0", \
+       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
+
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]

 parser = argparse.ArgumentParser(
     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
-parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
-parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
-parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
+parser.add_argument('--input', '-i', type=str,
+                    help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx',
+                    help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pairs to run this demo:
+                    {:d}: (default) OpenCV implementation + CPU,
+                    {:d}: CUDA + GPU (CUDA),
+                    {:d}: CUDA + GPU (CUDA FP16),
+                    {:d}: TIM-VX + NPU,
+                    {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
 parser.add_argument('--width', type=int, default=736, help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
 parser.add_argument('--height', type=int, default=736, help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()

 def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
@@ -59,8 +62,9 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
     return output

 if __name__ == '__main__':
-    # Instantiate CRNN for text recognition
-    recognizer = CRNN(modelPath=args.model)
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+
     # Instantiate DB for text detection
     detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
                   inputSize=[args.width, args.height],
@@ -68,9 +72,10 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2
                   polygonThreshold=0.5,
                   maxCandidates=200,
                   unclipRatio=2.0,
-                  backendId=args.backend,
-                  targetId=args.target
-                  )
+                  backendId=backend_id,
+                  targetId=target_id)
+    # Instantiate CRNN for text recognition
+    recognizer = CRNN(modelPath=args.model, backendId=backend_id, targetId=target_id)

     # If input is an image
     if args.input is not None:
@@ -161,4 +166,3 @@ def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2

         # Visualize results in a new Window
         cv.imshow('{} Demo'.format(recognizer.name), original_image)
-
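Put together, the CRNN demo now threads a single backend-target pair through both stages of the pipeline, so the DB detector and the CRNN recognizer always run on the same device. A rough sketch of the flow; the inference signatures are assumptions based on the demo code not shown in this hunk (`DB.infer` returning detections plus scores, `CRNN.infer` taking the image and a flattened quadrilateral), so treat them as illustrative only:

```python
# Rough pipeline sketch: shared backend/target across DB detection and
# CRNN recognition. Inference signatures below are assumptions.
import cv2 as cv
from crnn import CRNN
from db import DB

backend_id, target_id = cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU

detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
              inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5,
              maxCandidates=200, unclipRatio=2.0,
              backendId=backend_id, targetId=target_id)
recognizer = CRNN(modelPath='text_recognition_CRNN_EN_2021sep.onnx',
                  backendId=backend_id, targetId=target_id)

image = cv.resize(cv.imread('sample.jpg'), (736, 736))  # hypothetical input
boxes, _ = detector.infer(image)                  # assumed: (boxes, scores)
texts = [recognizer.infer(image, box.reshape(8))  # assumed flattened quad
         for box in boxes]
print(texts)
```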