From f7c288143430c04422f8425f6f2186b6c4adaa9f Mon Sep 17 00:00:00 2001 From: Satyam Goyal Date: Sun, 27 Nov 2022 03:20:15 -0800 Subject: [PATCH] Merge pull request #95 from Satgoy152:adding-doc Improved help messages for demo programs (#95) - Added Demo Documentation - Updated help messages - Changed exception link --- README.md | 20 +++++++++------- models/face_detection_yunet/README.md | 9 +++++-- models/face_detection_yunet/demo.py | 18 +++++++------- models/face_recognition_sface/README.md | 13 ++++++---- models/face_recognition_sface/demo.py | 16 ++++++------- models/handpose_estimation_mediapipe/demo.py | 2 +- .../human_segmentation_pphumanseg/README.md | 8 +++++-- models/human_segmentation_pphumanseg/demo.py | 10 ++++---- .../image_classification_mobilenet/README.md | 17 +++++++------ models/image_classification_mobilenet/demo.py | 8 +++---- .../image_classification_ppresnet/README.md | 13 ++++++---- models/image_classification_ppresnet/demo.py | 8 +++---- .../license_plate_detection_yunet/README.md | 6 ++++- models/license_plate_detection_yunet/demo.py | 18 +++++++------- models/object_tracking_dasiamrpn/README.md | 7 +++++- models/object_tracking_dasiamrpn/demo.py | 12 +++++----- models/palm_detection_mediapipe/README.md | 7 +++++- models/palm_detection_mediapipe/demo.py | 14 +++++------ models/person_reid_youtureid/README.md | 11 ++++++--- models/person_reid_youtureid/demo.py | 2 +- models/qrcode_wechatqrcode/README.md | 7 +++++- models/qrcode_wechatqrcode/demo.py | 14 +++++------ models/text_detection_db/README.md | 8 ++++++- models/text_detection_db/demo.py | 22 ++++++++--------- models/text_recognition_crnn/README.md | 24 +++++++++++++------ models/text_recognition_crnn/demo.py | 12 +++++----- 26 files changed, 184 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index e5bb5904..8957f58b 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,20 @@ A zoo for models tuned for OpenCV DNN with benchmarks on different platforms. Guidelines: + - Clone this repo to download all models and demo scripts: - ```shell - # Install git-lfs from https://git-lfs.github.com/ - git clone https://github.com/opencv/opencv_zoo && cd opencv_zoo - git lfs install - git lfs pull - ``` + ```shell + # Install git-lfs from https://git-lfs.github.com/ + git clone https://github.com/opencv/opencv_zoo && cd opencv_zoo + git lfs install + git lfs pull + ``` - To run benchmarks on your hardware settings, please refer to [benchmark/README](./benchmark/README.md). 
## Models & Benchmark Results -| Model | Task | Input Size | INTEL-CPU (ms) | RPI-CPU (ms) | JETSON-GPU (ms) | KV3-NPU (ms) | D1-CPU (ms) | -|---------------------------------------------------------|-------------------------------|------------|----------------|--------------|-----------------|--------------|-------------| +| Model | Task | Input Size | INTEL-CPU (ms) | RPI-CPU (ms) | JETSON-GPU (ms) | KV3-NPU (ms) | D1-CPU (ms) | +| ---------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- | | [YuNet](./models/face_detection_yunet) | Face Detection | 160x120 | 1.45 | 6.22 | 12.18 | 4.04 | 86.69 | | [SFace](./models/face_recognition_sface) | Face Recognition | 112x112 | 8.65 | 99.20 | 24.88 | 46.25 | --- | | [LPD-YuNet](./models/license_plate_detection_yunet/) | License Plate Detection | 320x240 | --- | 168.03 | 56.12 | 154.20\* | | @@ -36,6 +37,7 @@ Guidelines: \*: Models are quantized in per-channel mode, which run slower than per-tensor quantized models on NPU. Hardware Setup: + - `INTEL-CPU`: [Intel Core i7-5930K](https://www.intel.com/content/www/us/en/products/sku/82931/intel-core-i75930k-processor-15m-cache-up-to-3-70-ghz/specifications.html) @ 3.50GHz, 6 cores, 12 threads. - `RPI-CPU`: [Raspberry Pi 4B](https://www.raspberrypi.com/products/raspberry-pi-4-model-b/specifications/), Broadcom BCM2711, Quad core Cortex-A72 (ARM v8) 64-bit SoC @ 1.5GHz. - `JETSON-GPU`: [NVIDIA Jetson Nano B01](https://developer.nvidia.com/embedded/jetson-nano-developer-kit), 128-core NVIDIA Maxwell GPU. @@ -43,6 +45,7 @@ Hardware Setup: - `D1-CPU`: [Allwinner D1](https://d1.docs.aw-ol.com/en), [Xuantie C906 CPU](https://www.t-head.cn/product/C906?spm=a2ouz.12986968.0.0.7bfc1384auGNPZ) (RISC-V, RVV 0.7.1) @ 1.0GHz, 1 core. YuNet is supported for now. Visit [here](https://github.com/fengyuentau/opencv_zoo_cpp) for more details. ***Important Notes***: + - The data under each column of hardware setups on the above table represents the elapsed time of an inference (preprocess, forward and postprocess). - The time data is the median of 10 runs after some warmup runs. Different metrics may be applied to some specific models. - Batch size is 1 for all benchmark results. @@ -52,6 +55,7 @@ Hardware Setup: ## Some Examples Some examples are listed below. You can find more in the directory of each model! + ### Face Detection with [YuNet](./models/face_detection_yunet/) ![largest selfie](./models/face_detection_yunet/examples/largest_selfie.jpg) diff --git a/models/face_detection_yunet/README.md b/models/face_detection_yunet/README.md index ea5e3cfd..b3cf740b 100644 --- a/models/face_detection_yunet/README.md +++ b/models/face_detection_yunet/README.md @@ -3,14 +3,15 @@ YuNet is a light-weight, fast and accurate face detection model, which achieves 0.834(AP_easy), 0.824(AP_medium), 0.708(AP_hard) on the WIDER Face validation set. Notes: + - Model source: [here](https://github.com/ShiqiYu/libfacedetection.train/blob/a61a428929148171b488f024b5d6774f93cdbc13/tasks/task1/onnx/yunet.onnx). - For details on training this model, please visit https://github.com/ShiqiYu/libfacedetection.train. - This ONNX model has fixed input shape, but OpenCV DNN infers on the exact shape of input image. See https://github.com/opencv/opencv_zoo/issues/44 for more information. Results of accuracy evaluation with [tools/eval](../../tools/eval). 
-| Models | Easy AP | Medium AP | Hard AP |
-|-------------|---------|-----------|---------|
+| Models | Easy AP | Medium AP | Hard AP |
+| ----------- | ------- | --------- | ------- |
 | YuNet | 0.8498 | 0.8384 | 0.7357 |
 | YuNet quant | 0.7751 | 0.8145 | 0.7312 |

@@ -19,11 +20,15 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).
 ## Demo
 
 Run the following command to try the demo:
+
 ```shell
 # detect on camera input
 python demo.py
 # detect on an image
 python demo.py --input /path/to/image
+
+# get help regarding various parameters
+python demo.py --help
 ```
 
 ### Example outputs
diff --git a/models/face_detection_yunet/demo.py b/models/face_detection_yunet/demo.py
index dcb7621f..00b099e0 100644
--- a/models/face_detection_yunet/demo.py
+++ b/models/face_detection_yunet/demo.py
@@ -22,25 +22,25 @@ def str2bool(v):
 backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
 targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
 help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
-help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
 try:
     backends += [cv.dnn.DNN_BACKEND_TIMVX]
     targets += [cv.dnn.DNN_TARGET_NPU]
     help_msg_backends += "; {:d}: TIMVX"
     help_msg_targets += "; {:d}: NPU"
 except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
 parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help='Path to the model.')
+parser.add_argument('--input', '-i', type=str, help='Usage: Set input to a certain image, omit if using camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2022mar.onnx', help="Usage: Set model type, defaults to 'face_detection_yunet_2022mar.onnx'.")
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
-parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a face, defaults to 0.9. 
Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold.') +parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.') +parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.') +parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.') args = parser.parse_args() def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None): diff --git a/models/face_recognition_sface/README.md b/models/face_recognition_sface/README.md index 10ab8137..aab5cff4 100644 --- a/models/face_recognition_sface/README.md +++ b/models/face_recognition_sface/README.md @@ -3,30 +3,33 @@ SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition Note: + - SFace is contributed by [Yaoyao Zhong](https://github.com/zhongyy/SFace). - [face_recognition_sface_2021sep.onnx](./face_recognition_sface_2021sep.onnx) is converted from the model from https://github.com/zhongyy/SFace thanks to [Chengrui Wang](https://github.com/crywang). - Support 5-landmark warpping for now (2021sep) Results of accuracy evaluation with [tools/eval](../../tools/eval). -| Models | Accuracy | -|-------------|----------| +| Models | Accuracy | +| ----------- | -------- | | SFace | 0.9940 | | SFace quant | 0.9932 | \*: 'quant' stands for 'quantized'. - ## Demo ***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as face detector, which supports 5-landmark detection for now (2021sep). Run the following command to try the demo: + ```shell # recognize on images python demo.py --input1 /path/to/image1 --input2 /path/to/image2 -``` +# get help regarding various parameters +python demo.py --help +``` ## License @@ -35,4 +38,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE). ## Reference - https://ieeexplore.ieee.org/document/9318547 -- https://github.com/zhongyy/SFace \ No newline at end of file +- https://github.com/zhongyy/SFace diff --git a/models/face_recognition_sface/demo.py b/models/face_recognition_sface/demo.py index 95023522..76fa100f 100644 --- a/models/face_recognition_sface/demo.py +++ b/models/face_recognition_sface/demo.py @@ -25,7 +25,7 @@ def str2bool(v): backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA] targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16] -help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA" +help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA \n Usage: Set backend DNN model, defaults to cv.dnn.DNN_BACKEND_OPENCV (int = 0). Based on your OpenCV version, it may or may not support cv.dnn.DNN_BACKEND_TIMVX. 
More details: [https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f]" help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16" try: backends += [cv.dnn.DNN_BACKEND_TIMVX] @@ -33,18 +33,18 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser( description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)") -parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.') -parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.') -parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.') +parser.add_argument('--input1', '-i1', type=str, help='Usage: Set path to the input image 1 (original face).') +parser.add_argument('--input2', '-i2', type=str, help='Usage: Set path to the input image 2 (comparison face).') +parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.') parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) -parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.') -parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'') +parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.') args = parser.parse_args() if __name__ == '__main__': diff --git a/models/handpose_estimation_mediapipe/demo.py b/models/handpose_estimation_mediapipe/demo.py index 33429547..ade0b96d 100644 --- a/models/handpose_estimation_mediapipe/demo.py +++ b/models/handpose_estimation_mediapipe/demo.py @@ -27,7 +27,7 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser(description='Hand Pose Estimation from MediaPipe') parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') diff --git a/models/human_segmentation_pphumanseg/README.md b/models/human_segmentation_pphumanseg/README.md index 257e54b0..c59f253a 100644 --- a/models/human_segmentation_pphumanseg/README.md +++ b/models/human_segmentation_pphumanseg/README.md @@ -5,14 +5,18 @@ This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub) ## Demo Run the following command to try the demo: + ```shell # detect on camera input python demo.py # detect on an image python demo.py --input /path/to/image + +# get help regarding various parameters +python demo.py --help ``` -## Example outputs +### Example outputs ![webcam demo](./examples/pphumanseg_demo.gif) @@ -26,4 +30,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE). - https://arxiv.org/abs/1512.03385 - https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle -- https://github.com/PaddlePaddle/PaddleHub \ No newline at end of file +- https://github.com/PaddlePaddle/PaddleHub diff --git a/models/human_segmentation_pphumanseg/demo.py b/models/human_segmentation_pphumanseg/demo.py index 4f7ac363..d5b14697 100644 --- a/models/human_segmentation_pphumanseg/demo.py +++ b/models/human_segmentation_pphumanseg/demo.py @@ -29,15 +29,15 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)') -parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') -parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.') +parser.add_argument('--input', '-i', type=str, help='Usage: Set input path to a certain image, omit if using camera.') +parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Usage: Set model path, defaults to human_segmentation_pphumanseg_2021oct.onnx.') parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) -parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. 
Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.') args = parser.parse_args() def get_color_map_list(num_classes): diff --git a/models/image_classification_mobilenet/README.md b/models/image_classification_mobilenet/README.md index 8f4d1024..96c668c7 100644 --- a/models/image_classification_mobilenet/README.md +++ b/models/image_classification_mobilenet/README.md @@ -6,23 +6,27 @@ MobileNetV2: Inverted Residuals and Linear Bottlenecks Results of accuracy evaluation with [tools/eval](../../tools/eval). -| Models | Top-1 Accuracy | Top-5 Accuracy | -| ------ | -------------- | -------------- | -| MobileNet V1 | 67.64 | 87.97 | -| MobileNet V1 quant | 55.53 | 78.74 | -| MobileNet V2 | 69.44 | 89.23 | -| MobileNet V2 quant | 68.37 | 88.56 | +| Models | Top-1 Accuracy | Top-5 Accuracy | +| ------------------ | -------------- | -------------- | +| MobileNet V1 | 67.64 | 87.97 | +| MobileNet V1 quant | 55.53 | 78.74 | +| MobileNet V2 | 69.44 | 89.23 | +| MobileNet V2 quant | 68.37 | 88.56 | \*: 'quant' stands for 'quantized'. ## Demo Run the following command to try the demo: + ```shell # MobileNet V1 python demo.py --input /path/to/image # MobileNet V2 python demo.py --input /path/to/image --model v2 + +# get help regarding various parameters +python demo.py --help ``` ## License @@ -35,4 +39,3 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE). - MobileNet V2: https://arxiv.org/abs/1801.04381 - MobileNet V1 weight and scripts for training: https://github.com/wjc852456/pytorch-mobilenet-v1 - MobileNet V2 weight: https://github.com/onnx/models/tree/main/vision/classification/mobilenet - diff --git a/models/image_classification_mobilenet/demo.py b/models/image_classification_mobilenet/demo.py index 0d76a1a0..0af322ac 100644 --- a/models/image_classification_mobilenet/demo.py +++ b/models/image_classification_mobilenet/demo.py @@ -24,14 +24,14 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
 parser = argparse.ArgumentParser(description='Demo for MobileNet V1 & V2.')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, choices=['v1', 'v2', 'v1-q', 'v2-q'], default='v1', help='Which model to use, either v1 or v2.')
+parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image.')
+parser.add_argument('--model', '-m', type=str, choices=['v1', 'v2', 'v1-q', 'v2-q'], default='v1', help='Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1).')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
+parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Usage: Set path to the different labels that will be used during classification. Default list found in imagenet_labels.txt.')
 args = parser.parse_args()
 
 if __name__ == '__main__':
diff --git a/models/image_classification_ppresnet/README.md b/models/image_classification_ppresnet/README.md
index 6707743a..01353cce 100644
--- a/models/image_classification_ppresnet/README.md
+++ b/models/image_classification_ppresnet/README.md
@@ -6,18 +6,22 @@ This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub)
 Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
-| Models | Top-1 Accuracy | Top-5 Accuracy |
-| ------ | -------------- | -------------- |
-| PP-ResNet | 82.28 | 96.15 |
-| PP-ResNet quant | 0.22 | 0.96 |
+| Models | Top-1 Accuracy | Top-5 Accuracy |
+| --------------- | -------------- | -------------- |
+| PP-ResNet | 82.28 | 96.15 |
+| PP-ResNet quant | 0.22 | 0.96 |
 
 \*: 'quant' stands for 'quantized'.
 
 ## Demo
 
 Run the following command to try the demo:
+
 ```shell
 python demo.py --input /path/to/image
+
+# get help regarding various parameters
+python demo.py --help
 ```
 
 ## License
@@ -29,4 +33,3 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 - https://arxiv.org/abs/1512.03385
 - https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle
 - https://github.com/PaddlePaddle/PaddleHub
-
diff --git a/models/image_classification_ppresnet/demo.py b/models/image_classification_ppresnet/demo.py
index b11cf79e..1109be09 100644
--- a/models/image_classification_ppresnet/demo.py
+++ b/models/image_classification_ppresnet/demo.py
@@ -29,14 +29,14 @@ def str2bool(v):
     help_msg_backends += "; {:d}: TIMVX"
     help_msg_targets += "; {:d}: NPU"
 except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+    print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
 parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
+parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image.')
+parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Usage: Set model path, defaults to image_classification_ppresnet50_2022jan.onnx.')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
+parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Usage: Set path to the different labels that will be used during classification. Default list found in imagenet_labels.txt.')
 args = parser.parse_args()
 
 if __name__ == '__main__':
diff --git a/models/license_plate_detection_yunet/README.md b/models/license_plate_detection_yunet/README.md
index 75f07e8d..2b30ddf5 100644
--- a/models/license_plate_detection_yunet/README.md
+++ b/models/license_plate_detection_yunet/README.md
@@ -7,11 +7,14 @@ Please note that the model is trained with Chinese license plates, so the detect
 ## Demo
 
 Run the following command to try the demo:
+
 ```shell
 # detect on camera input
 python demo.py
 # detect on an image
 python demo.py --input /path/to/image
+# get help regarding various parameters
+python demo.py --help
 ```
 
 ### Example outputs
@@ -19,8 +22,9 @@ python demo.py --input /path/to/image
 ![lpd](./examples/lpd_yunet_demo.gif)
 
 ## License
+
 All files in this directory are licensed under [Apache 2.0 License](./LICENSE)
 
 ## Reference
 
- - https://github.com/ShiqiYu/libfacedetection.train
+- https://github.com/ShiqiYu/libfacedetection.train
diff --git a/models/license_plate_detection_yunet/demo.py b/models/license_plate_detection_yunet/demo.py
index d601025f..daa2c87f 100644
--- a/models/license_plate_detection_yunet/demo.py
+++ b/models/license_plate_detection_yunet/demo.py
@@ -23,19 +23,19 @@ def str2bool(v):
     help_msg_backends += "; {:d}: TIMVX"
     help_msg_targets += "; {:d}: NPU"
 except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
 parser = argparse.ArgumentParser(description='LPD-YuNet for License Plate Detection')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2022may.onnx', help='Path to the model.')
+parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. 
Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='license_plate_detection_lpd_yunet_2022may.onnx', help='Usage: Set model path, defaults to license_plate_detection_lpd_yunet_2022may.onnx.')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
-parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
-parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
-parser.add_argument('--keep_top_k', type=int, default=750, help='Keep keep_top_k bounding boxes after NMS.')
-parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+parser.add_argument('--conf_threshold', type=float, default=0.9, help='Usage: Set the minimum needed confidence for the model to identify a license plate, defaults to 0.9. Smaller values may result in faster detection, but will limit accuracy. Filter out license plates of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
+parser.add_argument('--top_k', type=int, default=5000, help='Usage: Keep top_k bounding boxes before NMS.')
+parser.add_argument('--keep_top_k', type=int, default=750, help='Usage: Keep keep_top_k bounding boxes after NMS.')
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, dets, line_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
diff --git a/models/object_tracking_dasiamrpn/README.md b/models/object_tracking_dasiamrpn/README.md
index c765975b..24be3473 100644
--- a/models/object_tracking_dasiamrpn/README.md
+++ b/models/object_tracking_dasiamrpn/README.md
@@ -3,17 +3,22 @@
 [Distractor-aware Siamese Networks for Visual Object Tracking](https://arxiv.org/abs/1808.06048)
 
 Note:
+
 - Model source: [opencv/samples/dnn/diasiamrpn_tracker.cpp](https://github.com/opencv/opencv/blob/ceb94d52a104c0c1287a43dfa6ba72705fb78ac1/samples/dnn/dasiamrpn_tracker.cpp#L5-L7)
 - Visit https://github.com/foolwood/DaSiamRPN for training details.
 
 ## Demo
 
 Run the following command to try the demo:
+
 ```shell
 # track on camera input
 python demo.py
 # track on video input
 python demo.py --input /path/to/video
+
+# get help regarding various parameters
+python demo.py --help
 ```
 
 ### Example outputs
@@ -29,4 +34,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE). 
- DaSiamRPN Official Repository: https://github.com/foolwood/DaSiamRPN - Paper: https://arxiv.org/abs/1808.06048 - OpenCV API `TrackerDaSiamRPN` Doc: https://docs.opencv.org/4.x/de/d93/classcv_1_1TrackerDaSiamRPN.html -- OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/dasiamrpn_tracker.cpp \ No newline at end of file +- OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/dasiamrpn_tracker.cpp diff --git a/models/object_tracking_dasiamrpn/demo.py b/models/object_tracking_dasiamrpn/demo.py index 319b5c53..11aeef6e 100644 --- a/models/object_tracking_dasiamrpn/demo.py +++ b/models/object_tracking_dasiamrpn/demo.py @@ -21,12 +21,12 @@ def str2bool(v): parser = argparse.ArgumentParser( description="Distractor-aware Siamese Networks for Visual Object Tracking (https://arxiv.org/abs/1808.06048)") -parser.add_argument('--input', '-i', type=str, help='Path to the input video. Omit for using default camera.') -parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx', help='Path to dasiamrpn_model.onnx.') -parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx', help='Path to dasiamrpn_kernel_cls1.onnx.') -parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx', help='Path to dasiamrpn_kernel_r1.onnx.') -parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input video. Omit for using default camera.') +parser.add_argument('--model_path', type=str, default='object_tracking_dasiamrpn_model_2021nov.onnx', help='Usage: Set model path, defaults to object_tracking_dasiamrpn_model_2021nov.onnx.') +parser.add_argument('--kernel_cls1_path', type=str, default='object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_cls1.onnx.') +parser.add_argument('--kernel_r1_path', type=str, default='object_tracking_dasiamrpn_kernel_r1_2021nov.onnx', help='Usage: Set path to dasiamrpn_kernel_r1.onnx.') +parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. 
Invalid in case of camera input.') args = parser.parse_args() def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1): diff --git a/models/palm_detection_mediapipe/README.md b/models/palm_detection_mediapipe/README.md index ca001e4f..ab761bcf 100644 --- a/models/palm_detection_mediapipe/README.md +++ b/models/palm_detection_mediapipe/README.md @@ -1,20 +1,25 @@ # Palm detector from MediaPipe Handpose This model detects palm bounding boxes and palm landmarks, and is converted from Tensorflow-JS to ONNX using following tools: + - tfjs to tf_saved_model: https://github.com/patlevin/tfjs-to-tf/ - tf_saved_model to ONNX: https://github.com/onnx/tensorflow-onnx - simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier) -Also note that the model is quantized in per-channel mode with [Intel's neural compressor](https://github.com/intel/neural-compressor), which gives better accuracy but may lose some speed. +Also note that the model is quantized in per-channel mode with [Intel's neural compressor](https://github.com/intel/neural-compressor), which gives better accuracy but may lose some speed. ## Demo Run the following commands to try the demo: + ```bash # detect on camera input python demo.py # detect on an image python demo.py -i /path/to/image + +# get help regarding various parameters +python demo.py --help ``` ### Example outputs diff --git a/models/palm_detection_mediapipe/demo.py b/models/palm_detection_mediapipe/demo.py index 8b38e680..b60df336 100644 --- a/models/palm_detection_mediapipe/demo.py +++ b/models/palm_detection_mediapipe/demo.py @@ -23,17 +23,17 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe') -parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') -parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2022may.onnx', help='Path to the model.') +parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.') +parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2022may.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2022may.onnx.') parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) -parser.add_argument('--score_threshold', type=float, default=0.99, help='Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.') -parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.') -parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. 
This flag is invalid when using camera.') +parser.add_argument('--score_threshold', type=float, default=0.99, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.99. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.') +parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.') +parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.') args = parser.parse_args() def visualize(image, results, print_results=False, fps=None): diff --git a/models/person_reid_youtureid/README.md b/models/person_reid_youtureid/README.md index 441c1c5a..e62287e6 100644 --- a/models/person_reid_youtureid/README.md +++ b/models/person_reid_youtureid/README.md @@ -3,20 +3,25 @@ This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/opencv/opencv/blob/394e640909d5d8edf9c1f578f8216d513373698c/samples/dnn/person_reid.py#L6-L11). Note: + - Model source: https://github.com/ReID-Team/ReID_extra_testdata ## Demo Run the following command to try the demo: + ```shell -python demo.py --input1 /path/to/person1 --input2 /path/to/person2 +python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery + +# get help regarding various parameters +python demo.py --help ``` -## License +### License All files in this directory are licensed under [Apache 2.0 License](./LICENSE). ## Reference: - OpenCV DNN Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/person_reid.py -- Model source: https://github.com/ReID-Team/ReID_extra_testdata \ No newline at end of file +- Model source: https://github.com/ReID-Team/ReID_extra_testdata diff --git a/models/person_reid_youtureid/demo.py b/models/person_reid_youtureid/demo.py index adf6b299..a6835a97 100644 --- a/models/person_reid_youtureid/demo.py +++ b/models/person_reid_youtureid/demo.py @@ -30,7 +30,7 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser( description="ReID baseline models from Tencent Youtu Lab") diff --git a/models/qrcode_wechatqrcode/README.md b/models/qrcode_wechatqrcode/README.md index 1568c4b9..e777001a 100644 --- a/models/qrcode_wechatqrcode/README.md +++ b/models/qrcode_wechatqrcode/README.md @@ -3,17 +3,22 @@ WeChatQRCode for detecting and parsing QR Code, contributed by [WeChat Computer Vision Team (WeChatCV)](https://github.com/WeChatCV). Visit [opencv/opencv_contrib/modules/wechat_qrcode](https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode) for more details. 
Notes: + - Model source: [opencv/opencv_3rdparty:wechat_qrcode_20210119](https://github.com/opencv/opencv_3rdparty/tree/wechat_qrcode_20210119) - The APIs `cv::wechat_qrcode::WeChatQRCode` (C++) & `cv.wechat_qrcode_WeChatQRCode` (Python) are both designed to run on default backend (OpenCV) and target (CPU) only. Therefore, benchmark results of this model are only available on CPU devices, until the APIs are updated with setting backends and targets. ## Demo Run the following command to try the demo: + ```shell # detect on camera input python demo.py # detect on an image python demo.py --input /path/to/image + +# get help regarding various parameters +python demo.py --help ``` ### Example outputs @@ -27,4 +32,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE). ## Reference: - https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode -- https://github.com/opencv/opencv_3rdparty/tree/wechat_qrcode_20210119 \ No newline at end of file +- https://github.com/opencv/opencv_3rdparty/tree/wechat_qrcode_20210119 diff --git a/models/qrcode_wechatqrcode/demo.py b/models/qrcode_wechatqrcode/demo.py index 312ed300..47952d11 100644 --- a/models/qrcode_wechatqrcode/demo.py +++ b/models/qrcode_wechatqrcode/demo.py @@ -21,13 +21,13 @@ def str2bool(v): parser = argparse.ArgumentParser( description="WeChat QR code detector for detecting and parsing QR code (https://github.com/opencv/opencv_contrib/tree/master/modules/wechat_qrcode)") -parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') -parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt', help='Path to detect.prototxt.') -parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel', help='Path to detect.caffemodel.') -parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt', help='Path to sr.prototxt.') -parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel', help='Path to sr.caffemodel.') -parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.') +parser.add_argument('--detect_prototxt_path', type=str, default='detect_2021sep.prototxt', help='Usage: Set path to detect.prototxt.') +parser.add_argument('--detect_model_path', type=str, default='detect_2021sep.caffemodel', help='Usage: Set path to detect.caffemodel.') +parser.add_argument('--sr_prototxt_path', type=str, default='sr_2021sep.prototxt', help='Usage: Set path to sr.prototxt.') +parser.add_argument('--sr_model_path', type=str, default='sr_2021sep.caffemodel', help='Usage: Set path to sr.caffemodel.') +parser.add_argument('--save', '-s', type=str2bool, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. 
Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, res, points, points_color=(0, 255, 0), text_color=(0, 255, 0), fps=None):
diff --git a/models/text_detection_db/README.md b/models/text_detection_db/README.md
index dc1eebee..ff63a081 100644
--- a/models/text_detection_db/README.md
+++ b/models/text_detection_db/README.md
@@ -3,6 +3,7 @@ Real-time Scene Text Detection with Differentiable Binarization
 
 Note:
+
 - Models source: [here](https://drive.google.com/drive/folders/1qzNCHfUJOS0NEUOIKn69eCtxdlNPpWbq).
 - `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
 - `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
@@ -11,12 +12,17 @@ Note:
 ## Demo
 
 Run the following command to try the demo:
+
 ```shell
 # detect on camera input
 python demo.py
 # detect on an image
 python demo.py --input /path/to/image
+
+# get help regarding various parameters
+python demo.py --help
 ```
+
 ### Example outputs
 
 ![mask](./examples/mask.jpg)
@@ -31,4 +37,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 
 - https://arxiv.org/abs/1911.08947
 - https://github.com/MhLiao/DB
-- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
\ No newline at end of file
+- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
diff --git a/models/text_detection_db/demo.py b/models/text_detection_db/demo.py
index 2bd10dde..dbee8310 100644
--- a/models/text_detection_db/demo.py
+++ b/models/text_detection_db/demo.py
@@ -29,23 +29,23 @@ def str2bool(v):
     help_msg_backends += "; {:d}: TIMVX"
     help_msg_targets += "; {:d}: NPU"
 except:
-    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
+    print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
 
 parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
-parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
+parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
 parser.add_argument('--width', type=int, default=736,
-                    help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
+                    help='Usage: Resize input image to a certain width, default = 736. It should be a multiple of 32.')
 parser.add_argument('--height', type=int, default=736,
-                    help='Preprocess input image by resizing to a specific height. 
It should be multiple by 32.')
-parser.add_argument('--binary_threshold', type=float, default=0.3, help='Threshold of the binary map.')
-parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Threshold of polygons.')
-parser.add_argument('--max_candidates', type=int, default=200, help='Max candidates of polygons.')
-parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' The unclip ratio of the detected text region, which determines the output size.')
-parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+                    help='Usage: Resize input image to a certain height, default = 736. It should be a multiple of 32.')
+parser.add_argument('--binary_threshold', type=float, default=0.3, help='Usage: Threshold of the binary map, default = 0.3.')
+parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Usage: Threshold of polygons, default = 0.5.')
+parser.add_argument('--max_candidates', type=int, default=200, help='Usage: Set maximum number of polygon candidates, default = 200.')
+parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
+parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
diff --git a/models/text_recognition_crnn/README.md b/models/text_recognition_crnn/README.md
index dde3a010..84d768fc 100644
--- a/models/text_recognition_crnn/README.md
+++ b/models/text_recognition_crnn/README.md
@@ -5,7 +5,7 @@ An End-to-End Trainable Neural Network for Image-based Sequence Recognition and
 Results of accuracy evaluation with [tools/eval](../../tools/eval) at different text recognition datasets. 
| Model name | ICDAR03(%) | IIIT5k(%) | CUTE80(%) | -|--------------|------------|-----------|-----------| +| ------------ | ---------- | --------- | --------- | | CRNN_EN | 81.66 | 74.33 | 52.78 | | CRNN_EN_FP16 | 82.01 | 74.93 | 52.34 | | CRNN_EN_INT8 | 81.75 | 75.33 | 52.43 | @@ -16,10 +16,11 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval) at different \*: 'FP16' or 'INT8' stands for 'model quantized into FP16' or 'model quantized into int8' Note: + - Model source: - - `text_recognition_CRNN_EN_2021sep.onnx`: https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html (CRNN_VGG_BiLSTM_CTC.onnx) - - `text_recognition_CRNN_CH_2021sep.onnx`: https://docs.opencv.org/4.x/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs.onnx) - - `text_recognition_CRNN_CN_2021nov.onnx`: https://docs.opencv.org/4.5.2/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs_CN.onnx) + - `text_recognition_CRNN_EN_2021sep.onnx`: https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html (CRNN_VGG_BiLSTM_CTC.onnx) + - `text_recognition_CRNN_CH_2021sep.onnx`: https://docs.opencv.org/4.x/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs.onnx) + - `text_recognition_CRNN_CN_2021nov.onnx`: https://docs.opencv.org/4.5.2/d4/d43/tutorial_dnn_text_spotting.html (crnn_cs_CN.onnx) - `text_recognition_CRNN_EN_2021sep.onnx` can detect digits (0\~9) and letters (return lowercase letters a\~z) (view `charset_36_EN.txt` for details). - `text_recognition_CRNN_CH_2021sep.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), and some special characters (view `charset_94_CH.txt` for details). - `text_recognition_CRNN_CN_2021nov.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), some Chinese characters and some special characters (view `charset_3944_CN.txt` for details). @@ -28,26 +29,35 @@ Note: ## Demo ***NOTE***: + - This demo uses [text_detection_db](../text_detection_db) as text detector. - Selected model must match with the charset: - - Try `text_recognition_CRNN_EN_2021sep.onnx` with `charset_36_EN.txt`. - - Try `text_recognition_CRNN_CH_2021sep.onnx` with `charset_94_CH.txt` - - Try `text_recognition_CRNN_CN_2021sep.onnx` with `charset_3944_CN.txt`. + - Try `text_recognition_CRNN_EN_2021sep.onnx` with `charset_36_EN.txt`. + - Try `text_recognition_CRNN_CH_2021sep.onnx` with `charset_94_CH.txt` + - Try `text_recognition_CRNN_CN_2021sep.onnx` with `charset_3944_CN.txt`. Run the demo detecting English: + ```shell # detect on camera input python demo.py # detect on an image python demo.py --input /path/to/image + +# get help regarding various parameters +python demo.py --help ``` Run the demo detecting Chinese: + ```shell # detect on camera input python demo.py --model text_recognition_CRNN_CN_2021nov.onnx --charset charset_3944_CN.txt # detect on an image python demo.py --input /path/to/image --model text_recognition_CRNN_CN_2021nov.onnx --charset charset_3944_CN.txt + +# get help regarding various parameters +python demo.py --help ``` ### Examples diff --git a/models/text_recognition_crnn/demo.py b/models/text_recognition_crnn/demo.py index 7d7336db..1961f1f0 100644 --- a/models/text_recognition_crnn/demo.py +++ b/models/text_recognition_crnn/demo.py @@ -33,17 +33,17 @@ def str2bool(v): help_msg_backends += "; {:d}: TIMVX" help_msg_targets += "; {:d}: NPU" except: - print('This version of OpenCV does not support TIM-VX and NPU. 
Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.') + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') parser = argparse.ArgumentParser( description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)") -parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') -parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.') +parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.') +parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Usage: Set model path, defaults to text_recognition_CRNN_EN_2021sep.onnx.') parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) -parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.') -parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.') -parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Usage: Set the path to the charset file corresponding to the selected model.') +parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save a file with results. Invalid in case of camera input. Default will be set to “False”.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.') parser.add_argument('--width', type=int, default=736, help='Preprocess input image by resizing to a specific width. It should be multiple by 32.') parser.add_argument('--height', type=int, default=736,