diff --git a/.gitignore b/.gitignore index 02074a3..a85b2ba 100644 --- a/.gitignore +++ b/.gitignore @@ -112,7 +112,9 @@ __pycache__/ *.json *.zip -*/tools/demos/* -*/output/* -*/data/pretrained_weights/* -*/data/tfrecord/* +tools/demos/* +tools/test_dota/* +tools/test_icdar2015/* +output/summary/* +data/pretrained_weights/* +data/tfrecord/* diff --git a/README.md b/README.md index 0e71f15..0bb2a46 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,18 @@ This is a tensorflow re-implementation of [Focal Loss for Dense Object Detection ![1](voc_2007.gif) ### Performance -| Model | Backbone | Training data | Val data | mAP | Train Schedule | GPU | Image/GPU | Configuration File | -|:------------:|:------------:|:------------:|:---------:|:-----------:|:----------:|:----------:|:-----------:|:-----------:| -| [Faster-RCNN](https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow) | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 73.09 | - | 1X GTX 1080Ti | 1 | - | -| [FPN](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 74.26 | - | 1X GTX 1080Ti | 1 | - | -| RetinaNet | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 73.16 | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc07_v3.py | -| RetinaNet | ResNet50_v1d 600 | VOC07 trainval | VOC07 test | 73.26 | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc07_v4.py | -| RetinaNet | ResNet50_v1d 600 | VOC07+12 trainval | VOC07 test | 79.66 | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v1.py | -| RetinaNet | ResNet101_v1d 600 | VOC07+12 trainval | VOC07 test | 81.69 | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v4.py | -| RetinaNet | ResNet101_v1d 800 | VOC07+12 trainval | VOC07 test | 80.69 | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v3.py | -| RetinaNet | ResNet50_v1 600 | COCO train2017 | COCO val2017 (coco minival) | 33.4 | 1x | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_coco_1x_v4.py | +| Model | Backbone | Training data | Val data | mAP | Inf 
time (fps) | Model Link | Train Schedule | GPU | Image/GPU | Configuration File | +|:------------:|:------------:|:------------:|:---------:|:-----------:|:----------:|:----------:|:----------:|:----------:|:-----------:|:-----------:| +| [Faster-RCNN](https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow) | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 73.09 | - | - | - | 1X GTX 1080Ti | 1 | - | +| [FPN](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 74.26 | - | - | - | 1X GTX 1080Ti | 1 | - | +| RetinaNet | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 73.16 | 14.6 | - | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc07_v3.py | +| RetinaNet | ResNet50_v1d 600 | VOC07 trainval | VOC07 test | 73.26 | 14.6 | - | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc07_v4.py | +| RetinaNet | ResNet50_v1d 600 | VOC07 trainval | VOC07 test | 74.00 | 14.6 | [model](https://drive.google.com/file/d/1qjYsAi5uHB-6KgnrgWTN42a7Njkah-rA/view?usp=sharing) | - | 4X GeForce RTX 2080 Ti | 2 | cfgs_res50_voc07_v5.py | +| RetinaNet | ResNet50_v1d 600 | VOC07+12 trainval | VOC07 test | 79.66 | 14.6 | - | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v1.py | +| RetinaNet | ResNet101_v1d 600 | VOC07+12 trainval | VOC07 test | 81.69 | 14.6 | - | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v4.py | +| RetinaNet | ResNet101_v1d 800 | VOC07+12 trainval | VOC07 test | 80.69 | 14.6 | - | - | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_voc0712_v3.py | +| RetinaNet | ResNet50_v1 600 | COCO train2017 | COCO val2017 (coco minival) | 33.4 | - | - | 1x | 8X GeForce RTX 2080 Ti | 1 | cfgs_res50_coco_1x_v4.py | +| RetinaNet | ResNet50_v1 600 | COCO train2017 | COCO val2017 (coco minival) | | - | - | 1x | 4X GeForce RTX 2080 Ti | 2 | cfgs_res50_coco_1x_v5.py | ## My Development Environment 1、python3.5 (anaconda recommend) @@ -27,10 +29,9 @@ This is a tensorflow re-implementation of [Focal Loss for Dense Object Detection ## Download 
Model ### Pretrain weights 1、Please download [resnet50_v1](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz), [resnet101_v1](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz) pre-trained models on Imagenet, put it to data/pretrained_weights. -2、Or you can choose to use a better backbone, refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). [Pretrain Model Link](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. - -### Trained weights -**Select a configuration file in the folder ($PATH_ROOT/libs/configs/) and copy its contents into cfgs.py, then download the corresponding [weights](https://github.com/DetectionTeamUCAS/Models/tree/master/RetinaNet_Tensorflow).** +2、**(Recommended in this repo)** Or you can choose to use a better backbone, refer to [gluon2TF](https://github.com/yangJirui/gluon2TF). +* [Baidu Drive](https://pan.baidu.com/s/1GpqKg0dOaaWmwshvv1qWGg), password: 5ht9. +* [Google Drive](https://drive.google.com/drive/folders/1BM8ffn1WnsRRb5RcuAcyJAHX8NS2M1Gz?usp=sharing) ## Compile ``` @@ -44,7 +45,7 @@ python setup.py build_ext --inplace ``` (1) Modify parameters (such as CLASS_NUM, DATASET_NAME, VERSION, etc.) 
in $PATH_ROOT/libs/configs/cfgs.py (2) Add category information in $PATH_ROOT/libs/label_name_dict/lable_dict.py -(3) Add data_name to line 76 of $PATH_ROOT/data/io/read_tfrecord.py +(3) Add data_name to $PATH_ROOT/data/io/read_tfrecord.py ``` 2、make tfrecord @@ -58,15 +59,16 @@ python convert_data_to_tfrecord_coco.py --VOC_dir='/PATH/TO/JSON/FILE/' 3、multi-gpu train ``` cd $PATH_ROOT/tools -python multi_gpu_train.py +python multi_gpu_train.py  # or: python multi_gpu_train_batch.py ``` ## Eval +### COCO ``` cd $PATH_ROOT/tools python eval_coco.py --eval_data='/PATH/TO/IMAGES/' --eval_gt='/PATH/TO/TEST/ANNOTATION/' - --GPU='0' + --gpu='0' ``` @@ -74,9 +76,18 @@ python eval_coco.py --eval_data='/PATH/TO/IMAGES/' cd $PATH_ROOT/tools python eval_coco_multiprocessing.py --eval_data='/PATH/TO/IMAGES/' --eval_gt='/PATH/TO/TEST/ANNOTATION/' - --gpu_ids='0,1,2,3,4,5,6,7' + --gpus='0,1,2,3,4,5,6,7' ``` +### PASCAL VOC +``` +cd $PATH_ROOT/tools +python eval.py --eval_dir='/PATH/TO/IMAGES/' + --annotation_dir='/PATH/TO/TEST/ANNOTATION/' + --gpu='0' + +``` + ## Tensorboard ``` cd $PATH_ROOT/output/summary diff --git a/data/io/convert_data_to_tfrecord.py b/data/io/convert_data_to_tfrecord.py index afdf6f7..76bf5d7 100644 --- a/data/io/convert_data_to_tfrecord.py +++ b/data/io/convert_data_to_tfrecord.py @@ -10,7 +10,7 @@ from libs.label_name_dict.label_dict import * from help_utils.tools import * -tf.app.flags.DEFINE_string('VOC_dir', '/data/code/VOC2007/VOCdevkit/VOC2007/', 'Voc dir') +tf.app.flags.DEFINE_string('VOC_dir', '/data/yangxue/dataset/VOC2007/VOCdevkit/VOC2007', 'Voc dir') tf.app.flags.DEFINE_string('xml_dir', 'Annotations', 'xml dir') tf.app.flags.DEFINE_string('image_dir', 'JPEGImages', 'image dir') tf.app.flags.DEFINE_string('save_name', 'train', 'save name') @@ -71,9 +71,9 @@ def read_xml_gtbox_and_label(xml_path): def convert_pascal_to_tfrecord(): - xml_path = FLAGS.VOC_dir + FLAGS.xml_dir - image_path = FLAGS.VOC_dir + FLAGS.image_dir - save_path = FLAGS.save_dir + 
FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' + xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir) + image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir) + save_path = os.path.join(FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord') mkdir(FLAGS.save_dir) # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) diff --git a/data/io/convert_data_to_tfrecord_voc2012.py b/data/io/convert_data_to_tfrecord_voc2012.py index e8bf49e..c52049c 100644 --- a/data/io/convert_data_to_tfrecord_voc2012.py +++ b/data/io/convert_data_to_tfrecord_voc2012.py @@ -80,9 +80,9 @@ def read_xml_gtbox_and_label(xml_path): def convert_pascal_to_tfrecord(): - xml_path = FLAGS.VOC_dir + FLAGS.xml_dir - image_path = FLAGS.VOC_dir + FLAGS.image_dir - save_path = FLAGS.save_dir + FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord' + xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir) + image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir) + save_path = os.path.join(FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord') mkdir(FLAGS.save_dir) # writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB) diff --git a/libs/configs/COCO/__init__.py b/libs/configs/COCO/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/configs/cfgs_res50_coco_1x_v1.py b/libs/configs/COCO/cfgs_res50_coco_1x_v1.py similarity index 96% rename from libs/configs/cfgs_res50_coco_1x_v1.py rename to libs/configs/COCO/cfgs_res50_coco_1x_v1.py index 61d6fd5..a6776a6 100644 --- a/libs/configs/cfgs_res50_coco_1x_v1.py +++ b/libs/configs/COCO/cfgs_res50_coco_1x_v1.py @@ -1,104 +1,103 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -epoch-00: 00.0 epoch-01: 7.40 -epoch-02: 15.4 epoch-03: 18.8 -epoch-04: 20.7 epoch-05: 23.0 -epoch-06: 23.6 epoch-07: 25.3 -epoch-08: 24.7 epoch-09: 26.7 -epoch-11: 26.2 
epoch-12: 30.7 -epoch-13: 30.8 epoch-14: 31.1 -epoch-15: 31.2 epoch-16: 31.4 -epoch-19: 31.5 - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_COCO_1x_20190522' -NET_NAME = 'resnet_v1_50' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 20 -SMRY_ITER = 200 -SAVE_WEIGHTS_INTE = 20000 * 5 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'coco' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 80 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.05 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +epoch-00: 00.0 epoch-01: 7.40 +epoch-02: 15.4 epoch-03: 18.8 +epoch-04: 20.7 epoch-05: 23.0 +epoch-06: 23.6 epoch-07: 25.3 +epoch-08: 24.7 epoch-09: 26.7 +epoch-11: 26.2 epoch-12: 30.7 +epoch-13: 30.8 epoch-14: 31.1 +epoch-15: 31.2 epoch-16: 31.4 +epoch-19: 31.5 + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_COCO_1x_20190522' +NET_NAME = 'resnet_v1_50' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = 
len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 20 +SMRY_ITER = 200 +SAVE_WEIGHTS_INTE = 20000 * 5 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'coco' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 80 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.05 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_coco_1x_v2.py b/libs/configs/COCO/cfgs_res50_coco_1x_v2.py similarity index 96% rename from libs/configs/cfgs_res50_coco_1x_v2.py rename to libs/configs/COCO/cfgs_res50_coco_1x_v2.py index 828e64a..07dcb83 100644 --- a/libs/configs/cfgs_res50_coco_1x_v2.py +++ b/libs/configs/COCO/cfgs_res50_coco_1x_v2.py @@ -1,101 +1,100 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -epoch-00: 00.0 epoch-01: 4.00 -epoch-02: 09.6 epoch-03: 14.6 -epoch-04: 19.6 epoch-05: 20.7 -epoch-06: 21.2 epoch-07: 23.4 -epoch-08: 25.4 epoch-14: 32.1 -epoch-15: 32.2 epoch-16: 32.7 - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_COCO_1x_20190523' -NET_NAME = 'resnet50_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config 
-ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 20 -SMRY_ITER = 200 -SAVE_WEIGHTS_INTE = 80000 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'coco' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 80 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.05 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +epoch-00: 00.0 epoch-01: 4.00 +epoch-02: 09.6 epoch-03: 14.6 +epoch-04: 19.6 epoch-05: 20.7 +epoch-06: 21.2 epoch-07: 23.4 +epoch-08: 25.4 epoch-14: 32.1 +epoch-15: 32.2 epoch-16: 32.7 + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_COCO_1x_20190523' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 20 +SMRY_ITER = 200 +SAVE_WEIGHTS_INTE = 80000 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if 
NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'coco' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 80 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.05 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_coco_1x_v3.py b/libs/configs/COCO/cfgs_res50_coco_1x_v3.py similarity index 96% rename from libs/configs/cfgs_res50_coco_1x_v3.py rename to libs/configs/COCO/cfgs_res50_coco_1x_v3.py index 0eb90a0..4c63afb 100644 --- a/libs/configs/cfgs_res50_coco_1x_v3.py +++ b/libs/configs/COCO/cfgs_res50_coco_1x_v3.py @@ -1,101 +1,100 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -epoch-00: 00.0 epoch-01: 2.60 -epoch-02: 06.4 epoch-03: 12.8 -epoch-04: 19.5 epoch-05: 20.3 -epoch-06: 22.7 epoch-07: 23.6 -epoch-14: 32.1 epoch-15: 32.3 -epoch-16: 32.7 epoch-17: 32.8 - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_COCO_1x_20190524' -NET_NAME = 'resnet50_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config 
-ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 20 -SMRY_ITER = 200 -SAVE_WEIGHTS_INTE = 80000 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'coco' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 80 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.05 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +epoch-00: 00.0 epoch-01: 2.60 +epoch-02: 06.4 epoch-03: 12.8 +epoch-04: 19.5 epoch-05: 20.3 +epoch-06: 22.7 epoch-07: 23.6 +epoch-14: 32.1 epoch-15: 32.3 +epoch-16: 32.7 epoch-17: 32.8 + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_COCO_1x_20190524' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 20 +SMRY_ITER = 200 +SAVE_WEIGHTS_INTE = 80000 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if 
NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'coco' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 80 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.05 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_coco_1x_v4.py b/libs/configs/COCO/cfgs_res50_coco_1x_v4.py similarity index 96% rename from libs/configs/cfgs_res50_coco_1x_v4.py rename to libs/configs/COCO/cfgs_res50_coco_1x_v4.py index 41b44f1..337cfd9 100644 --- a/libs/configs/cfgs_res50_coco_1x_v4.py +++ b/libs/configs/COCO/cfgs_res50_coco_1x_v4.py @@ -1,101 +1,100 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -epoch-00: 3.90 epoch-01: 12.7 -epoch-02: 17.1 epoch-03: 21.6 -epoch-04: 24.1 epoch-05: 24.2 -epoch-06: 25.4 epoch-07: 26.3 -epoch-11: 27.5 epoch-12: 32.2 -epoch-17: 33.4 epoch-18: 33.4 - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_COCO_1x_20190525' -NET_NAME = 'resnet50_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- 
System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 20 -SMRY_ITER = 200 -SAVE_WEIGHTS_INTE = 80000 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'coco' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 80 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.05 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +epoch-00: 3.90 epoch-01: 12.7 +epoch-02: 17.1 epoch-03: 21.6 +epoch-04: 24.1 epoch-05: 24.2 +epoch-06: 25.4 epoch-07: 26.3 +epoch-11: 27.5 epoch-12: 32.2 +epoch-17: 33.4 epoch-18: 33.4 + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_COCO_1x_20190525' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 20 +SMRY_ITER = 200 +SAVE_WEIGHTS_INTE = 80000 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if 
NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'coco' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 80 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.05 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/COCO/cfgs_res50_coco_1x_v5.py b/libs/configs/COCO/cfgs_res50_coco_1x_v5.py new file mode 100644 index 0000000..d05a506 --- /dev/null +++ b/libs/configs/COCO/cfgs_res50_coco_1x_v5.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +epoch-00: epoch-01: + + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_COCO_1x_20191221' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 20 +SMRY_ITER = 200 +SAVE_WEIGHTS_INTE = 80000 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + 
'/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip + +BATCH_SIZE = 2 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'coco' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 80 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.05 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/VOC0712/__init__.py b/libs/configs/VOC0712/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/configs/cfgs_res50_voc0712_v1.py b/libs/configs/VOC0712/cfgs_res50_voc0712_v1.py similarity index 97% rename from libs/configs/cfgs_res50_voc0712_v1.py rename to libs/configs/VOC0712/cfgs_res50_voc0712_v1.py index deb7f6e..5650640 100644 --- a/libs/configs/cfgs_res50_voc0712_v1.py +++ b/libs/configs/VOC0712/cfgs_res50_voc0712_v1.py @@ -1,115 +1,114 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : boat|| Recall: 0.9505703422053232 || Precison: 0.005342907824154218|| AP: 0.7096663579871666 -cls : horse|| Recall: 0.9971264367816092 || Precison: 0.010271438297368499|| AP: 0.8762579915407834 -cls : cat|| Recall: 0.9832402234636871 || Precison: 0.012731941982855283|| AP: 
0.8657648932693558 -cls : bottle|| Recall: 0.9147121535181236 || Precison: 0.00529309430097842|| AP: 0.6864996193318956 -cls : pottedplant|| Recall: 0.925 || Precison: 0.005485341536636892|| AP: 0.5534334829931036 -cls : bus|| Recall: 0.9953051643192489 || Precison: 0.006379009448155504|| AP: 0.8598362716395636 -cls : car|| Recall: 0.9891756869275604 || Precison: 0.01537724736917043|| AP: 0.8843564429054561 -cls : aeroplane|| Recall: 0.9543859649122807 || Precison: 0.014160029153001198|| AP: 0.8542149220907074 -cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.00675963705011875|| AP: 0.8498077362487978 -cls : cow|| Recall: 0.9918032786885246 || Precison: 0.009082720312265426|| AP: 0.8451994158806354 -cls : dog|| Recall: 0.9979550102249489 || Precison: 0.01499231950844854|| AP: 0.8590428896283084 -cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.006394773312215403|| AP: 0.8564424864150487 -cls : bird|| Recall: 0.9455337690631809 || Precison: 0.017192885156280948|| AP: 0.7948506002769691 -cls : tvmonitor|| Recall: 0.9512987012987013 || Precison: 0.00638038412961108|| AP: 0.7842655696224038 -cls : chair|| Recall: 0.9629629629629629 || Precison: 0.006125780447989768|| AP: 0.6172742830134138 -cls : sheep|| Recall: 0.987603305785124 || Precison: 0.007774886141834743|| AP: 0.8365473938440467 -cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.001985121416708433|| AP: 0.7371673843361081 -cls : train|| Recall: 0.9468085106382979 || Precison: 0.011325076348829318|| AP: 0.8543968318356752 -cls : person|| Recall: 0.9867491166077739 || Precison: 0.027760691407730496|| AP: 0.8422619773565775 -cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.0038979271616153273|| AP: 0.7647898552993955 -mAP is : 0.7966038202757706 (coco_463120model.ckpt) -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_VOC0712_20190523' -NET_NAME = 'resnet50_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# 
---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : boat|| Recall: 0.9505703422053232 || Precison: 0.005342907824154218|| AP: 0.7096663579871666 +cls : horse|| Recall: 0.9971264367816092 || Precison: 0.010271438297368499|| AP: 0.8762579915407834 +cls : cat|| Recall: 0.9832402234636871 || Precison: 0.012731941982855283|| AP: 0.8657648932693558 +cls : bottle|| Recall: 0.9147121535181236 || Precison: 0.00529309430097842|| AP: 0.6864996193318956 +cls : pottedplant|| Recall: 0.925 || Precison: 0.005485341536636892|| AP: 0.5534334829931036 +cls : bus|| Recall: 0.9953051643192489 || Precison: 0.006379009448155504|| AP: 0.8598362716395636 +cls : car|| Recall: 0.9891756869275604 || Precison: 0.01537724736917043|| AP: 0.8843564429054561 +cls : aeroplane|| Recall: 0.9543859649122807 || Precison: 
0.014160029153001198|| AP: 0.8542149220907074 +cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.00675963705011875|| AP: 0.8498077362487978 +cls : cow|| Recall: 0.9918032786885246 || Precison: 0.009082720312265426|| AP: 0.8451994158806354 +cls : dog|| Recall: 0.9979550102249489 || Precison: 0.01499231950844854|| AP: 0.8590428896283084 +cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.006394773312215403|| AP: 0.8564424864150487 +cls : bird|| Recall: 0.9455337690631809 || Precison: 0.017192885156280948|| AP: 0.7948506002769691 +cls : tvmonitor|| Recall: 0.9512987012987013 || Precison: 0.00638038412961108|| AP: 0.7842655696224038 +cls : chair|| Recall: 0.9629629629629629 || Precison: 0.006125780447989768|| AP: 0.6172742830134138 +cls : sheep|| Recall: 0.987603305785124 || Precison: 0.007774886141834743|| AP: 0.8365473938440467 +cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.001985121416708433|| AP: 0.7371673843361081 +cls : train|| Recall: 0.9468085106382979 || Precison: 0.011325076348829318|| AP: 0.8543968318356752 +cls : person|| Recall: 0.9867491166077739 || Precison: 0.027760691407730496|| AP: 0.8422619773565775 +cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.0038979271616153273|| AP: 0.7647898552993955 +mAP is : 0.7966038202757706 (coco_463120model.ckpt) +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_VOC0712_20190523' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + 
weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc0712_v2.py b/libs/configs/VOC0712/cfgs_res50_voc0712_v2.py similarity index 97% rename from libs/configs/cfgs_res50_voc0712_v2.py rename to libs/configs/VOC0712/cfgs_res50_voc0712_v2.py index c7f817e..067a84c 100644 --- a/libs/configs/cfgs_res50_voc0712_v2.py +++ b/libs/configs/VOC0712/cfgs_res50_voc0712_v2.py @@ -1,116 +1,115 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : train|| Recall: 0.9574468085106383 || Precison: 0.011264550043806583|| AP: 0.8429614751356598 -cls : boat|| Recall: 0.9619771863117871 || Precison: 0.005608636857389878|| AP: 0.7247602142711771 -cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.00780136705069681|| AP: 0.8620336717055882 -cls : aeroplane|| Recall: 0.968421052631579 || Precison: 0.015942698706099816|| AP: 
0.877200533188366 -cls : person|| Recall: 0.9882950530035336 || Precison: 0.031197060853440043|| AP: 0.8549563913368059 -cls : pottedplant|| Recall: 0.9145833333333333 || Precison: 0.006810847710065781|| AP: 0.5597989508143375 -cls : sofa|| Recall: 0.99581589958159 || Precison: 0.004477303083319224|| AP: 0.7809330043853933 -cls : car|| Recall: 0.9900083263946711 || Precison: 0.015191781872077275|| AP: 0.8929651092188676 -cls : cat|| Recall: 0.994413407821229 || Precison: 0.0132925098947054|| AP: 0.8900856486739736 -cls : horse|| Recall: 0.9913793103448276 || Precison: 0.009228793836770725|| AP: 0.8822391183389151 -cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.006890771952695782|| AP: 0.7956874756219561 -cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.008470956719817768|| AP: 0.847823073416635 -cls : dog|| Recall: 0.9959100204498977 || Precison: 0.0186640095044648|| AP: 0.8711714077751838 -cls : bus|| Recall: 0.9859154929577465 || Precison: 0.006062180652983459|| AP: 0.8646568186454162 -cls : chair|| Recall: 0.9708994708994709 || Precison: 0.007137300661221315|| AP: 0.6544968051248526 -cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.00703704486380254|| AP: 0.8543458412723929 -cls : bird|| Recall: 0.9760348583877996 || Precison: 0.016314639475600873|| AP: 0.8255116265985485 -cls : diningtable|| Recall: 0.9854368932038835 || Precison: 0.0024620086594787332|| AP: 0.7423151271987897 -cls : bottle|| Recall: 0.9381663113006397 || Precison: 0.0067838421214924454|| AP: 0.7201777267756303 -cls : cow|| Recall: 0.9959016393442623 || Precison: 0.00847162181006833|| AP: 0.8656722996831789 -mAP is : 0.8104896159590833 (pascal_430040model.ckpt) - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_VOC0712_20190524' -NET_NAME = 'resnet101_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) 
-GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : train|| Recall: 0.9574468085106383 || Precison: 0.011264550043806583|| AP: 0.8429614751356598 +cls : boat|| Recall: 0.9619771863117871 || Precison: 0.005608636857389878|| AP: 0.7247602142711771 +cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.00780136705069681|| AP: 0.8620336717055882 +cls : aeroplane|| Recall: 0.968421052631579 || Precison: 0.015942698706099816|| AP: 0.877200533188366 +cls : person|| Recall: 0.9882950530035336 || Precison: 0.031197060853440043|| AP: 0.8549563913368059 +cls : pottedplant|| Recall: 0.9145833333333333 || Precison: 0.006810847710065781|| AP: 0.5597989508143375 +cls : sofa|| Recall: 0.99581589958159 || Precison: 0.004477303083319224|| AP: 0.7809330043853933 +cls : car|| Recall: 
0.9900083263946711 || Precison: 0.015191781872077275|| AP: 0.8929651092188676 +cls : cat|| Recall: 0.994413407821229 || Precison: 0.0132925098947054|| AP: 0.8900856486739736 +cls : horse|| Recall: 0.9913793103448276 || Precison: 0.009228793836770725|| AP: 0.8822391183389151 +cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.006890771952695782|| AP: 0.7956874756219561 +cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.008470956719817768|| AP: 0.847823073416635 +cls : dog|| Recall: 0.9959100204498977 || Precison: 0.0186640095044648|| AP: 0.8711714077751838 +cls : bus|| Recall: 0.9859154929577465 || Precison: 0.006062180652983459|| AP: 0.8646568186454162 +cls : chair|| Recall: 0.9708994708994709 || Precison: 0.007137300661221315|| AP: 0.6544968051248526 +cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.00703704486380254|| AP: 0.8543458412723929 +cls : bird|| Recall: 0.9760348583877996 || Precison: 0.016314639475600873|| AP: 0.8255116265985485 +cls : diningtable|| Recall: 0.9854368932038835 || Precison: 0.0024620086594787332|| AP: 0.7423151271987897 +cls : bottle|| Recall: 0.9381663113006397 || Precison: 0.0067838421214924454|| AP: 0.7201777267756303 +cls : cow|| Recall: 0.9959016393442623 || Precison: 0.00847162181006833|| AP: 0.8656722996831789 +mAP is : 0.8104896159590833 (pascal_430040model.ckpt) + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_VOC0712_20190524' +NET_NAME = 'resnet101_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif 
NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc0712_v3.py b/libs/configs/VOC0712/cfgs_res50_voc0712_v3.py similarity index 97% rename from libs/configs/cfgs_res50_voc0712_v3.py rename to libs/configs/VOC0712/cfgs_res50_voc0712_v3.py index 0bfb2ae..a17003e 100644 --- a/libs/configs/cfgs_res50_voc0712_v3.py +++ b/libs/configs/VOC0712/cfgs_res50_voc0712_v3.py @@ -1,116 +1,115 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : boat|| Recall: 0.9695817490494296 || Precison: 0.004761726919629519|| AP: 0.7281771379381616 -cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.006502382254159182|| AP: 0.8489062261958693 -cls : pottedplant|| Recall: 0.9395833333333333 || Precison: 0.005247664149494432|| AP: 0.5544772267130251 -cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.005156255443681845|| AP: 
0.8054443156582788 -cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.006123338831067887|| AP: 0.8633424176495925 -cls : horse|| Recall: 0.9942528735632183 || Precison: 0.009731949483869153|| AP: 0.8800299351210068 -cls : car|| Recall: 0.9933388842631141 || Precison: 0.013872415637572967|| AP: 0.8903760611050385 -cls : person|| Recall: 0.9860865724381626 || Precison: 0.02783284088217327|| AP: 0.8557435827115095 -cls : aeroplane|| Recall: 0.9859649122807017 || Precison: 0.011732286752118909|| AP: 0.8786021097248355 -cls : train|| Recall: 0.9680851063829787 || Precison: 0.010527939531834484|| AP: 0.830439540076824 -cls : sofa|| Recall: 1.0 || Precison: 0.0033251248660906827|| AP: 0.7736534869486386 -cls : sheep|| Recall: 0.9917355371900827 || Precison: 0.006887447626700339|| AP: 0.8304997616000941 -cls : dog|| Recall: 0.9938650306748467 || Precison: 0.012805311833056676|| AP: 0.8685913031298029 -cls : bottle|| Recall: 0.9381663113006397 || Precison: 0.006485657852068041|| AP: 0.7242882524763133 -cls : bus|| Recall: 0.9906103286384976 || Precison: 0.005803718780943998|| AP: 0.8669273157836549 -cls : cow|| Recall: 0.9877049180327869 || Precison: 0.0075783780384264645|| AP: 0.8606713765311622 -cls : bird|| Recall: 0.9738562091503268 || Precison: 0.011930817274328724|| AP: 0.8311803084919606 -cls : diningtable|| Recall: 0.970873786407767 || Precison: 0.0017596959245442388|| AP: 0.730199218623714 -cls : cat|| Recall: 0.9888268156424581 || Precison: 0.010336370007007708|| AP: 0.8831441748779935 -cls : chair|| Recall: 0.9708994708994709 || Precison: 0.005643332180063815|| AP: 0.6334387740339388 -mAP is : 0.8069066262695707 (pascal_430040model.ckpt) - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_VOC0712_20190525' -NET_NAME = 'resnet101_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = 
"0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 800 -IMG_MAX_LENGTH = 1333 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : boat|| Recall: 0.9695817490494296 || Precison: 0.004761726919629519|| AP: 0.7281771379381616 +cls : bicycle|| Recall: 0.9881305637982196 || Precison: 0.006502382254159182|| AP: 0.8489062261958693 +cls : pottedplant|| Recall: 0.9395833333333333 || Precison: 0.005247664149494432|| AP: 0.5544772267130251 +cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.005156255443681845|| AP: 0.8054443156582788 +cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.006123338831067887|| AP: 0.8633424176495925 +cls : horse|| Recall: 0.9942528735632183 || Precison: 0.009731949483869153|| AP: 0.8800299351210068 +cls : car|| Recall: 0.9933388842631141 || Precison: 0.013872415637572967|| AP: 0.8903760611050385 +cls : person|| Recall: 
0.9860865724381626 || Precison: 0.02783284088217327|| AP: 0.8557435827115095 +cls : aeroplane|| Recall: 0.9859649122807017 || Precison: 0.011732286752118909|| AP: 0.8786021097248355 +cls : train|| Recall: 0.9680851063829787 || Precison: 0.010527939531834484|| AP: 0.830439540076824 +cls : sofa|| Recall: 1.0 || Precison: 0.0033251248660906827|| AP: 0.7736534869486386 +cls : sheep|| Recall: 0.9917355371900827 || Precison: 0.006887447626700339|| AP: 0.8304997616000941 +cls : dog|| Recall: 0.9938650306748467 || Precison: 0.012805311833056676|| AP: 0.8685913031298029 +cls : bottle|| Recall: 0.9381663113006397 || Precison: 0.006485657852068041|| AP: 0.7242882524763133 +cls : bus|| Recall: 0.9906103286384976 || Precison: 0.005803718780943998|| AP: 0.8669273157836549 +cls : cow|| Recall: 0.9877049180327869 || Precison: 0.0075783780384264645|| AP: 0.8606713765311622 +cls : bird|| Recall: 0.9738562091503268 || Precison: 0.011930817274328724|| AP: 0.8311803084919606 +cls : diningtable|| Recall: 0.970873786407767 || Precison: 0.0017596959245442388|| AP: 0.730199218623714 +cls : cat|| Recall: 0.9888268156424581 || Precison: 0.010336370007007708|| AP: 0.8831441748779935 +cls : chair|| Recall: 0.9708994708994709 || Precison: 0.005643332180063815|| AP: 0.6334387740339388 +mAP is : 0.8069066262695707 (pascal_430040model.ckpt) + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_VOC0712_20190525' +NET_NAME = 'resnet101_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif 
NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 800 +IMG_MAX_LENGTH = 1333 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc0712_v4.py b/libs/configs/VOC0712/cfgs_res50_voc0712_v4.py similarity index 97% rename from libs/configs/cfgs_res50_voc0712_v4.py rename to libs/configs/VOC0712/cfgs_res50_voc0712_v4.py index 830079a..e15f716 100644 --- a/libs/configs/cfgs_res50_voc0712_v4.py +++ b/libs/configs/VOC0712/cfgs_res50_voc0712_v4.py @@ -1,116 +1,115 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : motorbike|| Recall: 0.9969230769230769 || Precison: 0.012111244019138757|| AP: 0.8608716836726598 -cls : bus|| Recall: 0.9812206572769953 || Precison: 0.010832944591302546|| AP: 0.8665177973232026 -cls : cow|| Recall: 0.9918032786885246 || Precison: 0.014500569237222122|| AP: 0.8700933653540773 -cls : cat|| Recall: 0.994413407821229 || Precison: 0.019201725997842502|| AP: 
0.8901733848581077 -cls : pottedplant|| Recall: 0.89375 || Precison: 0.009184918748795684|| AP: 0.5459186868755065 -cls : sheep|| Recall: 0.987603305785124 || Precison: 0.013457964975505377|| AP: 0.8678004330444818 -cls : aeroplane|| Recall: 0.9649122807017544 || Precison: 0.02806981729100745|| AP: 0.8901971050794585 -cls : boat|| Recall: 0.9467680608365019 || Precison: 0.008034849951597289|| AP: 0.7574725882243678 -cls : bicycle|| Recall: 0.9792284866468842 || Precison: 0.01154047910473859|| AP: 0.8696655881282686 -cls : car|| Recall: 0.9816819317235637 || Precison: 0.021344775146643492|| AP: 0.8910282687046879 -cls : tvmonitor|| Recall: 0.9642857142857143 || Precison: 0.007807981492192018|| AP: 0.8129339370651543 -cls : person|| Recall: 0.9783568904593639 || Precison: 0.04236477698722362|| AP: 0.8492903923966436 -cls : bottle|| Recall: 0.9253731343283582 || Precison: 0.007948572370469406|| AP: 0.7258658516344598 -cls : chair|| Recall: 0.9616402116402116 || Precison: 0.008080920357916967|| AP: 0.643958822348765 -cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.005221354454744574|| AP: 0.7747622585263062 -cls : horse|| Recall: 0.9942528735632183 || Precison: 0.019444756659548163|| AP: 0.8831973097154177 -cls : train|| Recall: 0.9432624113475178 || Precison: 0.016586643387167175|| AP: 0.8595258237569346 -cls : bird|| Recall: 0.9673202614379085 || Precison: 0.02792628467199195|| AP: 0.8461742818633661 -cls : dog|| Recall: 0.9979550102249489 || Precison: 0.02050764834425954|| AP: 0.8791514392825484 -cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.0026451908596870294|| AP: 0.7533332150753614 -mAP is : 0.8168966116464886 () - -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_VOC0712_20190526' -NET_NAME = 'resnet101_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = 
"0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : motorbike|| Recall: 0.9969230769230769 || Precison: 0.012111244019138757|| AP: 0.8608716836726598 +cls : bus|| Recall: 0.9812206572769953 || Precison: 0.010832944591302546|| AP: 0.8665177973232026 +cls : cow|| Recall: 0.9918032786885246 || Precison: 0.014500569237222122|| AP: 0.8700933653540773 +cls : cat|| Recall: 0.994413407821229 || Precison: 0.019201725997842502|| AP: 0.8901733848581077 +cls : pottedplant|| Recall: 0.89375 || Precison: 0.009184918748795684|| AP: 0.5459186868755065 +cls : sheep|| Recall: 0.987603305785124 || Precison: 0.013457964975505377|| AP: 0.8678004330444818 +cls : aeroplane|| Recall: 0.9649122807017544 || Precison: 0.02806981729100745|| AP: 0.8901971050794585 +cls : boat|| Recall: 0.9467680608365019 || Precison: 
0.008034849951597289|| AP: 0.7574725882243678 +cls : bicycle|| Recall: 0.9792284866468842 || Precison: 0.01154047910473859|| AP: 0.8696655881282686 +cls : car|| Recall: 0.9816819317235637 || Precison: 0.021344775146643492|| AP: 0.8910282687046879 +cls : tvmonitor|| Recall: 0.9642857142857143 || Precison: 0.007807981492192018|| AP: 0.8129339370651543 +cls : person|| Recall: 0.9783568904593639 || Precison: 0.04236477698722362|| AP: 0.8492903923966436 +cls : bottle|| Recall: 0.9253731343283582 || Precison: 0.007948572370469406|| AP: 0.7258658516344598 +cls : chair|| Recall: 0.9616402116402116 || Precison: 0.008080920357916967|| AP: 0.643958822348765 +cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.005221354454744574|| AP: 0.7747622585263062 +cls : horse|| Recall: 0.9942528735632183 || Precison: 0.019444756659548163|| AP: 0.8831973097154177 +cls : train|| Recall: 0.9432624113475178 || Precison: 0.016586643387167175|| AP: 0.8595258237569346 +cls : bird|| Recall: 0.9673202614379085 || Precison: 0.02792628467199195|| AP: 0.8461742818633661 +cls : dog|| Recall: 0.9979550102249489 || Precison: 0.02050764834425954|| AP: 0.8791514392825484 +cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.0026451908596870294|| AP: 0.7533332150753614 +mAP is : 0.8168966116464886 () + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_VOC0712_20190526' +NET_NAME = 'resnet101_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = (11540 + 5000) * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = 
"mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/VOC2007/__init__.py b/libs/configs/VOC2007/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/configs/cfgs_res50_voc07_v1.py b/libs/configs/VOC2007/cfgs_res50_voc07_v1.py similarity index 97% rename from libs/configs/cfgs_res50_voc07_v1.py rename to libs/configs/VOC2007/cfgs_res50_voc07_v1.py index 85750f8..17493c8 100644 --- a/libs/configs/cfgs_res50_voc07_v1.py +++ b/libs/configs/VOC2007/cfgs_res50_voc07_v1.py @@ -1,115 +1,114 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : aeroplane|| Recall: 0.9263157894736842 || Precison: 0.03303303303303303|| AP: 0.784830711465123 -cls : cat|| Recall: 0.9832402234636871 || Precison: 0.024157573261958686|| AP: 0.850236615704726 -cls : person|| Recall: 0.9750441696113075 || Precison: 0.02212888383213124|| AP: 0.810026599570606 
-cls : boat|| Recall: 0.9429657794676806 || Precison: 0.008830022075055188|| AP: 0.5809657874405858 -cls : tvmonitor|| Recall: 0.9383116883116883 || Precison: 0.008098187014879368|| AP: 0.7375754411267105 -cls : cow|| Recall: 0.9836065573770492 || Precison: 0.011059398184415465|| AP: 0.7811246236306398 -cls : diningtable|| Recall: 0.970873786407767 || Precison: 0.0016768957306234698|| AP: 0.6460893690943793 -cls : bottle|| Recall: 0.8763326226012793 || Precison: 0.005528577770005784|| AP: 0.5819266031025552 -cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.003152888366376316|| AP: 0.6819754644795543 -cls : motorbike|| Recall: 0.9907692307692307 || Precison: 0.007024432809773124|| AP: 0.7992360964706795 -cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.015635264748390488|| AP: 0.7077056201470088 -cls : bird|| Recall: 0.9520697167755992 || Precison: 0.023698481561822127|| AP: 0.7697731877317816 -cls : bus|| Recall: 0.9577464788732394 || Precison: 0.005901411710252257|| AP: 0.7751698924057402 -cls : pottedplant|| Recall: 0.86875 || Precison: 0.006732756393696719|| AP: 0.4587773532876927 -cls : dog|| Recall: 0.9959100204498977 || Precison: 0.020203277328355113|| AP: 0.8272196688960622 -cls : train|| Recall: 0.9184397163120568 || Precison: 0.013024238157497738|| AP: 0.7859268074059661 -cls : horse|| Recall: 0.9798850574712644 || Precison: 0.010243624019946529|| AP: 0.8180135223615187 -cls : car|| Recall: 0.9783513738551207 || Precison: 0.01963175833723184|| AP: 0.843137188974707 -cls : chair|| Recall: 0.9484126984126984 || Precison: 0.005720120944258738|| AP: 0.5283113983671771 -cls : bicycle|| Recall: 0.9762611275964391 || Precison: 0.008112439896436938|| AP: 0.7976561577200354 -mAP is : 0.7282839054691624 -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_20190521' -NET_NAME = 'resnet_v1_50' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = 
os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = 5000 * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = False -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : aeroplane|| Recall: 0.9263157894736842 || Precison: 0.03303303303303303|| AP: 0.784830711465123 +cls : cat|| Recall: 0.9832402234636871 || Precison: 0.024157573261958686|| AP: 0.850236615704726 +cls : person|| Recall: 0.9750441696113075 || Precison: 0.02212888383213124|| AP: 0.810026599570606 +cls : boat|| Recall: 0.9429657794676806 || Precison: 0.008830022075055188|| AP: 0.5809657874405858 +cls : tvmonitor|| Recall: 0.9383116883116883 || Precison: 0.008098187014879368|| AP: 0.7375754411267105 +cls : cow|| Recall: 0.9836065573770492 || Precison: 0.011059398184415465|| AP: 0.7811246236306398 +cls : diningtable|| Recall: 0.970873786407767 || Precison: 0.0016768957306234698|| AP: 0.6460893690943793 +cls : bottle|| Recall: 0.8763326226012793 || Precison: 
0.005528577770005784|| AP: 0.5819266031025552 +cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.003152888366376316|| AP: 0.6819754644795543 +cls : motorbike|| Recall: 0.9907692307692307 || Precison: 0.007024432809773124|| AP: 0.7992360964706795 +cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.015635264748390488|| AP: 0.7077056201470088 +cls : bird|| Recall: 0.9520697167755992 || Precison: 0.023698481561822127|| AP: 0.7697731877317816 +cls : bus|| Recall: 0.9577464788732394 || Precison: 0.005901411710252257|| AP: 0.7751698924057402 +cls : pottedplant|| Recall: 0.86875 || Precison: 0.006732756393696719|| AP: 0.4587773532876927 +cls : dog|| Recall: 0.9959100204498977 || Precison: 0.020203277328355113|| AP: 0.8272196688960622 +cls : train|| Recall: 0.9184397163120568 || Precison: 0.013024238157497738|| AP: 0.7859268074059661 +cls : horse|| Recall: 0.9798850574712644 || Precison: 0.010243624019946529|| AP: 0.8180135223615187 +cls : car|| Recall: 0.9783513738551207 || Precison: 0.01963175833723184|| AP: 0.843137188974707 +cls : chair|| Recall: 0.9484126984126984 || Precison: 0.005720120944258738|| AP: 0.5283113983671771 +cls : bicycle|| Recall: 0.9762611275964391 || Precison: 0.008112439896436938|| AP: 0.7976561577200354 +mAP is : 0.7282839054691624 +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_20190521' +NET_NAME = 'resnet_v1_50' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = 5000 * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise 
Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = False +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc07_v2.py b/libs/configs/VOC2007/cfgs_res50_voc07_v2.py similarity index 97% rename from libs/configs/cfgs_res50_voc07_v2.py rename to libs/configs/VOC2007/cfgs_res50_voc07_v2.py index f096704..0ca8484 100644 --- a/libs/configs/cfgs_res50_voc07_v2.py +++ b/libs/configs/VOC2007/cfgs_res50_voc07_v2.py @@ -1,115 +1,114 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : aeroplane|| Recall: 0.9438596491228071 || Precison: 0.03152836380684482|| AP: 0.7773597235133686 -cls : person|| Recall: 0.9754858657243817 || Precison: 0.021128815456515397|| AP: 0.8110363404804426 -cls : sofa|| Recall: 0.9916317991631799 || Precison: 0.003753563509661071|| AP: 0.6926241276381035 -cls : car|| Recall: 0.980849292256453 || Precison: 0.016935994019207545|| AP: 0.8612891545940344 -cls : motorbike|| 
Recall: 0.9876923076923076 || Precison: 0.008316708552478172|| AP: 0.7979344387167545 -cls : sheep|| Recall: 0.9752066115702479 || Precison: 0.01267318225754484|| AP: 0.6992188982186315 -cls : horse|| Recall: 0.9798850574712644 || Precison: 0.010116893134753457|| AP: 0.8387371790700674 -cls : train|| Recall: 0.9326241134751773 || Precison: 0.01885980638221585|| AP: 0.7910990164518434 -cls : pottedplant|| Recall: 0.9020833333333333 || Precison: 0.0065086355915643275|| AP: 0.4667916780665033 -cls : bus|| Recall: 0.9906103286384976 || Precison: 0.005389252145484267|| AP: 0.8013951733234702 -cls : diningtable|| Recall: 0.9757281553398058 || Precison: 0.001669157947184853|| AP: 0.6493900213188186 -cls : tvmonitor|| Recall: 0.9642857142857143 || Precison: 0.00617386604581549|| AP: 0.7361445157222517 -cls : cat|| Recall: 0.9776536312849162 || Precison: 0.016213461805716402|| AP: 0.8708244700458685 -cls : bottle|| Recall: 0.8976545842217484 || Precison: 0.004023394942563887|| AP: 0.5570627230945586 -cls : cow|| Recall: 0.9877049180327869 || Precison: 0.009388025398309376|| AP: 0.7709867256180059 -cls : bird|| Recall: 0.954248366013072 || Precison: 0.01937024588713957|| AP: 0.7632915804610957 -cls : boat|| Recall: 0.9163498098859315 || Precison: 0.005341193679218102|| AP: 0.5818730701325913 -cls : dog|| Recall: 0.9938650306748467 || Precison: 0.020776333789329686|| AP: 0.8264152853325744 -cls : chair|| Recall: 0.9563492063492064 || Precison: 0.005576079160271786|| AP: 0.5278822308428679 -cls : bicycle|| Recall: 0.9762611275964391 || Precison: 0.007213646728644098|| AP: 0.811791972130752 -mAP is : 0.7316574162386302 -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_20190522' -NET_NAME = 'resnet_v1_50' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = 
len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = 5000 * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = None -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : aeroplane|| Recall: 0.9438596491228071 || Precison: 0.03152836380684482|| AP: 0.7773597235133686 +cls : person|| Recall: 0.9754858657243817 || Precison: 0.021128815456515397|| AP: 0.8110363404804426 +cls : sofa|| Recall: 0.9916317991631799 || Precison: 0.003753563509661071|| AP: 0.6926241276381035 +cls : car|| Recall: 0.980849292256453 || Precison: 0.016935994019207545|| AP: 0.8612891545940344 +cls : motorbike|| Recall: 0.9876923076923076 || Precison: 0.008316708552478172|| AP: 0.7979344387167545 +cls : sheep|| Recall: 0.9752066115702479 || Precison: 0.01267318225754484|| AP: 0.6992188982186315 +cls : horse|| Recall: 0.9798850574712644 || Precison: 0.010116893134753457|| AP: 0.8387371790700674 +cls : train|| Recall: 0.9326241134751773 || Precison: 
0.01885980638221585|| AP: 0.7910990164518434 +cls : pottedplant|| Recall: 0.9020833333333333 || Precison: 0.0065086355915643275|| AP: 0.4667916780665033 +cls : bus|| Recall: 0.9906103286384976 || Precison: 0.005389252145484267|| AP: 0.8013951733234702 +cls : diningtable|| Recall: 0.9757281553398058 || Precison: 0.001669157947184853|| AP: 0.6493900213188186 +cls : tvmonitor|| Recall: 0.9642857142857143 || Precison: 0.00617386604581549|| AP: 0.7361445157222517 +cls : cat|| Recall: 0.9776536312849162 || Precison: 0.016213461805716402|| AP: 0.8708244700458685 +cls : bottle|| Recall: 0.8976545842217484 || Precison: 0.004023394942563887|| AP: 0.5570627230945586 +cls : cow|| Recall: 0.9877049180327869 || Precison: 0.009388025398309376|| AP: 0.7709867256180059 +cls : bird|| Recall: 0.954248366013072 || Precison: 0.01937024588713957|| AP: 0.7632915804610957 +cls : boat|| Recall: 0.9163498098859315 || Precison: 0.005341193679218102|| AP: 0.5818730701325913 +cls : dog|| Recall: 0.9938650306748467 || Precison: 0.020776333789329686|| AP: 0.8264152853325744 +cls : chair|| Recall: 0.9563492063492064 || Precison: 0.005576079160271786|| AP: 0.5278822308428679 +cls : bicycle|| Recall: 0.9762611275964391 || Precison: 0.007213646728644098|| AP: 0.811791972130752 +mAP is : 0.7316574162386302 +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_20190522' +NET_NAME = 'resnet_v1_50' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = 5000 * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: 
+ raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = None +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc07_v3.py b/libs/configs/VOC2007/cfgs_res50_voc07_v3.py similarity index 97% rename from libs/configs/cfgs_res50_voc07_v3.py rename to libs/configs/VOC2007/cfgs_res50_voc07_v3.py index 3c97086..057c172 100644 --- a/libs/configs/cfgs_res50_voc07_v3.py +++ b/libs/configs/VOC2007/cfgs_res50_voc07_v3.py @@ -1,115 +1,114 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : cat|| Recall: 0.9860335195530726 || Precison: 0.014905206266098045|| AP: 0.8527470573973246 -cls : dog|| Recall: 0.9897750511247444 || Precison: 0.021586905133580126|| AP: 0.8274904377033 -cls : aeroplane|| Recall: 0.9403508771929825 || Precison: 0.02553110412498809|| AP: 0.7767873078576267 -cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.0015802855466458049|| AP: 0.6266256425436841 -cls : sofa|| 
Recall: 0.9874476987447699 || Precison: 0.0036989436067834864|| AP: 0.6896703646112717 -cls : chair|| Recall: 0.9523809523809523 || Precison: 0.00560245885694277|| AP: 0.5297096851641577 -cls : boat|| Recall: 0.9467680608365019 || Precison: 0.008553173948887056|| AP: 0.6212390013387435 -cls : horse|| Recall: 0.9683908045977011 || Precison: 0.010542781166901298|| AP: 0.8207037469800831 -cls : motorbike|| Recall: 0.9876923076923076 || Precison: 0.008685064935064934|| AP: 0.7891094623537858 -cls : bus|| Recall: 0.9812206572769953 || Precison: 0.005848280493606067|| AP: 0.7896720643953054 -cls : bottle|| Recall: 0.8848614072494669 || Precison: 0.004798076144890338|| AP: 0.5944787858656694 -cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.007717980809345015|| AP: 0.744114515720316 -cls : bicycle|| Recall: 0.9910979228486647 || Precison: 0.006523182688176243|| AP: 0.8000690709871533 -cls : train|| Recall: 0.925531914893617 || Precison: 0.018238993710691823|| AP: 0.7820253371438621 -cls : sheep|| Recall: 0.9710743801652892 || Precison: 0.014397745374341379|| AP: 0.7390184562935749 -cls : car|| Recall: 0.9766860949208993 || Precison: 0.01623844066670358|| AP: 0.8465859465895317 -cls : cow|| Recall: 0.9877049180327869 || Precison: 0.013388145103049831|| AP: 0.7609817867782512 -cls : person|| Recall: 0.974160777385159 || Precison: 0.02294217386329356|| AP: 0.8104809262654363 -cls : bird|| Recall: 0.9477124183006536 || Precison: 0.026685479418440586|| AP: 0.7601748193240988 -cls : pottedplant|| Recall: 0.8729166666666667 || Precison: 0.006048969220996708|| AP: 0.4708156926277445 -mAP is : 0.731625005397046 -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_20190523' -NET_NAME = 'resnet_v1_50' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = 
len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = 5000 * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : cat|| Recall: 0.9860335195530726 || Precison: 0.014905206266098045|| AP: 0.8527470573973246 +cls : dog|| Recall: 0.9897750511247444 || Precison: 0.021586905133580126|| AP: 0.8274904377033 +cls : aeroplane|| Recall: 0.9403508771929825 || Precison: 0.02553110412498809|| AP: 0.7767873078576267 +cls : diningtable|| Recall: 0.9805825242718447 || Precison: 0.0015802855466458049|| AP: 0.6266256425436841 +cls : sofa|| Recall: 0.9874476987447699 || Precison: 0.0036989436067834864|| AP: 0.6896703646112717 +cls : chair|| Recall: 0.9523809523809523 || Precison: 0.00560245885694277|| AP: 0.5297096851641577 +cls : boat|| Recall: 0.9467680608365019 || Precison: 0.008553173948887056|| AP: 0.6212390013387435 +cls : horse|| Recall: 0.9683908045977011 || 
Precison: 0.010542781166901298|| AP: 0.8207037469800831 +cls : motorbike|| Recall: 0.9876923076923076 || Precison: 0.008685064935064934|| AP: 0.7891094623537858 +cls : bus|| Recall: 0.9812206572769953 || Precison: 0.005848280493606067|| AP: 0.7896720643953054 +cls : bottle|| Recall: 0.8848614072494669 || Precison: 0.004798076144890338|| AP: 0.5944787858656694 +cls : tvmonitor|| Recall: 0.961038961038961 || Precison: 0.007717980809345015|| AP: 0.744114515720316 +cls : bicycle|| Recall: 0.9910979228486647 || Precison: 0.006523182688176243|| AP: 0.8000690709871533 +cls : train|| Recall: 0.925531914893617 || Precison: 0.018238993710691823|| AP: 0.7820253371438621 +cls : sheep|| Recall: 0.9710743801652892 || Precison: 0.014397745374341379|| AP: 0.7390184562935749 +cls : car|| Recall: 0.9766860949208993 || Precison: 0.01623844066670358|| AP: 0.8465859465895317 +cls : cow|| Recall: 0.9877049180327869 || Precison: 0.013388145103049831|| AP: 0.7609817867782512 +cls : person|| Recall: 0.974160777385159 || Precison: 0.02294217386329356|| AP: 0.8104809262654363 +cls : bird|| Recall: 0.9477124183006536 || Precison: 0.026685479418440586|| AP: 0.7601748193240988 +cls : pottedplant|| Recall: 0.8729166666666667 || Precison: 0.006048969220996708|| AP: 0.4708156926277445 +mAP is : 0.731625005397046 +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_20190523' +NET_NAME = 'resnet_v1_50' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = 5000 * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = 
"mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs_res50_voc07_v4.py b/libs/configs/VOC2007/cfgs_res50_voc07_v4.py similarity index 97% rename from libs/configs/cfgs_res50_voc07_v4.py rename to libs/configs/VOC2007/cfgs_res50_voc07_v4.py index 2269e02..626047d 100644 --- a/libs/configs/cfgs_res50_voc07_v4.py +++ b/libs/configs/VOC2007/cfgs_res50_voc07_v4.py @@ -1,115 +1,114 @@ -# -*- coding: utf-8 -*- -from __future__ import division, print_function, absolute_import -import os -import tensorflow as tf -import math - -""" -cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.01343721770551039|| AP: 0.7366031642566769 -cls : chair|| Recall: 0.9470899470899471 || Precison: 0.005148153207889042|| AP: 0.5386288781309365 -cls : bus|| Recall: 0.9812206572769953 || Precison: 0.005191127890514393|| AP: 0.8133672291455416 -cls : bicycle|| Recall: 0.9910979228486647 || Precison: 0.009744427587816549|| AP: 0.800899636526437 -cls : 
motorbike|| Recall: 0.9938461538461538 || Precison: 0.006038963467075496|| AP: 0.7903998534004216 -cls : dog|| Recall: 0.9897750511247444 || Precison: 0.01628204265626051|| AP: 0.8218698566712613 -cls : aeroplane|| Recall: 0.9157894736842105 || Precison: 0.0318603515625|| AP: 0.763830044188939 -cls : bottle|| Recall: 0.8784648187633263 || Precison: 0.004865950159442542|| AP: 0.6133434933374315 -cls : train|| Recall: 0.925531914893617 || Precison: 0.0163125|| AP: 0.7771032196731267 -cls : pottedplant|| Recall: 0.8916666666666667 || Precison: 0.008113590263691683|| AP: 0.4671915218987904 -cls : horse|| Recall: 0.9971264367816092 || Precison: 0.010073738605353306|| AP: 0.8489630879498543 -cls : boat|| Recall: 0.9467680608365019 || Precison: 0.005862271924661566|| AP: 0.5963519636065165 -cls : person|| Recall: 0.9763692579505301 || Precison: 0.02421948186414958|| AP: 0.8160043258059765 -cls : diningtable|| Recall: 0.9757281553398058 || Precison: 0.00147582510371159|| AP: 0.6417165363609527 -cls : car|| Recall: 0.984179850124896 || Precison: 0.01689681790891157|| AP: 0.8698967787733508 -cls : tvmonitor|| Recall: 0.9415584415584416 || Precison: 0.007871878393051032|| AP: 0.7485201769391812 -cls : bird|| Recall: 0.9607843137254902 || Precison: 0.02073635209479475|| AP: 0.7663305161757082 -cls : cat|| Recall: 0.9664804469273743 || Precison: 0.018309784621897654|| AP: 0.8533618661718122 -cls : cow|| Recall: 0.9836065573770492 || Precison: 0.009199279389781134|| AP: 0.7622093974966482 -cls : sofa|| Recall: 0.9916317991631799 || Precison: 0.003074129320967637|| AP: 0.6257163312044963 -mAP is : 0.732615393885703 -""" - -# ------------------------------------------------ -VERSION = 'RetinaNet_20190524' -NET_NAME = 'resnet50_v1d' # 'MobilenetV2' -ADD_BOX_IN_TENSORBOARD = True - -# ---------------------------------------- System_config -ROOT_PATH = os.path.abspath('../') -print(20*"++--") -print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" -NUM_GPU = 
len(GPU_GROUP.strip().split(',')) -SHOW_TRAIN_INFO_INTE = 10 -SMRY_ITER = 100 -SAVE_WEIGHTS_INTE = 5000 * 2 - -SUMMARY_PATH = ROOT_PATH + '/output/summary' -TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' - -if NET_NAME.startswith("resnet"): - weights_name = NET_NAME -elif NET_NAME.startswith("MobilenetV2"): - weights_name = "mobilenet/mobilenet_v2_1.0_224" -else: - raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') - -PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' -TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') -EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' - -# ------------------------------------------ Train config -RESTORE_FROM_RPN = False -FIXED_BLOCKS = 1 # allow 0~3 -FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone -USE_07_METRIC = True - -MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy -GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip - -BATCH_SIZE = 1 -EPSILON = 1e-5 -MOMENTUM = 0.9 -LR = 5e-4 * NUM_GPU * BATCH_SIZE -DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] -MAX_ITERATION = SAVE_WEIGHTS_INTE*20 -WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) - -# -------------------------------------------- Data_preprocess_config -DATASET_NAME = 'pascal' # 'pascal', 'coco' -PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR -PIXEL_MEAN_ = [0.485, 0.456, 0.406] -PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR -IMG_SHORT_SIDE_LEN = 600 -IMG_MAX_LENGTH = 1000 -CLASS_NUM = 20 - -# --------------------------------------------- Network_config -BATCH_SIZE = 1 -SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) -SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) -PROBABILITY = 0.01 -FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) -WEIGHT_DECAY = 1e-4 - -# ---------------------------------------------Anchor config -LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] -BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] -ANCHOR_STRIDE = [8, 16, 32, 64, 128] -ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] -ANCHOR_RATIOS = [0.5, 1.0, 2.0] -ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] -USE_CENTER_OFFSET = True - -# --------------------------------------------RPN config -SHARE_NET = True -USE_P5 = True -IOU_POSITIVE_THRESHOLD = 0.5 -IOU_NEGATIVE_THRESHOLD = 0.4 - -NMS = True -NMS_IOU_THRESHOLD = 0.5 -MAXIMUM_DETECTIONS = 100 -FILTERED_SCORE = 0.01 -VIS_SCORE = 0.5 - - +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.01343721770551039|| AP: 0.7366031642566769 +cls : chair|| Recall: 0.9470899470899471 || Precison: 0.005148153207889042|| AP: 0.5386288781309365 +cls : bus|| Recall: 0.9812206572769953 || Precison: 0.005191127890514393|| AP: 0.8133672291455416 +cls : bicycle|| Recall: 0.9910979228486647 || Precison: 0.009744427587816549|| AP: 0.800899636526437 +cls : motorbike|| Recall: 0.9938461538461538 || Precison: 0.006038963467075496|| AP: 0.7903998534004216 +cls : dog|| Recall: 0.9897750511247444 || Precison: 0.01628204265626051|| AP: 0.8218698566712613 +cls : aeroplane|| Recall: 0.9157894736842105 || Precison: 0.0318603515625|| AP: 0.763830044188939 +cls : bottle|| Recall: 0.8784648187633263 || 
Precison: 0.004865950159442542|| AP: 0.6133434933374315 +cls : train|| Recall: 0.925531914893617 || Precison: 0.0163125|| AP: 0.7771032196731267 +cls : pottedplant|| Recall: 0.8916666666666667 || Precison: 0.008113590263691683|| AP: 0.4671915218987904 +cls : horse|| Recall: 0.9971264367816092 || Precison: 0.010073738605353306|| AP: 0.8489630879498543 +cls : boat|| Recall: 0.9467680608365019 || Precison: 0.005862271924661566|| AP: 0.5963519636065165 +cls : person|| Recall: 0.9763692579505301 || Precison: 0.02421948186414958|| AP: 0.8160043258059765 +cls : diningtable|| Recall: 0.9757281553398058 || Precison: 0.00147582510371159|| AP: 0.6417165363609527 +cls : car|| Recall: 0.984179850124896 || Precison: 0.01689681790891157|| AP: 0.8698967787733508 +cls : tvmonitor|| Recall: 0.9415584415584416 || Precison: 0.007871878393051032|| AP: 0.7485201769391812 +cls : bird|| Recall: 0.9607843137254902 || Precison: 0.02073635209479475|| AP: 0.7663305161757082 +cls : cat|| Recall: 0.9664804469273743 || Precison: 0.018309784621897654|| AP: 0.8533618661718122 +cls : cow|| Recall: 0.9836065573770492 || Precison: 0.009199279389781134|| AP: 0.7622093974966482 +cls : sofa|| Recall: 0.9916317991631799 || Precison: 0.003074129320967637|| AP: 0.6257163312044963 +mAP is : 0.732615393885703 +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_20190524' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3,4,5,6,7" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = 5000 * 2 + +SUMMARY_PATH = ROOT_PATH + '/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + 
raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = None # 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = None # 10.0 if None, will not clip + +BATCH_SIZE = 1 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/VOC2007/cfgs_res50_voc07_v5.py b/libs/configs/VOC2007/cfgs_res50_voc07_v5.py new file mode 100644 index 0000000..8a67230 --- /dev/null +++ b/libs/configs/VOC2007/cfgs_res50_voc07_v5.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +from __future__ import division, print_function, absolute_import +import os +import tensorflow as tf +import math + +""" +cls : aeroplane|| Recall: 0.9298245614035088 || Precison: 0.014690393037308055|| AP: 0.7660086217842756 +cls : horse|| Recall: 0.985632183908046 || Precison: 0.007518137781382198|| AP: 0.8268787046290528 +cls : cat|| Recall: 0.9916201117318436 || Precison: 0.01215254005203341|| AP: 0.816459566212998 +cls : bottle|| Recall: 0.8869936034115139 || Precison: 0.004867888319408364|| AP: 0.6310054432564516 +cls : tvmonitor|| Recall: 0.9415584415584416 || Precison: 0.00633658174190447|| AP: 0.7378410310650945 +cls : bird|| Recall: 
0.9607843137254902 || Precison: 0.014496564872949607|| AP: 0.7759534809691434 +cls : cow|| Recall: 0.9918032786885246 || Precison: 0.008217317487266554|| AP: 0.7719480426459225 +cls : chair|| Recall: 0.9523809523809523 || Precison: 0.005081515985602371|| AP: 0.5370566290465252 +cls : diningtable|| Recall: 0.9757281553398058 || Precison: 0.0017804627431527477|| AP: 0.6515301548750645 +cls : pottedplant|| Recall: 0.8729166666666667 || Precison: 0.007105669272644021|| AP: 0.45575480742084057 +cls : dog|| Recall: 0.9938650306748467 || Precison: 0.018066914498141264|| AP: 0.8087163643031827 +cls : bicycle|| Recall: 0.9940652818991098 || Precison: 0.006976986358429658|| AP: 0.8328331802386089 +cls : boat|| Recall: 0.9467680608365019 || Precison: 0.004885993485342019|| AP: 0.6198683933081276 +cls : person|| Recall: 0.9743816254416962 || Precison: 0.02173024355406703|| AP: 0.8165996449954711 +cls : train|| Recall: 0.9326241134751773 || Precison: 0.010920112938050158|| AP: 0.7916449500646859 +cls : bus|| Recall: 0.971830985915493 || Precison: 0.004070476265387187|| AP: 0.8298737220933807 +cls : motorbike|| Recall: 0.9907692307692307 || Precison: 0.00751669078855222|| AP: 0.810734399188341 +cls : sofa|| Recall: 0.9790794979079498 || Precison: 0.0039634146341463415|| AP: 0.7046285231064454 +cls : car|| Recall: 0.9833472106577852 || Precison: 0.012978735095334908|| AP: 0.8655978769543121 +cls : sheep|| Recall: 0.9834710743801653 || Precison: 0.011968218847430353|| AP: 0.7497673422604396 +mAP is : 0.7400350439209181 + +""" + +# ------------------------------------------------ +VERSION = 'RetinaNet_20191221' +NET_NAME = 'resnet50_v1d' # 'MobilenetV2' +ADD_BOX_IN_TENSORBOARD = True + +# ---------------------------------------- System_config +ROOT_PATH = os.path.abspath('../') +print(20*"++--") +print(ROOT_PATH) +GPU_GROUP = "0,1,2,3" +NUM_GPU = len(GPU_GROUP.strip().split(',')) +SHOW_TRAIN_INFO_INTE = 10 +SMRY_ITER = 100 +SAVE_WEIGHTS_INTE = 5000 * 2 + +SUMMARY_PATH = ROOT_PATH + 
'/output/summary' +TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result' + +if NET_NAME.startswith("resnet"): + weights_name = NET_NAME +elif NET_NAME.startswith("MobilenetV2"): + weights_name = "mobilenet/mobilenet_v2_1.0_224" +else: + raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]') + +PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt' +TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights') +EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/' + +# ------------------------------------------ Train config +RESTORE_FROM_RPN = False +FIXED_BLOCKS = 1 # allow 0~3 +FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone +USE_07_METRIC = True + +MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy +GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip + +BATCH_SIZE = 2 +EPSILON = 1e-5 +MOMENTUM = 0.9 +LR = 5e-4 * NUM_GPU * BATCH_SIZE +DECAY_STEP = [SAVE_WEIGHTS_INTE*12, SAVE_WEIGHTS_INTE*16, SAVE_WEIGHTS_INTE*20] +MAX_ITERATION = SAVE_WEIGHTS_INTE*20 +WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE) + +# -------------------------------------------- Data_preprocess_config +DATASET_NAME = 'pascal' # 'pascal', 'coco' +PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR +PIXEL_MEAN_ = [0.485, 0.456, 0.406] +PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. 
In openCV, channel is BGR +IMG_SHORT_SIDE_LEN = 600 +IMG_MAX_LENGTH = 1000 +CLASS_NUM = 20 + +# --------------------------------------------- Network_config +SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None) +SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0) +PROBABILITY = 0.01 +FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY)) +WEIGHT_DECAY = 1e-4 + +# ---------------------------------------------Anchor config +LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7'] +BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] +ANCHOR_STRIDE = [8, 16, 32, 64, 128] +ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)] +ANCHOR_RATIOS = [0.5, 1.0, 2.0] +ANCHOR_SCALE_FACTORS = [10.0, 10.0, 5.0, 5.0] +USE_CENTER_OFFSET = True + +# --------------------------------------------RPN config +SHARE_NET = True +USE_P5 = True +IOU_POSITIVE_THRESHOLD = 0.5 +IOU_NEGATIVE_THRESHOLD = 0.4 + +NMS = True +NMS_IOU_THRESHOLD = 0.5 +MAXIMUM_DETECTIONS = 100 +FILTERED_SCORE = 0.01 +VIS_SCORE = 0.5 + + diff --git a/libs/configs/cfgs.py b/libs/configs/cfgs.py index 120a81e..90fe1c1 100644 --- a/libs/configs/cfgs.py +++ b/libs/configs/cfgs.py @@ -5,17 +5,13 @@ import math """ -epoch-00: 3.90 epoch-01: 12.7 -epoch-02: 17.1 epoch-03: 21.6 -epoch-04: 24.1 epoch-05: 24.2 -epoch-06: 25.4 epoch-07: 26.3 -epoch-11: 27.5 epoch-12: 32.2 -epoch-17: 33.4 epoch-18: 33.4 +epoch-00: epoch-01: + """ # ------------------------------------------------ -VERSION = 'RetinaNet_COCO_1x_20190525' +VERSION = 'RetinaNet_COCO_1x_20191221' NET_NAME = 'resnet50_v1d' # 'MobilenetV2' ADD_BOX_IN_TENSORBOARD = True @@ -23,7 +19,7 @@ ROOT_PATH = os.path.abspath('../') print(20*"++--") print(ROOT_PATH) -GPU_GROUP = "0,1,2,3,4,5,6,7" +GPU_GROUP = "0,1,2,3" NUM_GPU = len(GPU_GROUP.strip().split(',')) SHOW_TRAIN_INFO_INTE = 20 SMRY_ITER = 200 @@ -52,7 +48,7 @@ MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multipy 
def anchor_target_layer(gt_boxes_batch, anchors):
    """Assign classification and regression targets to anchors for a batch.

    For every image, each anchor is matched to the ground-truth box with the
    highest IoU. Anchors whose best IoU is >= cfgs.IOU_POSITIVE_THRESHOLD are
    positive (state 1), anchors whose best IoU lies in
    (cfgs.IOU_NEGATIVE_THRESHOLD, cfgs.IOU_POSITIVE_THRESHOLD) are ignored
    (state -1), and the rest are background (state 0).

    :param gt_boxes_batch: np.array of shape (batch, M, 5) for
        (x1, y1, x2, y2, label).
    :param anchors: np.array of shape (N, 4) for (x1, y1, x2, y2).
    :return: tuple of float32 arrays
        labels        -- (batch, N, cfgs.CLASS_NUM) one-hot class targets,
        target_delta  -- per-image output of bbox_transform.bbox_transform
                         stacked along a leading batch axis,
        anchor_states -- (batch, N) with values in {-1, 0, 1}.
    """
    all_labels, all_target_delta, all_anchor_states = [], [], []

    # Iterate over the batch that was actually passed in rather than over
    # cfgs.BATCH_SIZE, so a differently sized batch is neither truncated nor
    # indexed out of range.
    for i in range(gt_boxes_batch.shape[0]):
        gt_boxes = gt_boxes_batch[i, :, :]
        anchor_states = np.zeros((anchors.shape[0],))
        labels = np.zeros((anchors.shape[0], cfgs.CLASS_NUM))

        if gt_boxes.shape[0]:
            # overlaps: [N, M]. np.float64 replaces the removed np.float alias
            # (np.float was simply the builtin float, i.e. float64).
            overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                                     np.ascontiguousarray(gt_boxes, dtype=np.float64))

            argmax_overlaps_inds = np.argmax(overlaps, axis=1)
            max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

            positive_indices = max_overlaps >= cfgs.IOU_POSITIVE_THRESHOLD
            ignore_indices = (max_overlaps > cfgs.IOU_NEGATIVE_THRESHOLD) & ~positive_indices
            anchor_states[ignore_indices] = -1
            anchor_states[positive_indices] = 1

            # Regression target of every anchor is its best-matching gt box.
            target_boxes = gt_boxes[argmax_overlaps_inds]

            # Labels are 1-based in the annotations, hence the "- 1".
            labels[positive_indices, target_boxes[positive_indices, 4].astype(int) - 1] = 1
        else:
            # No annotations: everything is background.
            target_boxes = np.zeros((anchors.shape[0], gt_boxes.shape[1]))

        target_delta = bbox_transform.bbox_transform(ex_rois=anchors, gt_rois=target_boxes)
        all_labels.append(labels)
        all_target_delta.append(target_delta)
        all_anchor_states.append(anchor_states)

    return np.array(all_labels, np.float32), np.array(all_target_delta, np.float32), \
           np.array(all_anchor_states, np.float32)


if __name__ == '__main__':
    anchors = np.array([[0, 0, 4, 4],
                        [1, 1, 4, 4],
                        [4, 4, 6, 6]])

    # A batch holding one image with one ground-truth box: shape (1, 1, 5).
    gt_boxes_batch = np.array([[[0, 0, 5, 5, 1]]])

    labels, target_delta, anchor_states = anchor_target_layer(gt_boxes_batch, anchors)
build loss - else: + if self.is_training: with tf.variable_scope('build_loss'): labels, target_delta, anchor_states = tf.py_func(func=anchor_target_layer, inp=[gtboxes_batch, anchors], @@ -220,19 +211,22 @@ def build_whole_detection_network(self, input_img_batch, gtboxes_batch): reg_loss = losses.smooth_l1_loss(target_delta, rpn_box_pred, anchor_states) - losses_dict = {'cls_loss': cls_loss, 'reg_loss': reg_loss * 2} + self.losses_dict = {'cls_loss': cls_loss, 'reg_loss': reg_loss * 2} - with tf.variable_scope('postprocess_detctions'): - boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred, - rpn_cls_prob=rpn_cls_prob, - img_shape=img_shape, - anchors=anchors, - is_training=self.is_training) - boxes = tf.stop_gradient(boxes) - scores = tf.stop_gradient(scores) - category = tf.stop_gradient(category) + with tf.variable_scope('postprocess_detctions'): + boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred, + rpn_cls_prob=rpn_cls_prob, + img_shape=img_shape, + anchors=anchors, + is_training=self.is_training) + boxes = tf.stop_gradient(boxes) + scores = tf.stop_gradient(scores) + category = tf.stop_gradient(category) - return boxes, scores, category, losses_dict + if self.is_training: + return boxes, scores, category, self.losses_dict + else: + return boxes, scores, category def get_restorer(self): checkpoint_path = tf.train.latest_checkpoint(os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)) diff --git a/libs/networks/build_whole_network_batch.py b/libs/networks/build_whole_network_batch.py new file mode 100644 index 0000000..9ae457b --- /dev/null +++ b/libs/networks/build_whole_network_batch.py @@ -0,0 +1,353 @@ +# -*-coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function + +import os +import tensorflow as tf +import tensorflow.contrib.slim as slim +import numpy as np + +from libs.networks import resnet, resnet_gluoncv, mobilenet_v2, xception +from libs.box_utils import anchor_utils, 
class DetectionNetwork(object):
    """Builds the batched RetinaNet graph: backbone, subnets, anchors, loss.

    During training the subnets reshape their outputs with
    ``cfgs.BATCH_SIZE`` as the leading dimension; at inference the batch size
    is fixed to 1.
    """

    def __init__(self, base_network_name, is_training):
        """
        :param base_network_name: backbone name, see build_base_network.
        :param is_training: True to build the loss branch as well.
        """
        self.base_network_name = base_network_name
        self.is_training = is_training
        self.batch_size = cfgs.BATCH_SIZE if is_training else 1
        self.num_anchors_per_location = len(cfgs.ANCHOR_SCALES) * len(cfgs.ANCHOR_RATIOS)
        self.losses_dict = {}

    def build_base_network(self, input_img_batch):
        """Dispatch to the backbone selected by ``self.base_network_name``.

        :param input_img_batch: image tensor fed to the backbone.
        :return: whatever the selected backbone builder returns (indexed by
            pyramid level name in rpn_net / make_anchors).
        :raises ValueError: if the backbone name is not supported.
        """
        if self.base_network_name.startswith('resnet_v1'):
            return resnet.resnet_base(input_img_batch, scope_name=self.base_network_name,
                                      is_training=self.is_training)

        elif self.base_network_name in ['resnet101_v1d', 'resnet50_v1d']:
            return resnet_gluoncv.resnet_base(input_img_batch, scope_name=self.base_network_name,
                                              is_training=self.is_training)

        elif self.base_network_name.startswith('MobilenetV2'):
            return mobilenet_v2.mobilenetv2_base(input_img_batch, is_training=self.is_training)

        elif self.base_network_name.startswith('xception'):
            return xception.xception_base(input_img_batch, is_training=self.is_training)

        else:
            raise ValueError('Sry, we only support resnet, mobilenet_v2 and xception')

    def rpn_cls_net(self, inputs, scope_list, reuse_flag, level):
        """Classification subnet: four 3x3 conv+ReLU layers, then a 3x3 conv.

        :param inputs: feature map of one pyramid level.
        :param scope_list: scope names; index 0 is used for the tower and
            index 2 for the final classification conv.
        :param reuse_flag: forwarded to slim.conv2d ``reuse``.
        :param level: pyramid level name, used only in op names.
        :return: (scores, probs), both reshaped to
            [batch_size, -1, cfgs.CLASS_NUM]; probs = sigmoid(scores).
        """
        rpn_conv2d_3x3 = inputs
        for i in range(4):
            rpn_conv2d_3x3 = slim.conv2d(inputs=rpn_conv2d_3x3,
                                         num_outputs=256,
                                         kernel_size=[3, 3],
                                         stride=1,
                                         activation_fn=tf.nn.relu,
                                         weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                         biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                         scope='{}_{}'.format(scope_list[0], i),
                                         reuse=reuse_flag)

        rpn_box_scores = slim.conv2d(rpn_conv2d_3x3,
                                     num_outputs=cfgs.CLASS_NUM * self.num_anchors_per_location,
                                     kernel_size=[3, 3],
                                     stride=1,
                                     weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                     biases_initializer=cfgs.FINAL_CONV_BIAS_INITIALIZER,
                                     scope=scope_list[2],
                                     activation_fn=None,
                                     reuse=reuse_flag)

        rpn_box_scores = tf.reshape(rpn_box_scores, [self.batch_size, -1, cfgs.CLASS_NUM],
                                    name='rpn_{}_classification_reshape'.format(level))
        rpn_box_probs = tf.sigmoid(rpn_box_scores, name='rpn_{}_classification_sigmoid'.format(level))

        return rpn_box_scores, rpn_box_probs

    def rpn_reg_net(self, inputs, scope_list, reuse_flag, level):
        """Box-regression subnet: four 3x3 conv+ReLU layers, then a 3x3 conv.

        :param inputs: feature map of one pyramid level.
        :param scope_list: scope names; index 1 is used for the tower and
            index 3 for the final regression conv.
        :param reuse_flag: forwarded to slim.conv2d ``reuse``.
        :param level: pyramid level name, used only in op names.
        :return: box deltas reshaped to [batch_size, -1, 4].
        """
        rpn_delta_boxes = inputs
        for i in range(4):
            rpn_delta_boxes = slim.conv2d(inputs=rpn_delta_boxes,
                                          num_outputs=256,
                                          kernel_size=[3, 3],
                                          weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                          biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                          stride=1,
                                          activation_fn=tf.nn.relu,
                                          scope='{}_{}'.format(scope_list[1], i),
                                          reuse=reuse_flag)

        rpn_delta_boxes = slim.conv2d(rpn_delta_boxes,
                                      num_outputs=4 * self.num_anchors_per_location,
                                      kernel_size=[3, 3],
                                      stride=1,
                                      weights_initializer=cfgs.SUBNETS_WEIGHTS_INITIALIZER,
                                      biases_initializer=cfgs.SUBNETS_BIAS_INITIALIZER,
                                      scope=scope_list[3],
                                      activation_fn=None,
                                      reuse=reuse_flag)

        rpn_delta_boxes = tf.reshape(rpn_delta_boxes, [self.batch_size, -1, 4],
                                     name='rpn_{}_regression_reshape'.format(level))
        return rpn_delta_boxes

    def rpn_net(self, feature_pyramid):
        """Run both subnets on every level in cfgs.LEVEL and concatenate.

        :param feature_pyramid: mapping from level name to feature map.
        :return: (box deltas, class scores, class probabilities), each
            concatenated over all levels along axis 1.
        """
        rpn_delta_boxes_list = []
        rpn_scores_list = []
        rpn_probs_list = []
        with tf.variable_scope('rpn_net'):
            with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
                for level in cfgs.LEVEL:

                    if cfgs.SHARE_NET:
                        # Weights are created on P3 and reused on later levels.
                        reuse_flag = None if level == 'P3' else True
                        scope_list = ['conv2d_3x3_cls', 'conv2d_3x3_reg', 'rpn_classification', 'rpn_regression']
                    else:
                        reuse_flag = None
                        scope_list = ['conv2d_3x3_cls_' + level, 'conv2d_3x3_reg_' + level,
                                      'rpn_classification_' + level, 'rpn_regression_' + level]

                    rpn_box_scores, rpn_box_probs = self.rpn_cls_net(feature_pyramid[level], scope_list,
                                                                     reuse_flag, level)
                    rpn_delta_boxes = self.rpn_reg_net(feature_pyramid[level], scope_list, reuse_flag, level)

                    rpn_scores_list.append(rpn_box_scores)
                    rpn_probs_list.append(rpn_box_probs)
                    rpn_delta_boxes_list.append(rpn_delta_boxes)

                rpn_all_delta_boxes = tf.concat(rpn_delta_boxes_list, axis=1)
                rpn_all_boxes_scores = tf.concat(rpn_scores_list, axis=1)
                rpn_all_boxes_probs = tf.concat(rpn_probs_list, axis=1)

            return rpn_all_delta_boxes, rpn_all_boxes_scores, rpn_all_boxes_probs

    def make_anchors(self, feature_pyramid):
        """Generate anchors for every pyramid level and concatenate them.

        :param feature_pyramid: mapping from level name to feature map.
        :return: tensor of anchors reshaped to [-1, 4], all levels stacked
            along axis 0 in cfgs.LEVEL order.
        """
        with tf.variable_scope('make_anchors'):
            anchor_list = []
            level_list = cfgs.LEVEL
            with tf.name_scope('make_anchors_all_level'):
                # (level, base_anchor_size) pairs:
                # (P3, 32), (P4, 64), (P5, 128), (P6, 256), (P7, 512)
                for level, base_anchor_size, stride in zip(level_list, cfgs.BASE_ANCHOR_SIZE_LIST,
                                                           cfgs.ANCHOR_STRIDE):
                    featuremap_height, featuremap_width = tf.shape(feature_pyramid[level])[1], \
                                                          tf.shape(feature_pyramid[level])[2]

                    featuremap_height = tf.cast(featuremap_height, tf.float32)
                    featuremap_width = tf.cast(featuremap_width, tf.float32)

                    tmp_anchors = tf.py_func(generate_anchors.generate_anchors_pre,
                                             inp=[featuremap_height, featuremap_width, stride,
                                                  np.array(cfgs.ANCHOR_SCALES) * stride, cfgs.ANCHOR_RATIOS, 4.0],
                                             Tout=[tf.float32])

                    tmp_anchors = tf.reshape(tmp_anchors, [-1, 4])
                    anchor_list.append(tmp_anchors)

                all_level_anchors = tf.concat(anchor_list, axis=0)
            return all_level_anchors

    def add_anchor_img_smry(self, img, anchors, labels):
        """Add an image summary that draws the positive anchors on ``img``.

        :param img: image batch passed to only_draw_boxes.
        :param anchors: anchors indexed along axis 0.
        :param labels: per-anchor values; entries >= 1 count as positive.
        """
        positive_anchor_indices = tf.reshape(tf.where(tf.greater_equal(labels, 1)), [-1])
        positive_anchor = tf.gather(anchors, positive_anchor_indices)

        pos_in_img = show_box_in_tensor.only_draw_boxes(img_batch=img,
                                                        boxes=positive_anchor)
        tf.summary.image('positive_anchor', pos_in_img)

    def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
        """Build the full graph: backbone, subnets, anchors, loss, detections.

        :param input_img_batch: image batch tensor.
        :param gtboxes_batch: ground-truth boxes with labels; only used when
            training, where it is reshaped to [batch_size, -1, 5].
        :return: (boxes, scores, category, losses_dict) when training,
            otherwise (boxes, scores, category). Detections are computed from
            the first image of the batch only.
        """
        if self.is_training:
            # ensure shape is [batch_size, M, 5]
            gtboxes_batch = tf.reshape(gtboxes_batch, [self.batch_size, -1, 5])
            gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

        img_shape = tf.shape(input_img_batch)

        # 1. build base network
        feature_pyramid = self.build_base_network(input_img_batch)

        # 2. build rpn
        rpn_box_pred, rpn_cls_score, rpn_cls_prob = self.rpn_net(feature_pyramid)

        # 3. generate_anchors
        anchors = self.make_anchors(feature_pyramid)

        # 4. build loss (training only)
        if self.is_training:
            with tf.variable_scope('build_loss'):
                labels, target_delta, anchor_states = tf.py_func(func=anchor_target_layer,
                                                                 inp=[gtboxes_batch, anchors],
                                                                 Tout=[tf.float32, tf.float32, tf.float32])

                # Visualise positive anchors of the first image only.
                self.add_anchor_img_smry(tf.expand_dims(input_img_batch[0, :, :, :], axis=0),
                                         anchors, anchor_states[0])

                # Flatten the batch axis so the losses see one long anchor list.
                labels = tf.reshape(labels, [-1, cfgs.CLASS_NUM])
                target_delta = tf.reshape(target_delta, [-1, 4])
                anchor_states = tf.reshape(anchor_states, [-1, ])

                cls_loss = losses.focal_loss(labels, tf.reshape(rpn_cls_score, [-1, cfgs.CLASS_NUM]), anchor_states)
                reg_loss = losses.smooth_l1_loss(target_delta, tf.reshape(rpn_box_pred, [-1, 4]), anchor_states)

                self.losses_dict = {'cls_loss': cls_loss, 'reg_loss': reg_loss * 2}

        # 5. postprocess predictions of the first image: decode, clip, filter
        with tf.variable_scope('postprocess_detctions'):
            boxes, scores, category = postprocess_detctions(rpn_bbox_pred=rpn_box_pred[0, :, :],
                                                            rpn_cls_prob=rpn_cls_prob[0, :, :],
                                                            img_shape=img_shape,
                                                            anchors=anchors,
                                                            is_training=self.is_training)
            boxes = tf.stop_gradient(boxes)
            scores = tf.stop_gradient(scores)
            category = tf.stop_gradient(category)

        if self.is_training:
            return boxes, scores, category, self.losses_dict
        else:
            return boxes, scores, category

    def get_restorer(self):
        """Create a Saver for resuming training or loading pretrained weights.

        If a checkpoint exists under cfgs.TRAINED_CKPT/cfgs.VERSION it is
        restored (all variables, or everything except 'FastRCNN_Head' plus the
        global step when cfgs.RESTORE_FROM_RPN is set). Otherwise only the
        backbone variables are restored from cfgs.PRETRAINED_CKPT.

        :return: (restorer, checkpoint_path)
        """
        checkpoint_path = tf.train.latest_checkpoint(os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION))

        if checkpoint_path is not None:
            if cfgs.RESTORE_FROM_RPN:
                print('___restore from rpn___')
                model_variables = slim.get_model_variables()
                restore_variables = [var for var in model_variables if not var.name.startswith('FastRCNN_Head')] + \
                                    [slim.get_or_create_global_step()]
                for var in restore_variables:
                    print(var.name)
                restorer = tf.train.Saver(restore_variables)
            else:
                restorer = tf.train.Saver()
            print("model restore from :", checkpoint_path)
        else:
            checkpoint_path = cfgs.PRETRAINED_CKPT
            print("model restore from pretrained mode, path is :", checkpoint_path)

            model_variables = slim.get_model_variables()

            def name_in_ckpt_rpn(var):
                return var.op.name

            def name_in_ckpt_fastrcnn_head(var):
                '''
                Fast-RCNN/resnet_v1_50/block4 -->resnet_v1_50/block4
                Fast-RCNN/MobilenetV2/** -- > MobilenetV2 **
                :param var:
                :return:
                '''
                return '/'.join(var.op.name.split('/')[1:])

            # Map "name in checkpoint" -> graph variable for backbone variables only.
            nameInCkpt_Var_dict = {}
            for var in model_variables:
                if var.name.startswith('Fast-RCNN/' + self.base_network_name):
                    var_name_in_ckpt = name_in_ckpt_fastrcnn_head(var)
                    nameInCkpt_Var_dict[var_name_in_ckpt] = var
                elif var.name.startswith(self.base_network_name):
                    var_name_in_ckpt = name_in_ckpt_rpn(var)
                    nameInCkpt_Var_dict[var_name_in_ckpt] = var

            restore_variables = nameInCkpt_Var_dict
            for key, item in restore_variables.items():
                print("var_in_graph: ", item.name)
                print("var_in_ckpt: ", key)
                print(20*"___")
            restorer = tf.train.Saver(restore_variables)
            print(20 * "****")
            print("restore from pretrained_weighs in IMAGE_NET")
        return restorer, checkpoint_path

    def get_gradients(self, optimizer, loss):
        '''
        Compute gradients of ``loss`` for all variables the optimizer collects.

        :param optimizer: optimizer exposing compute_gradients.
        :param loss: scalar loss tensor.
        :return: result of optimizer.compute_gradients(loss)
        '''
        return optimizer.compute_gradients(loss)

    @staticmethod
    def _bias_gradient_scale(var_name, multiplier):
        """Return the gradient scale for a variable: ``multiplier`` for biases, else 1.0."""
        # TF variable names look like 'scope/biases:0'. The previous check for
        # './biases' could never match, so bias gradients were never scaled.
        if multiplier and '/biases:' in var_name:
            return 1.0 * multiplier
        return 1.0

    def enlarge_gradients_for_bias(self, gradients):
        """Multiply bias gradients by cfgs.MUTILPY_BIAS_GRADIENT.

        :param gradients: iterable of (gradient, variable) pairs.
        :return: list of (gradient, variable) pairs; gradients of variables
            whose name contains '/biases:' are scaled, others are unchanged.
            Nothing is scaled when cfgs.MUTILPY_BIAS_GRADIENT is None/0.
        """
        final_gradients = []
        with tf.variable_scope("Gradient_Mult") as scope:
            for grad, var in gradients:
                scale = self._bias_gradient_scale(var.name, cfgs.MUTILPY_BIAS_GRADIENT)
                if not np.allclose(scale, 1.0):
                    grad = tf.multiply(grad, scale)
                final_gradients.append((grad, var))
        return final_gradients
b/tools/eval_coco_multiprocessing.py @@ -201,20 +201,20 @@ def parse_args(): if __name__ == '__main__': - # args = parse_args() - # print(20*"--") - # print(args) - # print(20*"--") - # eval(args.eval_num, # use np.inf to test all the imgs. use 10 to test 10 imgs. - # eval_data=args.eval_data, - # eval_gt=args.eval_gt, - # gpu_ids=args.gpus) + args = parse_args() + print(20*"--") + print(args) + print(20*"--") + eval(args.eval_num, # use np.inf to test all the imgs. use 10 to test 10 imgs. + eval_data=args.eval_data, + eval_gt=args.eval_gt, + gpu_ids=args.gpus) # os.environ["CUDA_VISIBLE_DEVICES"] = cfgs.GPU_GROUP - eval(np.inf, # use np.inf to test all the imgs. use 10 to test 10 imgs. - eval_data='/data/COCO/coco_minival2014.odgt', - eval_gt='/data/COCO/instances_minival2014.json', - gpu_ids='0,1,2,3,4,5,6,7') + # eval(np.inf, # use np.inf to test all the imgs. use 10 to test 10 imgs. + # eval_data='/data/COCO/coco_minival2014.odgt', + # eval_gt='/data/COCO/instances_minival2014.json', + # gpu_ids='0,1,2,3,4,5,6,7') diff --git a/tools/multi_gpu_train.py b/tools/multi_gpu_train.py index fd26346..386249e 100644 --- a/tools/multi_gpu_train.py +++ b/tools/multi_gpu_train.py @@ -126,7 +126,7 @@ def train(): optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) retinanet = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME, - is_training=True) + is_training=True) with tf.name_scope('get_batch'): img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \ @@ -241,21 +241,22 @@ def train(): else: grads = tower_grads[0] - final_gvs = [] - with tf.variable_scope('Gradient_Mult'): - for grad, var in grads: - scale = 1. - if '/biases:' in var.name: - scale *= cfgs.MUTILPY_BIAS_GRADIENT - if 'conv_new' in var.name: - scale *= 3. 
def _combine_tower_gradients(tower_grads, reduce_fn):
    """Reduce each variable's per-tower gradients with ``reduce_fn`` over the tower axis."""
    combined = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # NOTE(review): tf.expand_dims is applied to every gradient, so this
        # presumably expects no None gradients - confirm against callers.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]

        # Stack along a new leading 'tower' dimension and reduce over it.
        grad = tf.concat(axis=0, values=grads)
        grad = reduce_fn(grad, 0)

        # Variables are shared across towers, so the first tower's handle is
        # representative.
        combined.append((grad, grad_and_vars[0][1]))
    return combined


def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.
    Note that this function provides a synchronization point across all towers.
    Args:
      tower_grads: List of lists of (gradient, variable) tuples. The outer list
        is over individual gradients. The inner list is over the gradient
        calculation for each tower.
    Returns:
       List of pairs of (gradient, variable) where the gradient has been averaged
       across all towers.
    """
    return _combine_tower_gradients(tower_grads, tf.reduce_mean)


def sum_gradients(tower_grads):
    """Calculate the summed gradient for each shared variable across all towers.
    Note that this function provides a synchronization point across all towers.
    Args:
      tower_grads: List of lists of (gradient, variable) tuples. The outer list
        is over individual gradients. The inner list is over the gradient
        calculation for each tower.
    Returns:
       List of pairs of (gradient, variable) where the gradient has been summed
       across all towers.
    """
    return _combine_tower_gradients(tower_grads, tf.reduce_sum)


def get_gtboxes_and_label(gtboxes_and_label, num_objects):
    """Trim the box axis (axis 1) to the largest object count in the batch.

    Args:
      gtboxes_and_label: array indexed as [image, box, field].
      num_objects: array of per-image object counts (any shape).
    Returns:
      gtboxes_and_label[:, :max_count, :]
    """
    # np.max reduces over the whole array and yields a scalar; the builtin
    # max() iterated over rows and relied on converting a 1-element array to
    # int, which recent NumPy deprecates.
    return gtboxes_and_label[:, :int(np.max(num_objects)), :]


def warmup_lr(init_lr, global_step, warmup_step, num_per_iter):
    """Learning rate with linear warm-up followed by step decay.

    While global_step <= warmup_step the rate grows linearly from
    0.1 * init_lr to init_lr. Afterwards it is piecewise constant: it starts
    at init_lr and is divided by 10 at every boundary
    cfgs.DECAY_STEP[k] // num_per_iter.

    Args:
      init_lr: learning rate reached at the end of warm-up.
      global_step: global step tensor.
      warmup_step: number of warm-up steps.
      num_per_iter: divisor applied to the cfgs.DECAY_STEP boundaries.
    Returns:
      Scalar learning-rate tensor.
    """
    def warmup(end_lr, global_step, warmup_step):
        start_lr = end_lr * 0.1
        global_step = tf.cast(global_step, tf.float32)
        return start_lr + (end_lr - start_lr) * global_step / warmup_step

    def decay(start_lr, global_step, num_per_iter):
        # One more value than boundaries: start_lr, /10, /100, ...
        boundaries = [np.int64(step // num_per_iter) for step in cfgs.DECAY_STEP]
        values = [start_lr / (10. ** k) for k in range(len(boundaries) + 1)]
        return tf.train.piecewise_constant(global_step,
                                           boundaries=boundaries,
                                           values=values)

    return tf.cond(tf.less_equal(global_step, warmup_step),
                   true_fn=lambda: warmup(init_lr, global_step, warmup_step),
                   false_fn=lambda: decay(init_lr, global_step, num_per_iter))
num_objects_batch, img_h_batch, img_w_batch = \ + next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' + batch_size=cfgs.BATCH_SIZE * num_gpu, + shortside_len=cfgs.IMG_SHORT_SIDE_LEN, + is_training=True) + + # data processing + inputs_list = [] + for i in range(num_gpu): + start = i*cfgs.BATCH_SIZE + end = (i+1)*cfgs.BATCH_SIZE + img = img_batch[start:end, :, :, :] + if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']: + img = img / tf.constant([cfgs.PIXEL_STD]) + + gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[start:end, :, :], + [cfgs.BATCH_SIZE, -1, 5]), tf.float32) + num_objects = num_objects_batch[start:end] + num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32) + + img_h = img_h_batch[start:end] + img_w = img_w_batch[start:end] + # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32) + # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32) + + inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w]) + + # put_op_list = [] + # get_op_list = [] + # for i in range(cfgs.NUM_GPU): + # with tf.device("/GPU:%s" % i): + # area = tf.contrib.staging.StagingArea( + # dtypes=[tf.float32, tf.float32, tf.float32]) + # put_op_list.append(area.put(inputs_list[i])) + # get_op_list.append(area.get()) + + tower_grads = [] + biases_regularizer = tf.no_regularizer + weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY) + + total_loss_dict = { + 'cls_loss': tf.constant(0., tf.float32), + 'reg_loss': tf.constant(0., tf.float32), + 'total_losses': tf.constant(0., tf.float32), + } + + with tf.variable_scope(tf.get_variable_scope()): + for i in range(cfgs.NUM_GPU): + with tf.device('/gpu:%d' % i): + with tf.name_scope('tower_%d' % i): + with slim.arg_scope( + [slim.model_variable, slim.variable], + device='/device:CPU:0'): + with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, + slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected], + 
weights_regularizer=weights_regularizer, + biases_regularizer=biases_regularizer, + biases_initializer=tf.constant_initializer(0.0)): + + gtboxes_and_label = tf.py_func(get_gtboxes_and_label, + inp=[inputs_list[i][1], inputs_list[i][2]], + Tout=tf.float32) + gtboxes_and_label = tf.reshape(gtboxes_and_label, [cfgs.BATCH_SIZE, -1, 5]) + + img = inputs_list[i][0] + img_shape = inputs_list[i][-2:] + h_crop = tf.reduce_max(img_shape[0]) + w_crop = tf.reduce_max(img_shape[1]) + img = tf.image.crop_to_bounding_box(image=img, + offset_height=0, + offset_width=0, + target_height=tf.cast(h_crop, tf.int32), + target_width=tf.cast(w_crop, tf.int32)) + + outputs = retinanet.build_whole_detection_network(input_img_batch=img, + gtboxes_batch=gtboxes_and_label) + gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=tf.expand_dims(img[0, :, :, :], axis=0), + boxes=gtboxes_and_label[0, :, :-1], + labels=gtboxes_and_label[0, :, -1]) + tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img) + + if cfgs.ADD_BOX_IN_TENSORBOARD: + detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores( + img_batch=tf.expand_dims(img[0, :, :, :], axis=0), + boxes=outputs[0], + scores=outputs[1], + labels=outputs[2]) + tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img) + + loss_dict = outputs[-1] + + total_losses = 0.0 + for k in loss_dict.keys(): + total_losses += loss_dict[k] + total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU + + total_losses = total_losses / cfgs.NUM_GPU + total_loss_dict['total_losses'] += total_losses + + if i == cfgs.NUM_GPU - 1: + regularization_losses = tf.get_collection( + tf.GraphKeys.REGULARIZATION_LOSSES) + # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses()) + total_losses = total_losses + tf.add_n(regularization_losses) + + tf.get_variable_scope().reuse_variables() + grads = optimizer.compute_gradients(total_losses) + if cfgs.GRADIENT_CLIPPING_BY_NORM is not None: + grads = 
slim.learning.clip_gradient_norms(grads, cfgs.GRADIENT_CLIPPING_BY_NORM) + tower_grads.append(grads) + + for k in total_loss_dict.keys(): + tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k]) + + if len(tower_grads) > 1: + grads = sum_gradients(tower_grads) + else: + grads = tower_grads[0] + + if cfgs.MUTILPY_BIAS_GRADIENT is not None: + final_gvs = [] + with tf.variable_scope('Gradient_Mult'): + for grad, var in grads: + scale = 1. + if '/biases:' in var.name: + scale *= cfgs.MUTILPY_BIAS_GRADIENT + if 'conv_new' in var.name: + scale *= 3. + if not np.allclose(scale, 1.0): + grad = tf.multiply(grad, scale) + + final_gvs.append((grad, var)) + apply_gradient_op = optimizer.apply_gradients(final_gvs, global_step=global_step) + else: + apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) + + variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step) + variables_averages_op = variable_averages.apply(tf.trainable_variables()) + + train_op = tf.group(apply_gradient_op, variables_averages_op) + # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step) + summary_op = tf.summary.merge_all() + + restorer, restore_ckpt = retinanet.get_restorer() + saver = tf.train.Saver(max_to_keep=5) + + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + tfconfig = tf.ConfigProto( + allow_soft_placement=True, log_device_placement=False) + tfconfig.gpu_options.allow_growth = True + + num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE + with tf.Session(config=tfconfig) as sess: + sess.run(init_op) + + # sess.run(tf.initialize_all_variables()) + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(coord=coord, sess=sess) + + summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION) + tools.mkdir(summary_path) + summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) + + if not restorer is None: + restorer.restore(sess, restore_ckpt) + 
print('restore model') + + for step in range(cfgs.MAX_ITERATION // num_per_iter): + training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + + if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0: + _, global_stepnp = sess.run([train_op, global_step]) + + else: + if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0: + start = time.time() + + _, global_stepnp, total_loss_dict_ = \ + sess.run([train_op, global_step, total_loss_dict]) + + end = time.time() + + print('***'*20) + print("""%s: global_step:%d current_step:%d""" + % (training_time, (global_stepnp-1)*num_per_iter, step*num_per_iter)) + print("""per_cost_time:%.3fs""" + % ((end - start) / num_per_iter)) + loss_str = '' + for k in total_loss_dict_.keys(): + loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k]) + print(loss_str) + + else: + if step % cfgs.SMRY_ITER == 0: + _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op]) + summary_writer.add_summary(summary_str, (global_stepnp-1)*num_per_iter) + summary_writer.flush() + + if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_per_iter) == 0) or (step >= cfgs.MAX_ITERATION // num_per_iter - 1): + + save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION) + if not os.path.exists(save_dir): + os.mkdir(save_dir) + + save_ckpt = os.path.join(save_dir, '{}_'.format(cfgs.DATASET_NAME) + str((global_stepnp-1)*num_per_iter) + 'model.ckpt') + saver.save(sess, save_ckpt) + print(' weights had been saved') + + coord.request_stop() + coord.join(threads) + + +if __name__ == '__main__': + + train() + + + + + + + + + +