Skip to content

Commit

Permalink
add core code
Browse files Browse the repository at this point in the history
  • Loading branch information
yangxue committed May 24, 2019
1 parent 2eaaf4c commit 485ece2
Show file tree
Hide file tree
Showing 148 changed files with 56,328 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ This is a tensorflow re-implementation of [Focal Loss for Dense Object Detection
| RetinaNet | ResNet50_v1 600 | VOC07 trainval | VOC07 test | 73.16 | - | 8X GeForce RTX 2080 Ti | 1 |
| RetinaNet | ResNet50_v1d 600 | VOC07 trainval | VOC07 test | 73.26 | - | 8X GeForce RTX 2080 Ti | 1 |
| RetinaNet | ResNet50_v1d 600 | VOC07+12 trainval | VOC07 test | 79.66 | - | 8X GeForce RTX 2080 Ti | 1 |
| RetinaNet | ResNet101_v1d 600 | VOC07+12 trainval | VOC07 test | 81.05 | - | 8X GeForce RTX 2080 Ti | 1 |
| RetinaNet | ResNet101_v1d 800 | VOC07+12 trainval | VOC07 test | 80.69 | - | 8X GeForce RTX 2080 Ti | 1 |
| RetinaNet | ResNet50_v1 600 | COCO train2017 | COCO val2017 (coco minival) | | 1x | 8X GeForce RTX 2080 Ti | 1 |

## My Development Environment
Expand Down Expand Up @@ -88,5 +90,5 @@ tensorboard --logdir=.
## Reference
1、https://github.com/endernewton/tf-faster-rcnn
2、https://github.com/zengarden/light_head_rcnn
3、https://github.com/tensorflow/models/tree/master/research/object_detection
3、https://github.com/tensorflow/models/tree/master/research/object_detection
4、https://github.com/fizyr/keras-retinanet
Empty file added data/__init__.py
Empty file.
Empty file added data/io/__init__.py
Empty file.
122 changes: 122 additions & 0 deletions data/io/convert_data_to_tfrecord.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
import sys
sys.path.append('../../')
import xml.etree.cElementTree as ET
import numpy as np
import tensorflow as tf
import glob
import cv2
from libs.label_name_dict.label_dict import *
from help_utils.tools import *

# Command-line flags: where the PASCAL VOC data is read from and where the
# TFRecord file is written. The output file is named
# <save_dir>/<dataset>_<save_name>.tfrecord by convert_pascal_to_tfrecord().
tf.app.flags.DEFINE_string('VOC_dir', '/data/code/VOC2007/VOCdevkit/VOC2007/', 'Voc dir')
tf.app.flags.DEFINE_string('xml_dir', 'Annotations', 'xml dir')
tf.app.flags.DEFINE_string('image_dir', 'JPEGImages', 'image dir')
tf.app.flags.DEFINE_string('save_name', 'train', 'save name')
# The help text used to read 'save name' (copy-paste slip); this flag is a directory.
tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save dir')
tf.app.flags.DEFINE_string('img_format', '.jpg', 'format of image')
tf.app.flags.DEFINE_string('dataset', 'pascal', 'dataset')
FLAGS = tf.app.flags.FLAGS


def _int64_feature(value):
    """Wrap a single integer in a tf.train.Feature backed by an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def _bytes_feature(value):
    """Wrap a single bytes value in a tf.train.Feature backed by a BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)


def read_xml_gtbox_and_label(xml_path):
    """
    Parse one PASCAL VOC annotation file.

    :param xml_path: the path of voc xml
    :return: a tuple (img_height, img_width, gtbox_label).
             gtbox_label is an int32 array of shape [num_of_gtboxes, 5] that
             has [xmin, ymin, xmax, ymax, label] in a per row (shape [0, 5]
             when the file has no object). img_height / img_width are None
             when the xml has no matching entry under <size>.
    :raises KeyError: when an object name is not a key of NAME_LABEL_MAP
    :raises AssertionError: when a <bndbox> is met before the object's <name>
    """
    # Column of every coordinate tag inside an output row.
    coord_index = {'xmin': 0, 'ymin': 1, 'xmax': 2, 'ymax': 3}

    root = ET.parse(xml_path).getroot()
    img_width = None
    img_height = None
    box_list = []
    for child_of_root in root:
        if child_of_root.tag == 'size':
            for child_item in child_of_root:
                if child_item.tag == 'width':
                    img_width = int(child_item.text)
                if child_item.tag == 'height':
                    img_height = int(child_item.text)

        if child_of_root.tag == 'object':
            label = None
            for child_item in child_of_root:
                if child_item.tag == 'name':
                    label = NAME_LABEL_MAP[child_item.text]
                if child_item.tag == 'bndbox':
                    # Place each coordinate by its tag instead of trusting the
                    # order of the nodes in the file; a coordinate that is
                    # missing stays 0, like in the VOC2012 converter.
                    tmp_box = [0, 0, 0, 0]
                    for node in child_item:
                        if node.tag in coord_index:
                            tmp_box[coord_index[node.tag]] = int(node.text)
                    assert label is not None, 'label is none, error'
                    tmp_box.append(label)
                    box_list.append(tmp_box)

    # reshape keeps the documented 5-column layout even without any object.
    gtbox_label = np.array(box_list, dtype=np.int32).reshape(-1, 5)

    return img_height, img_width, gtbox_label


def convert_pascal_to_tfrecord():
    """
    Convert the PASCAL VOC data selected by FLAGS into a single TFRecord file.

    Every '*.xml' under <VOC_dir>/<xml_dir> is paired with the image of the
    same base name (plus FLAGS.img_format) under <VOC_dir>/<image_dir>.
    Annotations whose image is missing or cannot be decoded are reported and
    skipped. The records are written to
    <save_dir>/<dataset>_<save_name>.tfrecord.

    :return: None
    """
    # os.path.join works whether or not the flag values end with a separator.
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    mkdir(FLAGS.save_dir)

    # List the annotation files once; the total only feeds the progress bar.
    xml_files = glob.glob(xml_path + '/*.xml')
    total = len(xml_files)

    # The context manager closes the writer even when a record fails, so the
    # records written so far are flushed to disk.
    with tf.python_io.TFRecordWriter(path=save_path) as writer:
        for count, xml in enumerate(xml_files):
            # to avoid path error in different development platform
            xml = xml.replace('\\', '/')

            img_name = xml.split('/')[-1].split('.')[0] + FLAGS.img_format
            img_path = image_path + '/' + img_name

            if not os.path.exists(img_path):
                print('{} is not exist!'.format(img_path))
                continue

            # cv2.imread returns None instead of raising for an unreadable file.
            img = cv2.imread(img_path)
            if img is None:
                print('{} cannot be read!'.format(img_path))
                continue
            # Reverse the channel axis (OpenCV decodes to BGR).
            img = img[:, :, ::-1]

            img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)

            feature = tf.train.Features(feature={
                'img_name': _bytes_feature(img_name.encode()),
                'img_height': _int64_feature(img_height),
                'img_width': _int64_feature(img_width),
                # tobytes() is the non-deprecated spelling of tostring().
                'img': _bytes_feature(img.tobytes()),
                'gtboxes_and_label': _bytes_feature(gtbox_label.tobytes()),
                'num_objects': _int64_feature(gtbox_label.shape[0])
            })

            example = tf.train.Example(features=feature)

            writer.write(example.SerializeToString())

            view_bar('Conversion progress', count + 1, total)

    print('\nConversion is complete!')


if __name__ == '__main__':
    # Script entry point: run the conversion with the values held in FLAGS.
    convert_pascal_to_tfrecord()
103 changes: 103 additions & 0 deletions data/io/convert_data_to_tfrecord_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
import sys
sys.path.append('../../')
import xml.etree.cElementTree as ET
import numpy as np
import tensorflow as tf
import glob
import cv2
import json
from libs.label_name_dict.label_dict import *
from help_utils.tools import *

# Command-line flags: the annotation list to read and where the TFRecord file
# is written. The value of 'coco_dir' is handed to convert_pascal_to_tfrecord()
# as the annotation file to open.
tf.app.flags.DEFINE_string('coco_dir', '/data/COCO/coco_trainvalmini.odgt', 'coco dir')
tf.app.flags.DEFINE_string('save_name', 'train', 'save name')
# The help text used to read 'save name' (copy-paste slip); this flag is a directory.
tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save dir')
tf.app.flags.DEFINE_string('dataset', 'coco', 'dataset')
FLAGS = tf.app.flags.FLAGS


def _int64_feature(value):
    """Build a tf.train.Feature whose Int64List holds the single given value."""
    as_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=as_list)


def _bytes_feature(value):
    """Build a tf.train.Feature whose BytesList holds the single given value."""
    as_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=as_list)


def convert_pascal_to_tfrecord(coco_trainvalmini, image_dir='/data/COCO/train2017'):
    """
    Convert a COCO annotation list into a single TFRecord file.

    The annotation file holds one JSON object per line; each object must
    provide 'fpath', 'ID', 'gtboxes', 'height' and 'width'. Entries whose
    image is missing, whose image cannot be decoded, or that have no gtboxes
    are skipped. The records are written to
    <save_dir>/<dataset>_<save_name>.tfrecord.

    :param coco_trainvalmini: path of the annotation list file
    :param image_dir: directory holding the images; the file name is the part
                      of 'fpath' after its last '_'. Defaults to the location
                      that used to be hard-coded.
    :return: None
    """
    save_path = os.path.join(FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    mkdir(FLAGS.save_dir)

    with open(coco_trainvalmini) as f:
        files = f.readlines()

    img_count = 0
    gt_count = 0

    # The context manager closes the writer even when a record fails, so the
    # records written so far are flushed to disk.
    with tf.python_io.TFRecordWriter(path=save_path) as writer:
        for count, raw_line in enumerate(files):
            # A blank line (e.g. at the end of the file) is not a JSON document.
            if not raw_line.strip():
                continue

            record = json.loads(raw_line)
            img_path = os.path.join(image_dir, record['fpath'].split('_')[-1])
            img_name = record['ID']

            if not os.path.exists(img_path):
                img_count += 1
                continue

            # Test for annotations before decoding the image: it is the cheaper check.
            gtboxes = record['gtboxes']
            if len(gtboxes) == 0:
                gt_count += 1
                continue

            # cv2.imread returns None instead of raising for an unreadable
            # file, so the check has to come before the channel slicing.
            img = cv2.imread(img_path)
            if img is None:
                continue
            # Reverse the channel axis (OpenCV decodes to BGR).
            img = img[:, :, ::-1]

            img_height = record['height']
            img_width = record['width']

            # Each row is [box[0], box[1], box[0]+box[2], box[1]+box[3], label];
            # presumably 'box' is [x, y, w, h] - TODO confirm against the odgt producer.
            gtbox_label = np.array(
                [[gt['box'][0], gt['box'][1],
                  gt['box'][0] + gt['box'][2], gt['box'][1] + gt['box'][3],
                  NAME_LABEL_MAP[gt['tag']]] for gt in gtboxes],
                np.int32)

            feature = tf.train.Features(feature={
                'img_name': _bytes_feature(img_name.encode()),
                'img_height': _int64_feature(img_height),
                'img_width': _int64_feature(img_width),
                # tobytes() is the non-deprecated spelling of tostring().
                'img': _bytes_feature(img.tobytes()),
                'gtboxes_and_label': _bytes_feature(gtbox_label.tobytes()),
                'num_objects': _int64_feature(gtbox_label.shape[0])
            })

            example = tf.train.Example(features=feature)

            writer.write(example.SerializeToString())

            view_bar('Conversion progress', count + 1, len(files))

    print('{} images not exist!'.format(img_count))
    print('{} gts not exist!'.format(gt_count))
    print('\nConversion is complete!')


if __name__ == '__main__':
    # Script entry point: the annotation list path comes from the 'coco_dir' flag.
    convert_pascal_to_tfrecord(FLAGS.coco_dir)
141 changes: 141 additions & 0 deletions data/io/convert_data_to_tfrecord_voc2012.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
import sys
sys.path.append('../../')
import xml.etree.cElementTree as ET
import numpy as np
import tensorflow as tf
import glob
import cv2
from tqdm import tqdm

from libs.label_name_dict.label_dict import *
from help_utils.tools import *

# Command-line flags: where the PASCAL VOC 2012 data is read from and where
# the TFRecord file is written. The output file is named
# <save_dir>/<dataset>_<save_name>.tfrecord by convert_pascal_to_tfrecord().
tf.app.flags.DEFINE_string('VOC_dir', '/data/VOC2012/VOCdevkit/VOC2012/', 'Voc dir')
tf.app.flags.DEFINE_string('xml_dir', 'Annotations', 'xml dir')
tf.app.flags.DEFINE_string('image_dir', 'JPEGImages', 'image dir')
tf.app.flags.DEFINE_string('save_name', 'train2012', 'save name')
# The help text used to read 'save name' (copy-paste slip); this flag is a directory.
tf.app.flags.DEFINE_string('save_dir', '../tfrecord/', 'save dir')
tf.app.flags.DEFINE_string('img_format', '.jpg', 'format of image')
tf.app.flags.DEFINE_string('dataset', 'pascal', 'dataset')
FLAGS = tf.app.flags.FLAGS


def _int64_feature(value):
    """Return a tf.train.Feature carrying `value` as a one-element Int64List."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)


def _bytes_feature(value):
    """Return a tf.train.Feature carrying `value` as a one-element BytesList."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def read_xml_gtbox_and_label(xml_path):
    """
    Read the image size and the labelled boxes out of a voc xml file.

    :param xml_path: the path of voc xml
    :return: (img_height, img_width, gtbox_label); gtbox_label is an int32
             array with one [xmin, ymin, xmax, ymax, label] row per box.
             A coordinate whose tag is absent from the xml is left at 0.
    """
    # Position of each coordinate tag inside a row.
    slot_of = {'xmin': 0, 'ymin': 1, 'xmax': 2, 'ymax': 3}

    height = None
    width = None
    rows = []
    for element in ET.parse(xml_path).getroot():
        if element.tag == 'size':
            for dim in element:
                if dim.tag == 'width':
                    width = int(dim.text)
                elif dim.tag == 'height':
                    height = int(dim.text)
        elif element.tag == 'object':
            category = None
            for field in element:
                if field.tag == 'name':
                    category = NAME_LABEL_MAP[field.text]
                elif field.tag == 'bndbox':
                    corners = [0, 0, 0, 0]
                    for coord in field:
                        slot = slot_of.get(coord.tag)
                        if slot is not None:
                            corners[slot] = int(coord.text)
                    # The label has to be known by the time the box is read.
                    assert category is not None, 'label is none, error'
                    rows.append(corners + [category])

    return height, width, np.array(rows, dtype=np.int32)


def convert_pascal_to_tfrecord():
    """
    Convert the trainval part of PASCAL VOC 2012 into a single TFRecord file.

    Only annotations whose base name is listed in
    <VOC_dir>/ImageSets/Main/trainval.txt are converted. Annotations whose
    image is missing or cannot be decoded are reported and skipped. The
    records are written to <save_dir>/<dataset>_<save_name>.tfrecord.

    :return: None
    """
    # os.path.join works whether or not the flag values end with a separator.
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    mkdir(FLAGS.save_dir)

    # Derive the split file from VOC_dir (equal to the formerly hard-coded
    # path for the default flag value) and close it right after reading.
    trainval_file = os.path.join(FLAGS.VOC_dir, 'ImageSets', 'Main', 'trainval.txt')
    with open(trainval_file, 'r') as fr:
        # A set of stripped ids gives O(1) lookups and still matches a last
        # line that has no trailing newline.
        trainval_ids = {line.strip() for line in fr if line.strip()}

    real_cnt = 0

    pbar = tqdm(glob.glob(xml_path + '/*.xml'))
    pbar.set_description("Conversion progress")

    # The context manager closes the writer even when a record fails, so the
    # records written so far are flushed to disk.
    with tf.python_io.TFRecordWriter(path=save_path) as writer:
        for xml in pbar:
            # to avoid path error in different development platform
            xml = xml.replace('\\', '/')
            base_name = xml.split('/')[-1].split('.')[0]
            if base_name not in trainval_ids:
                continue

            img_name = base_name + FLAGS.img_format
            img_path = image_path + '/' + img_name

            if not os.path.exists(img_path):
                print('{} is not exist!'.format(img_path))
                continue

            # cv2.imread returns None instead of raising for an unreadable file.
            img = cv2.imread(img_path)
            if img is None:
                print('{} cannot be read!'.format(img_path))
                continue
            # Reverse the channel axis (OpenCV decodes to BGR).
            img = img[:, :, ::-1]

            img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)

            feature = tf.train.Features(feature={
                'img_name': _bytes_feature(img_name.encode()),
                'img_height': _int64_feature(img_height),
                'img_width': _int64_feature(img_width),
                # tobytes() is the non-deprecated spelling of tostring().
                'img': _bytes_feature(img.tobytes()),
                'gtboxes_and_label': _bytes_feature(gtbox_label.tobytes()),
                'num_objects': _int64_feature(gtbox_label.shape[0])
            })

            example = tf.train.Example(features=feature)

            writer.write(example.SerializeToString())
            real_cnt += 1

    print('\nConversion is complete! {} images.'.format(real_cnt))


if __name__ == '__main__':
    # Script entry point: run the VOC 2012 conversion with the values held in FLAGS.
    convert_pascal_to_tfrecord()
Loading

0 comments on commit 485ece2

Please sign in to comment.