airzeus
diff --git a/‎README.md
Lines changed: 19 additions & 3 deletions b/‎README.md
Lines changed: 19 additions & 3 deletions
diff --git a/‎convert.py
Lines changed: 12 additions & 2 deletions b/‎convert.py
Lines changed: 12 additions & 2 deletions
diff --git a/‎model_data/tiny_yolo_anchors.txt
Lines changed: 1 addition & 0 deletions b/‎model_data/tiny_yolo_anchors.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎train.py
Lines changed: 96 additions & 84 deletions b/‎train.py
Lines changed: 96 additions & 84 deletions
diff --git a/‎yolo.py
Lines changed: 26 additions & 14 deletions b/‎yolo.py
Lines changed: 26 additions & 14 deletions
@@ -20,14 +20,16 @@ python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5
 python yolo.py   OR   python yolo_video.py
 ```
 
+For Tiny YOLOv3, proceed in the same way, and modify the model path and anchor path in `yolo.py`.
+
 ---
 
 ## Training
 
 1. Generate your own annotation file and class names file.  
     One row for one image;  
-    Row format: image_file_path box1 box2 ... boxN;  
-    Box format: x_min,y_min,x_max,y_max,class_id (no space).  
+    Row format: `image_file_path box1 box2 ... boxN`;  
+    Box format: `x_min,y_min,x_max,y_max,class_id` (no space).  
     For VOC dataset, try `python voc_annotation.py`
 
 2. Make sure you have run `python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5`  
@@ -36,4 +38,18 @@ python yolo.py   OR   python yolo_video.py
 
 3. Modify train.py and start training.  
     `python train.py`  
-    You will get the trained model model_data/my_yolo.h5.
+    Use your trained weights or checkpoint weights in `yolo.py`.  
+    Remember to modify the class path or anchor path accordingly.
+
+---
+
+## Some issues to know
+
+1. The test environment is
+    - Python 3.5.2
+    - Keras 2.1.5
+    - tensorflow 1.6.0
+
+2. The default anchors are used. If you use your own anchors, some changes will probably be needed.
+
+3. The training strategy is for reference only. Adjust it according to your dataset and your goal, and add further strategies if needed.
@@ -13,7 +13,7 @@
 import numpy as np
 from keras import backend as K
 from keras.layers import (Conv2D, Input, ZeroPadding2D, Add,
-                          UpSampling2D, Concatenate)
+                          UpSampling2D, MaxPooling2D, Concatenate)
 from keras.layers.advanced_activations import LeakyReLU
 from keras.layers.normalization import BatchNormalization
 from keras.models import Model
@@ -194,13 +194,23 @@ def _main(args):
                 all_layers.append(skip_layer)
                 prev_layer = skip_layer
 
+        elif section.startswith('maxpool'):
+            size = int(cfg_parser[section]['size'])
+            stride = int(cfg_parser[section]['stride'])
+            all_layers.append(
+                MaxPooling2D(
+                    pool_size=(size, size),
+                    strides=(stride, stride),
+                    padding='same')(prev_layer))
+            prev_layer = all_layers[-1]
+
         elif section.startswith('shortcut'):
             index = int(cfg_parser[section]['from'])
             activation = cfg_parser[section]['activation']
             assert activation == 'linear', 'Only linear activation supported.'
             all_layers.append(Add()([all_layers[index], prev_layer]))
             prev_layer = all_layers[-1]
-        
+
         elif section.startswith('upsample'):
             stride = int(cfg_parser[section]['stride'])
             assert stride == 2, 'Only stride=2 supported.'
 
@@ -0,0 +1 @@
+10,14,  23,27,  37,58,  81,82,  135,169,  344,319
@@ -4,40 +4,60 @@
 import os
 
 import numpy as np
-from PIL import Image
+import keras.backend as K
 from keras.layers import Input, Lambda
 from keras.models import load_model, Model
 from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
 
-from yolo3.model import preprocess_true_boxes, yolo_body, yolo_loss
-from yolo3.utils import letterbox_image
+from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
+from yolo3.utils import get_random_data
 
-# Default anchor boxes
-YOLO_ANCHORS = np.array(((10,13), (16,30), (33,23), (30,61),
-    (62,45), (59,119), (116,90), (156,198), (373,326)))
 
 def _main():
     annotation_path = 'train.txt'
-    data_path = 'train.npz'
-    output_path = 'model_data/my_yolo.h5'
     log_dir = 'logs/000/'
     classes_path = 'model_data/voc_classes.txt'
     anchors_path = 'model_data/yolo_anchors.txt'
     class_names = get_classes(classes_path)
     anchors = get_anchors(anchors_path)
 
-    input_shape = (416,416) # multiple of 32
-    image_data, box_data = get_training_data(annotation_path, data_path,
-        input_shape, max_boxes=100, load_previous=True)
-    y_true = preprocess_true_boxes(box_data, input_shape, anchors, len(class_names))
+    input_shape = (416,416) # multiple of 32, hw
 
-    infer_model, model = create_model(input_shape, anchors, len(class_names),
+    is_tiny_version = len(anchors)==6 # default setting
+    create_func = create_tiny_model if is_tiny_version else create_model
+    model = create_func(input_shape, anchors, len(class_names),
         load_pretrained=True, freeze_body=True)
 
-    train(model, image_data/255., y_true, log_dir=log_dir)
+    train(model, annotation_path, input_shape, anchors, len(class_names), log_dir=log_dir)
 
-    infer_model.save(output_path)
+def train(model, annotation_path, input_shape, anchors, num_classes, log_dir='logs/'):
+    '''retrain/fine-tune the model'''
+    model.compile(optimizer='adam', loss={
+        # use custom yolo_loss Lambda layer.
+        'yolo_loss': lambda y_true, y_pred: y_pred})
 
+    logging = TensorBoard(log_dir=log_dir)
+    checkpoint = ModelCheckpoint(log_dir + "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5",
+        monitor='val_loss', save_weights_only=True, save_best_only=True)
+    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')
+
+    batch_size = 32
+    val_split = 0.1
+    with open(annotation_path) as f:
+        lines = f.readlines()
+    np.random.shuffle(lines)
+    num_val = int(len(lines)*val_split)
+    num_train = len(lines) - num_val
+
+    model.fit_generator(data_generator_wrap(lines[:num_train], batch_size, input_shape, anchors, num_classes),
+            steps_per_epoch=max(1, num_train//batch_size),
+            validation_data=data_generator_wrap(lines[num_train:], batch_size, input_shape, anchors, num_classes),
+            validation_steps=max(1, num_val//batch_size),
+            epochs=30,
+            initial_epoch=0,
+            callbacks=[logging, checkpoint, early_stopping])
+    model.save_weights(log_dir + 'trained_weights.h5')
+    # Further training.
 
 
 def get_classes(classes_path):
@@ -49,63 +69,22 @@ def get_classes(classes_path):
 
 def get_anchors(anchors_path):
     '''loads the anchors from a file'''
-    if os.path.isfile(anchors_path):
-        with open(anchors_path) as f:
-            anchors = f.readline()
-            anchors = [float(x) for x in anchors.split(',')]
-            return np.array(anchors).reshape(-1, 2)
-    else:
-        Warning("Could not open anchors file, using default.")
-        return YOLO_ANCHORS
-
-def get_training_data(annotation_path, data_path, input_shape, max_boxes=100, load_previous=True):
-    '''processes the data into standard shape
-    annotation row format: image_file_path box1 box2 ... boxN
-    box format: x_min,y_min,x_max,y_max,class_index (no space)
-    '''
-    if load_previous==True and os.path.isfile(data_path):
-        data = np.load(data_path)
-        print('Loading training data from ' + data_path)
-        return data['image_data'], data['box_data']
-    image_data = []
-    box_data = []
-    with open(annotation_path) as f:
-        for line in f.readlines():
-            line = line.split(' ')
-            filename = line[0]
-            image = Image.open(filename)
-            boxed_image = letterbox_image(image, tuple(reversed(input_shape)))
-            image_data.append(np.array(boxed_image,dtype='uint8'))
-
-            boxes = np.zeros((max_boxes,5), dtype='int32')
-            for i, box in enumerate(line[1:]):
-                if i < max_boxes:
-                    boxes[i] = np.array(list(map(int,box.split(','))))
-                else:
-                    break
-            image_size = np.array(image.size)
-            input_size = np.array(input_shape[::-1])
-            new_size = (image_size * np.min(input_size/image_size)).astype('int32')
-            boxes[:i+1, 0:2] = (boxes[:i+1, 0:2]*new_size/image_size + (input_size-new_size)/2).astype('int32')
-            boxes[:i+1, 2:4] = (boxes[:i+1, 2:4]*new_size/image_size + (input_size-new_size)/2).astype('int32')
-            box_data.append(boxes)
-    image_data = np.array(image_data)
-    box_data = np.array(box_data)
-    np.savez(data_path, image_data=image_data, box_data=box_data)
-    print('Saving training data into ' + data_path)
-    return image_data, box_data
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = [float(x) for x in anchors.split(',')]
+    return np.array(anchors).reshape(-1, 2)
 
 
 def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=True):
     '''create the training model'''
     image_input = Input(shape=(None, None, 3))
     h, w = input_shape
-    num_anchors = len(anchors)//3
-    y_true = [Input(shape=(h//32, w//32, num_anchors, num_classes+5)),
-              Input(shape=(h//16, w//16, num_anchors, num_classes+5)),
-              Input(shape=(h//8, w//8, num_anchors, num_classes+5))]
+    num_anchors = len(anchors)
 
-    model_body = yolo_body(image_input, num_anchors, num_classes)
+    y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
+        num_anchors//3, num_classes+5)) for l in range(3)]
+
+    model_body = yolo_body(image_input, num_anchors//3, num_classes)
 
     if load_pretrained:
         weights_path = os.path.join('model_data', 'yolo_weights.h5')
@@ -121,33 +100,66 @@ def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze
                 model_body.layers[i].trainable = False
 
     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
-        arguments={'anchors': anchors, 'num_classes': num_classes})(
+        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
         [*model_body.output, *y_true])
     model = Model([model_body.input, *y_true], model_loss)
 
-    return model_body, model
+    return model
 
-def train(model, image_data, y_true, log_dir='logs/'):
-    '''retrain/fine-tune the model'''
-    model.compile(optimizer='adam', loss={
-        # use custom yolo_loss Lambda layer.
-        'yolo_loss': lambda y_true, y_pred: y_pred})
+def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=True):
+    '''create the training model, for Tiny YOLOv3'''
+    image_input = Input(shape=(None, None, 3))
+    h, w = input_shape
+    num_anchors = len(anchors)
 
-    logging = TensorBoard(log_dir=log_dir)
-    checkpoint = ModelCheckpoint(log_dir + "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5",
-        monitor='val_loss', save_weights_only=True, save_best_only=True)
-    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')
+    y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \
+        num_anchors//2, num_classes+5)) for l in range(2)]
 
-    model.fit([image_data, *y_true],
-              np.zeros(len(image_data)),
-              validation_split=.1,
-              batch_size=32,
-              epochs=30,
-              callbacks=[logging, checkpoint, early_stopping])
-    model.save_weights(log_dir + 'trained_weights.h5')
-    # Further training.
+    model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)
 
+    if load_pretrained:
+        weights_path = os.path.join('model_data/', 'tiny_yolo_weights.h5')
+        if not os.path.exists(weights_path):
+            print("CREATING WEIGHTS FILE" + weights_path)
+            yolo_path = os.path.join('model_data', 'tiny_yolo.h5')
+            orig_model = load_model(yolo_path, compile=False)
+            orig_model.save_weights(weights_path)
+        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
+        if freeze_body:
+            # Do not freeze 2 output layers.
+            for i in range(len(model_body.layers)-2):
+                model_body.layers[i].trainable = False
+
+    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
+        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
+        [*model_body.output, *y_true])
+    model = Model([model_body.input, *y_true], model_loss)
 
+    return model
+
+def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
+    '''data generator for fit_generator'''
+    n = len(annotation_lines)
+    np.random.shuffle(annotation_lines)
+    i = 0
+    while True:
+        image_data = []
+        box_data = []
+        for b in range(batch_size):
+            i %= n
+            image, box = get_random_data(annotation_lines[i], input_shape)
+            image_data.append(image)
+            box_data.append(box)
+            i += 1
+        image_data = np.array(image_data)
+        box_data = np.array(box_data)
+        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
+        yield [image_data, *y_true], np.zeros(batch_size)
+
+def data_generator_wrap(annotation_lines, batch_size, input_shape, anchors, num_classes):
+    n = len(annotation_lines)
+    if n==0 or batch_size<=0: return None
+    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
 
 if __name__ == '__main__':
     _main()
@@ -7,29 +7,28 @@
 import colorsys
 import os
 import random
-from timeit import time
-from timeit import default_timer as timer  ### to calculate FPS
+from timeit import default_timer as timer
 
 import numpy as np
 from keras import backend as K
 from keras.models import load_model
+from keras.layers import Input
 from PIL import Image, ImageFont, ImageDraw
 
-from yolo3.model import yolo_eval
+from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
 from yolo3.utils import letterbox_image
 
 class YOLO(object):
     def __init__(self):
-        self.model_path = 'model_data/yolo.h5'
+        self.model_path = 'model_data/yolo.h5' # model path or trained weights path
         self.anchors_path = 'model_data/yolo_anchors.txt'
         self.classes_path = 'model_data/coco_classes.txt'
         self.score = 0.3
         self.iou = 0.5
         self.class_names = self._get_class()
         self.anchors = self._get_anchors()
         self.sess = K.get_session()
-        self.model_image_size = (416, 416) # fixed size or (None, None)
-        self.is_fixed_size = self.model_image_size != (None, None)
+        self.model_image_size = (416, 416) # fixed size or (None, None), hw
         self.boxes, self.scores, self.classes = self.generate()
 
     def _get_class(self):
@@ -43,15 +42,28 @@ def _get_anchors(self):
         anchors_path = os.path.expanduser(self.anchors_path)
         with open(anchors_path) as f:
             anchors = f.readline()
-            anchors = [float(x) for x in anchors.split(',')]
-            anchors = np.array(anchors).reshape(-1, 2)
-        return anchors
+        anchors = [float(x) for x in anchors.split(',')]
+        return np.array(anchors).reshape(-1, 2)
 
     def generate(self):
         model_path = os.path.expanduser(self.model_path)
-        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
+        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
+
+        # Load model, or construct model and load weights.
+        num_anchors = len(self.anchors)
+        num_classes = len(self.class_names)
+        is_tiny_version = num_anchors==6 # default setting
+        try:
+            self.yolo_model = load_model(model_path, compile=False)
+        except:
+            self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
+                if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
+            self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
+        else:
+            assert self.yolo_model.layers[-1].output_shape[-1] == \
+                num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
+                'Mismatch between model and given anchor and class sizes'
 
-        self.yolo_model = load_model(model_path, compile=False)
         print('{} model, anchors, and classes loaded.'.format(model_path))
 
         # Generate colors for drawing bounding boxes.
@@ -73,9 +85,9 @@ def generate(self):
         return boxes, scores, classes
 
     def detect_image(self, image):
-        start = time.time()
+        start = timer()
 
-        if self.is_fixed_size:
+        if self.model_image_size != (None, None):
             assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
             assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
             boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
@@ -135,7 +147,7 @@ def detect_image(self, image):
             draw.text(text_origin, label, fill=(0, 0, 0), font=font)
             del draw
 
-        end = time.time()
+        end = timer()
         print(end - start)
         return image
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+10,14, 23,27, 37,58, 81,82, 135,169, 344,319`