
Fixed the compare module and made it use MTCNN for alignment
davidsandberg committed Nov 9, 2016
1 parent aea233a commit 1fa3729
Showing 7 changed files with 60 additions and 31 deletions.
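
After this change, compare.py detects and aligns the input faces itself using the MTCNN networks in align/detect_face.py, so the dlib shape predictor and the --dlib_face_predictor flag are no longer needed. Below is a minimal sketch of driving the updated module programmatically; it mirrors the updated test case at the bottom of this diff, and the model directory, checkpoint names and image paths are placeholders taken from that test.

import compare

# The first three positional arguments appear to be the model directory, meta graph
# file and checkpoint file, followed by the two images to compare; --image_size and
# --margin fall back to their new defaults (160 and 44).
argv = ['../data/model/20161030-023650/',
        'model-20161030-023650.meta',
        'model-20161030-023650.ckpt-80000',
        '../data/images/Anthony_Hopkins_0001.jpg',
        '../data/images/Anthony_Hopkins_0002.jpg']
args = compare.parse_arguments(argv)
compare.main(args)  # prints the distance between the two face embeddings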
Binary file added data/images/Anthony_Hopkins_0001.jpg
Binary file added data/images/Anthony_Hopkins_0002.jpg
22 changes: 4 additions & 18 deletions src/align/align_dataset_mtcnn.py
@@ -45,27 +45,13 @@ def main(args):
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            with tf.variable_scope('pnet'):
                data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
                pnet = align.detect_face.PNet({'data':data})
                pnet.load('../../data/det1.npy', sess)
            with tf.variable_scope('rnet'):
                data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
                rnet = align.detect_face.RNet({'data':data})
                rnet.load('../../data/det2.npy', sess)
            with tf.variable_scope('onet'):
                data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
                onet = align.detect_face.ONet({'data':data})
                onet.load('../../data/det3.npy', sess)

            pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
            rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
            onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, '../../data/')

    minsize = 20 # minimum size of face
    threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold
@@ -106,7 +92,7 @@ def main(args):
                        img = facenet.to_rgb(img)
                    img = img[:,:,0:3]

                    bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet_fun, rnet_fun, onet_fun, threshold, factor)
                    bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    if nrof_faces>0:
                        det = bounding_boxes[:,0:4]
20 changes: 20 additions & 0 deletions src/align/detect_face.py
@@ -31,6 +31,7 @@
import tensorflow as tf
#from math import floor
import cv2
import os

def layer(op):
    '''Decorator for composable network layers.'''
@@ -270,6 +271,25 @@ def setup(self):
        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
             .fc(10, relu=False, name='conv6-3'))

def create_mtcnn(sess, model_path):
    with tf.variable_scope('pnet'):
        data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
        pnet = PNet({'data':data})
        pnet.load(os.path.join(model_path, 'det1.npy'), sess)
    with tf.variable_scope('rnet'):
        data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
        rnet = RNet({'data':data})
        rnet.load(os.path.join(model_path, 'det2.npy'), sess)
    with tf.variable_scope('onet'):
        data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
        onet = ONet({'data':data})
        onet.load(os.path.join(model_path, 'det3.npy'), sess)

    pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
    rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
    onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
    return pnet_fun, rnet_fun, onet_fun

def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    # im: input image
    # minsize: minimum of faces' size
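
The new create_mtcnn helper wraps what align_dataset_mtcnn.py previously did inline: it builds the P-Net, R-Net and O-Net graphs, loads the det1/det2/det3 .npy weights from model_path, and returns three callables that run the corresponding sub-networks. A rough usage sketch follows; the weight directory is a placeholder, the random image only makes the snippet self-contained, and the session setup is simplified compared to the scripts above.

import numpy as np
import tensorflow as tf
import align.detect_face

minsize = 20                  # minimum face size in pixels
threshold = [0.6, 0.7, 0.7]   # P-Net, R-Net and O-Net score thresholds
factor = 0.709                # scale factor for the image pyramid

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        # Builds the three cascaded detector networks once and loads det1/det2/det3.npy;
        # the returned callables close over sess, so the session must stay open.
        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, '../data/')

# Stand-in for a real RGB image (e.g. one loaded with scipy.misc.imread)
img = np.random.randint(0, 255, size=(480, 640, 3)).astype(np.uint8)
bounding_boxes, points = align.detect_face.detect_face(
    img, minsize, pnet, rnet, onet, threshold, factor)
print('Detected %d face(s)' % bounding_boxes.shape[0])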
41 changes: 31 additions & 10 deletions src/compare.py
@@ -29,17 +29,15 @@
from scipy import misc
import tensorflow as tf
import numpy as np
import os
import sys
import argparse
import facenet
import align_dlib
import align.detect_face

def main(args):
    align = align_dlib.AlignDlib(os.path.expanduser(args.dlib_face_predictor))
    image_paths = [args.image1, args.image2]
    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE

    images = load_and_align_data(image_paths, args.image_size, args.margin, args.gpu_memory_fraction)
    with tf.Graph().as_default():

        with tf.Session() as sess:
@@ -52,21 +50,40 @@ def main(args):
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            image_size = int(images_placeholder.get_shape()[1])

            # Run forward pass to calculate embeddings
            images = load_and_align_data(image_paths, image_size, align, landmarkIndices)
            feed_dict = { images_placeholder: images, phase_train_placeholder: False }
            emb = sess.run(embeddings, feed_dict=feed_dict)
            dist = np.sqrt(np.mean(np.square(np.subtract(emb[0,:], emb[1,:]))))
            print('Distance between the embeddings: %3.6f' % dist)

def load_and_align_data(image_paths, image_size, align, landmarkIndices):
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):

    minsize = 20 # minimum size of face
    threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold
    factor = 0.709 # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, '../data/')

    nrof_samples = len(image_paths)
    img_list = [None] * nrof_samples
    for i in xrange(nrof_samples):
        img = misc.imread(image_paths[i])
        aligned = align.align(image_size, img, landmarkIndices=landmarkIndices, skipMulti=True)
        img_size = np.asarray(img.shape)[0:2]
        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
        det = np.squeeze(bounding_boxes[0,0:4])
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0]-margin/2, 0)
        bb[1] = np.maximum(det[1]-margin/2, 0)
        bb[2] = np.minimum(det[2]+margin/2, img_size[1])
        bb[3] = np.minimum(det[3]+margin/2, img_size[0])
        cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
        aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
        prewhitened = facenet.prewhiten(aligned)
        img_list[i] = prewhitened
    images = np.stack(img_list)
@@ -83,8 +100,12 @@ def parse_arguments(argv):
        help='Checkpoint file (with extention ".ckpt-XXXXX"')
    parser.add_argument('image1', type=str, help='First image to compare.')
    parser.add_argument('image2', type=str, help='Second image to compare.')
    parser.add_argument('--dlib_face_predictor', type=str,
        help='File containing the dlib face predictor.', default='../data/shape_predictor_68_face_landmarks.dat')
    parser.add_argument('--image_size', type=int,
        help='Image size (height, width) in pixels.', default=160)
    parser.add_argument('--margin', type=int,
        help='Margin for the crop around the bounding box (height, width) in pixels.', default=44)
    parser.add_argument('--gpu_memory_fraction', type=float,
        help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0)
    return parser.parse_args(argv)

if __name__ == '__main__':
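
For reference, the distance printed by compare.py is the root mean square of the element-wise difference between the two embeddings. A small self-contained sketch with made-up 128-dimensional embeddings (no model involved, just the same numpy expression as above):

import numpy as np

# Two stand-in embeddings; the real ones come from sess.run(embeddings, ...) above.
emb = np.random.randn(2, 128).astype(np.float32)

dist = np.sqrt(np.mean(np.square(np.subtract(emb[0, :], emb[1, :]))))
print('Distance between the embeddings: %3.6f' % dist)

# Identical embeddings give a distance of 0; a lower distance suggests the two
# faces are more likely to belong to the same person.
print('Self distance: %3.6f' % np.sqrt(np.mean(np.square(emb[0, :] - emb[0, :]))))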
Binary file modified src/models/__init__.pyc
8 changes: 5 additions & 3 deletions test/train_test.py
@@ -137,9 +137,11 @@ def test_training_classifier_inception_resnet_v2(self):
        facenet_train_classifier.main(args)

    def test_compare(self):
        argv = ['../data/model/20160620-173927/model.ckpt-500000',
                '../data/images/Anthony_Hopkins_0001.png',
                '../data/images/Anthony_Hopkins_0002.png' ]
        argv = ['../data/model/20161030-023650/',
                'model-20161030-023650.meta',
                'model-20161030-023650.ckpt-80000',
                '../data/images/Anthony_Hopkins_0001.jpg',
                '../data/images/Anthony_Hopkins_0002.jpg' ]
        args = compare.parse_arguments(argv)
        compare.main(args)


