Refactoring.

rigolepe · rigolepe · commit d3d3c1ac3516 · 2020-01-13T21:57:06.000+01:00
diff --git a/posenet/base_model.py b/posenet/base_model.py
@@ -1,18 +1,34 @@
 from abc import ABC, abstractmethod
+import tensorflow as tf
 
 
 class BaseModel(ABC):
 
-    def __init__(self, output_stride):
+    def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride):
+        """Store the session and input tensor name, and resolve the four output tensors by name."""
         self.output_stride = output_stride
+        self.sess = sess
+        self.input_tensor_name = input_tensor_name
+        graph, names = sess.graph, output_tensor_names
+        # Only the heatmap is passed through a sigmoid; the other three tensors are fetched as-is.
+        self.output_tensors = [tf.sigmoid(graph.get_tensor_by_name(names['heatmap']), 'heatmap')] + [
+            graph.get_tensor_by_name(names[key])
+            for key in ('offsets', 'displacement_fwd', 'displacement_bwd')]
 
-    @abstractmethod
-    def preprocess_input(self):
-        pass
+    def valid_resolution(self, width, height):
+        """Return (width, height), each snapped to (int(dim) // output_stride) * output_stride + 1."""
+        stride = self.output_stride
+        snapped_width, snapped_height = ((int(dim) // stride) * stride + 1 for dim in (width, height))
+        return snapped_width, snapped_height
 
     @abstractmethod
-    def name_output_results(self, graph):
-        return graph
+    def preprocess_input(self, image):
+        """Turn `image` into the (input_image, image_scale) pair that predict() unpacks; subclasses implement this."""
 
-    def predict(self, nhwc_images):
-        return nhwc_images
+    def predict(self, image):
+        """Preprocess `image`, run the four output tensors on it, and return them followed by the image scale."""
+        input_image, image_scale = self.preprocess_input(image)
+        # The unpack below requires exactly four results, in the order of self.output_tensors.
+        heatmap, offsets, fwd, bwd = self.sess.run(
+            self.output_tensors, feed_dict={self.input_tensor_name: input_image})
+        return heatmap, offsets, fwd, bwd, image_scale
diff --git a/posenet/mobilenet.py b/posenet/mobilenet.py
@@ -1,13 +1,20 @@
 from posenet.base_model import BaseModel
+import numpy as np
+import cv2
 
 
 class MobileNet(BaseModel):
 
-    def __init__(self, output_stride):
-        super().__init__(output_stride)
+    def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride):
+        super().__init__(sess, input_tensor_name, output_tensor_names, output_stride)  # no MobileNet-specific state
 
-    def preprocess_input(self):
-        return self
+    def preprocess_input(self, image):
+        """Resize a BGR image to a stride-compatible size and return it as a 1xHxWx3 RGB batch plus the scale."""
+        src_height, src_width = image.shape[0], image.shape[1]
+        target_width, target_height = self.valid_resolution(src_width, src_height)
+        # Height and width ratios that take the resized dimensions back to the original ones.
+        scale = np.array([src_height / target_height, src_width / target_width])
 
-    def name_output_results(self, graph):
-        return graph
+        resized = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
+        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
+        return (rgb * (2.0 / 255.0) - 1.0).reshape(1, target_height, target_width, 3), scale  # 0..255 -> [-1, 1]
diff --git a/posenet/posenet.py b/posenet/posenet.py
@@ -0,0 +1,21 @@
+from posenet.base_model import BaseModel
+
+
+class PoseNet:
+    """Facade over a pose model; both estimate methods run the model but do not decode poses yet."""
+    def __init__(self, model: BaseModel):
+        self.model = model
+
+    def estimate_multiple_poses(self, image):
+        """Run the model on `image`; currently returns this PoseNet rather than decoded poses."""
+        heatmaps, offsets, disp_fwd, disp_bwd, scale = self.model.predict(image)
+        # TODO: decode multiple poses from the raw outputs above.
+        return self
+
+    def estimate_single_pose(self, image):
+        """Run the model on `image`; currently returns this PoseNet rather than a decoded pose."""
+        heatmaps, offsets, disp_fwd, disp_bwd, scale = self.model.predict(image)
+
+        # TODO: decode one pose, e.g. [{'nose': {'x': 0.0, 'y': 0.0, 'score': 0}}]
+
+        return self
diff --git a/posenet/posenet_factory.py b/posenet/posenet_factory.py
@@ -0,0 +1,32 @@
+import tensorflow as tf
+import os
+import posenet.converter.tfjsdownload as tfjsdownload
+import posenet.converter.tfjs2tf as tfjs2tf
+from posenet.resnet import ResNet
+from posenet.mobilenet import MobileNet
+from posenet.posenet import PoseNet
+
+
+def load_model(model, neuralnet, model_variant):
+    """Return a PoseNet over the saved TF model described by tfjsdownload.model_config for these arguments.
+
+    Converts from tfjs first if the model's tf_dir is missing. The session is left open for the returned model.
+    """
+    model_cfg = tfjsdownload.model_config(model, neuralnet, model_variant)
+    model_path = model_cfg['tf_dir']
+    if not os.path.exists(model_path):
+        print('Cannot find tf model path %s, converting from tfjs...' % model_path)
+        tfjs2tf.convert(model, neuralnet, model_variant)
+        if not os.path.exists(model_path):  # explicit raise: a bare `assert` vanishes under -O
+            raise FileNotFoundError('Conversion did not produce tf model path %s' % model_path)
+    # Not `with Session()`: returning from inside that block closes the session the model still needs.
+    sess = tf.compat.v1.Session()
+    try:
+        with sess.graph.as_default():  # ops created while wrapping the outputs must join the loaded graph
+            tf.compat.v1.saved_model.loader.load(sess, ["serve"], model_path)
+            net_class = ResNet if neuralnet == 'resnet50_v1' else MobileNet
+            net = net_class(sess, model_cfg['input_tensors']['image'], model_cfg['output_tensors'], model_cfg['output_stride'])
+    except Exception:
+        sess.close()  # do not leak the session when loading or wiring fails
+        raise
+    return PoseNet(net)
diff --git a/posenet/resnet.py b/posenet/resnet.py
@@ -1,13 +1,21 @@
 from posenet.base_model import BaseModel
+import numpy as np
+import cv2
 
 
 class ResNet(BaseModel):
 
-    def __init__(self, output_stride):
-        super().__init__(output_stride)
+    def __init__(self, sess, input_tensor_name, output_tensor_names, output_stride):
+        super().__init__(sess, input_tensor_name, output_tensor_names, output_stride)  # shared session/tensor setup
+        self.image_net_mean = [-123.15, -115.90, -103.06]  # added per RGB channel in preprocess_input; presumably negated ImageNet means - TODO confirm
 
-    def preprocess_input(self):
-        return self
+    def preprocess_input(self, image):
+        """Resize a BGR image to a stride-compatible size and return it as a 1xHxWx3 RGB batch plus the scale."""
+        src_height, src_width = image.shape[0], image.shape[1]
+        target_width, target_height = self.valid_resolution(src_width, src_height)
+        # Height and width ratios that take the resized dimensions back to the original ones.
+        scale = np.array([src_height / target_height, src_width / target_width])
 
-    def name_output_results(self, graph):
-        return graph
+        resized = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
+        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
+        return (rgb + self.image_net_mean).reshape(1, target_height, target_width, 3), scale  # per-channel shift