
Commit c49e2f0

support Parallel
1 parent 4ba0191 commit c49e2f0


4 files changed (+105, -25 lines)


.gitignore

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@ data
 model
 logs
 .idea/
-tests/
+tests/*
+asset/*
 *.egg-info/
 dist/

dlocr/__main__.py

Lines changed: 7 additions & 6 deletions
@@ -1,9 +1,10 @@
 import argparse
-from datetime import datetime
+import time

 import keras.backend as K

-from dlocr import default_dict_path, default_densenet_config_path, default_densenet_weight_path, default_ctpn_config_path, \
+from dlocr import default_dict_path, default_densenet_config_path, default_densenet_weight_path, \
+    default_ctpn_config_path, \
     default_ctpn_weight_path, get_session, TextDetectionApp

 if __name__ == '__main__':
@@ -30,7 +31,7 @@
         dict_path=args.dict_file_path,
         ctpn_config_path=args.ctpn_config_path,
         densenet_config_path=args.densenet_config_path)
-    start_time = datetime.now()
-    for rect, line in app.detect(args.image_path, args.adjust):
-        print(line)
-    print(f"cost {(datetime.now() - start_time).microseconds / 1000}ms")
+    start_time = time.time()
+    _, texts = app.detect(args.image_path, args.adjust)
+    print('\n'.join(texts))
+    print(f"cost {(time.time() - start_time) * 1000}ms")

dlocr/densenet/core.py

Lines changed: 50 additions & 2 deletions
@@ -1,5 +1,5 @@
 import json
-import os
+from concurrent.futures import ThreadPoolExecutor

 import keras.backend as K
 import numpy as np
@@ -57,6 +57,40 @@ def _ctc_loss(args):
     return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


+def single_img_process(img):
+    im = img.convert('L')
+    scale = im.size[1] * 1.0 / 32
+    w = im.size[0] / scale
+    w = int(w)
+
+    im = im.resize((w, 32), Image.ANTIALIAS)
+    img = np.array(im).astype(np.float32) / 255.0 - 0.5
+    img = img.reshape((32, w, 1))
+    return img
+
+
+def pad_img(img, len, value):
+    out = np.ones(shape=(32, len, 1)) * value
+    out[:, :img.shape[1], :] = img
+    return out
+
+
+def process_imgs(imgs):
+    tmp = []
+    with ThreadPoolExecutor() as executor:
+        for img in executor.map(single_img_process, imgs):
+            tmp.append(img)
+
+    max_len = max([img.shape[1] for img in tmp])
+
+    output = []
+    with ThreadPoolExecutor() as executor:
+        for img in executor.map(lambda img: pad_img(img, max_len, 0.5), tmp):
+            output.append(img)
+
+    return np.array(output)
+
+
 class DenseNetOCR:

     def __init__(self,
@@ -164,14 +198,28 @@ def predict(self, image, id_to_char):
         X = np.array([X])

         y_pred = self.base_model.predict(X)
-        argmax = np.argmax(y_pred, axis=2)[0]

         y_pred = y_pred[:, :, :]
         out = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1], )[0][0])[:, :]
         out = u''.join([id_to_char[x] for x in out[0]])

         return out, im

+    def predict_multi(self, images, id_to_char):
+
+        def single_text(out):
+            return u''.join(['' if x == -1 else id_to_char[x] for x in out])
+
+        X = process_imgs(images)
+        y_pred = self.base_model.predict_on_batch(X)
+        outs = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1], )[0][0])[:, :]
+        texts = []
+        with ThreadPoolExecutor() as executor:
+            for text in executor.map(single_text, outs):
+                texts.append(text)
+
+        return texts
+
     @staticmethod
     def save_config(obj, config_path: str):
         with open(config_path, 'w+') as outfile:
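The new preprocessing pipeline resizes each line crop to a height of 32 pixels, normalizes pixels to [-0.5, 0.5], then right-pads every crop to the width of the widest one so the whole set can go through predict_on_batch as a single tensor. A NumPy-only sketch of that batching step, with dummy crops in place of real line images (pad_to_width and the crop widths are illustrative assumptions, not part of the commit):

import numpy as np
from concurrent.futures import ThreadPoolExecutor

def pad_to_width(img, width, value=0.5):
    # img has shape (32, w, 1); right-pad with `value` up to (32, width, 1)
    out = np.ones((32, width, 1), dtype=img.dtype) * value
    out[:, :img.shape[1], :] = img
    return out

# dummy line crops of varying width, already normalized to [-0.5, 0.5]
crops = [np.random.rand(32, w, 1).astype(np.float32) - 0.5 for w in (80, 120, 95)]

max_w = max(c.shape[1] for c in crops)
with ThreadPoolExecutor() as executor:
    batch = np.array(list(executor.map(lambda c: pad_to_width(c, max_w), crops)))

print(batch.shape)  # (3, 32, 120, 1): one fixed-size batch for predict_on_batch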

dlocr/text_detection_app.py

Lines changed: 46 additions & 16 deletions
@@ -1,3 +1,5 @@
1+
import os
2+
from concurrent.futures import ThreadPoolExecutor
13
from math import *
24

35
import cv2
@@ -7,7 +9,6 @@
79
from dlocr.ctpn import CTPN
810
from dlocr.densenet import DenseNetOCR
911
from dlocr.densenet import load_dict
10-
import os
1112

1213

1314
def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4):
@@ -65,21 +66,36 @@ def single_text_detect(rec, ocr, id_to_char, img, adjust):
6566
return image, text
6667

6768

68-
def model(ctpn, ocr, id_to_char, image_path, adjust):
69-
text_recs, img = ctpn.predict(image_path, mode=2) # 得到所有的检测框
70-
text_recs = sort_box(text_recs)
71-
results = []
69+
def clip_single_img(bbox, img, xDim, yDim, adjust):
70+
xlength = int((bbox[2] - bbox[0]) * 0.1)
71+
ylength = int((bbox[3] - bbox[1]) * 0.2)
72+
if adjust:
73+
pt1 = (max(1, bbox[0] - xlength), max(1, bbox[1] - ylength))
74+
pt2 = (bbox[2], bbox[3])
75+
pt3 = (min(bbox[6] + xlength, xDim - 2), min(yDim - 2, bbox[7] + ylength))
76+
pt4 = (bbox[4], bbox[5])
77+
else:
78+
pt1 = (max(1, bbox[0]), max(1, bbox[1]))
79+
pt2 = (bbox[2], bbox[3])
80+
pt3 = (min(bbox[6], xDim - 2), min(yDim - 2, bbox[7]))
81+
pt4 = (bbox[4], bbox[5])
7282

73-
for index, rec in enumerate(text_recs):
74-
image, text = single_text_detect(rec, ocr, id_to_char, img, adjust) # 识别文字
75-
# plt.subplot(len(text_recs), 1, index + 1)
76-
# plt.imshow(image)
77-
if text is not None and len(text) > 0:
78-
results.append((rec, text))
83+
degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])) # 图像倾斜角度
7984

80-
# plt.show()
85+
partImg = dumpRotateImage(img, degree, pt1, pt2, pt3, pt4)
86+
image = Image.fromarray(partImg)
87+
return image
8188

82-
return results
89+
90+
def clip_imgs_with_bboxes(bboxes, img, adjust):
91+
xDim, yDim = img.shape[1], img.shape[0]
92+
93+
imgs = []
94+
with ThreadPoolExecutor() as executor:
95+
for img in executor.map(lambda t: clip_single_img(t[0], t[1], xDim, yDim, adjust),
96+
map(lambda bbox: (bbox, img), bboxes)):
97+
imgs.append(img)
98+
return imgs
8399

84100

85101
class TextDetectionApp:
@@ -117,16 +133,30 @@ def __init__(self,
117133
else:
118134
self.ocr = DenseNetOCR(num_classes=len(self.id_to_char))
119135

120-
def detect(self, image_path, adjust=True):
136+
def detect(self, image_path, adjust=True, parallel=True):
121137
"""
122138
139+
:param parallel: 是否并行处理
123140
:param image_path: 图像路径
124141
:param adjust: 是否调整检测框
125142
:return:
126143
"""
127144
if not os.path.exists(image_path):
128145
raise ValueError(f"The image path: {image_path} not exists!")
129-
return model(self.ctpn, self.ocr, self.id_to_char, image_path, adjust)
130-
146+
text_recs, img = self.ctpn.predict(image_path, mode=2) # 得到所有的检测框
147+
text_recs = sort_box(text_recs)
131148

149+
if parallel:
150+
imgs = clip_imgs_with_bboxes(text_recs, img, adjust)
132151

152+
texts = self.ocr.predict_multi(imgs, id_to_char=self.id_to_char)
153+
else:
154+
texts = []
155+
for index, rec in enumerate(text_recs):
156+
image, text = single_text_detect(rec, self.ocr, self.id_to_char, img, adjust) # 识别文字
157+
# plt.subplot(len(text_recs), 1, index + 1)
158+
# plt.imshow(image)
159+
if text is not None and len(text) > 0:
160+
texts.append(text)
161+
162+
return text_recs, texts
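With this commit, detect() returns a (text_recs, texts) pair and, when parallel=True, clips all detected boxes concurrently and recognizes them in one batch via predict_multi. A hedged usage sketch: the sample image path is an assumption, and the TextDetectionApp constructor keyword arguments for the weight files are guessed from the imported default_* names rather than shown in this diff:

import time

from dlocr import (default_dict_path, default_densenet_config_path, default_densenet_weight_path,
                   default_ctpn_config_path, default_ctpn_weight_path, TextDetectionApp)

# the weight-path kwargs are assumed; only dict_path and the *_config_path kwargs appear in this diff
app = TextDetectionApp(ctpn_weight_path=default_ctpn_weight_path,
                       densenet_weight_path=default_densenet_weight_path,
                       dict_path=default_dict_path,
                       ctpn_config_path=default_ctpn_config_path,
                       densenet_config_path=default_densenet_config_path)

start = time.time()
# parallel=True (the default) batches recognition through predict_multi
boxes, texts = app.detect("asset/demo.jpg", adjust=True, parallel=True)  # image path is an assumption
print('\n'.join(texts))
print(f"cost {(time.time() - start) * 1000}ms")

# parallel=False falls back to the original per-box loop via single_text_detect
_, texts_seq = app.detect("asset/demo.jpg", adjust=True, parallel=False)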
