From 5b7af6756ec91b0199df9d183756193b1572249c Mon Sep 17 00:00:00 2001
From: Yuantao Feng
Date: Mon, 25 Oct 2021 20:21:28 +0800
Subject: [PATCH] Update to OpenCV APIs (YuNet -> FaceDetectorYN, SFace -> FaceRecognizerSF) (#6)

* update YuNet and SFace impl with opencv-python api
---
 README.md                                  |   4 +-
 benchmark/config/face_detection_yunet.yaml |   3 +-
 benchmark/requirements.txt                 |   2 +-
 models/face_detection_yunet/demo.py        |   4 +-
 models/face_detection_yunet/yunet.py       | 161 +++++--------------
 models/face_recognition_sface/demo.py      |  20 +--
 models/face_recognition_sface/sface.py     | 176 +++++----------------
 7 files changed, 88 insertions(+), 282 deletions(-)

diff --git a/README.md b/README.md
index 97dcb124..8e963157 100644
--- a/README.md
+++ b/README.md
@@ -29,10 +29,10 @@ Hardware Setup: -->
 
 | Model | Input Size | CPU x86_64 (ms) | CPU ARM (ms) |
 |-------|------------|-----------------|--------------|
-| [YuNet](./models/face_detection_yunet) | 160x120 | 2.35 | 8.72 |
+| [YuNet](./models/face_detection_yunet) | 160x120 | 1.45 | 6.22 |
 | [DB](./models/text_detection_db) | 640x480 | 137.38 | 2780.78 |
 | [CRNN](./models/text_recognition_crnn) | 100x32 | 50.21 | 234.32 |
-| [SFace](./models/face_recognition_sface) | 112x112 | 8.69 | 96.79 |
+| [SFace](./models/face_recognition_sface) | 112x112 | 8.65 | 99.20 |
 | [PP-ResNet](./models/image_classification_ppresnet) | 224x224 | 56.05 | 602.58 |
 [PP-HumanSeg](./models/human_segmentation_pphumanseg) | 192x192 | 19.92 | 105.32 |
 
diff --git a/benchmark/config/face_detection_yunet.yaml b/benchmark/config/face_detection_yunet.yaml
index a61a99ab..3febec19 100644
--- a/benchmark/config/face_detection_yunet.yaml
+++ b/benchmark/config/face_detection_yunet.yaml
@@ -19,5 +19,4 @@ Model:
   modelPath: "models/face_detection_yunet/face_detection_yunet.onnx"
   confThreshold: 0.6
   nmsThreshold: 0.3
-  topK: 5000
-  keepTopK: 750
\ No newline at end of file
+  topK: 5000
\ No newline at end of file
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
index d1127bc8..7add2580 100644
--- a/benchmark/requirements.txt
+++ b/benchmark/requirements.txt
@@ -1,5 +1,5 @@
 numpy==1.21.2
-opencv-python==4.5.3.56
+opencv-python==4.5.4.58
 tqdm
 pyyaml
 requests
\ No newline at end of file
diff --git a/models/face_detection_yunet/demo.py b/models/face_detection_yunet/demo.py
index dc100f3d..9c3ad4c2 100644
--- a/models/face_detection_yunet/demo.py
+++ b/models/face_detection_yunet/demo.py
@@ -25,7 +25,6 @@ def str2bool(v):
 parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
-parser.add_argument('--keep_top_k', type=int, default=750, help='Keep keep_top_k bounding boxes after NMS.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
@@ -62,8 +61,7 @@ def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps
                      inputSize=[320, 320],
                      confThreshold=args.conf_threshold,
                      nmsThreshold=args.nms_threshold,
-                     topK=args.top_k,
-                     keepTopK=args.keep_top_k)
+                     topK=args.top_k)
 
     # If input is an image
     if args.input is not None:
diff --git a/models/face_detection_yunet/yunet.py b/models/face_detection_yunet/yunet.py
index 64c0c93d..7e4702fe 100644
--- a/models/face_detection_yunet/yunet.py
+++ b/models/face_detection_yunet/yunet.py
@@ -10,140 +10,57 @@ import cv2 as cv
 
 class YuNet:
-    def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThreshold=0.3, topK=5000, keepTopK=750):
+    def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThreshold=0.3, topK=5000, backendId=0, targetId=0):
         self._modelPath = modelPath
-        self._model = cv.dnn.readNet(self._modelPath)
-
-        self._inputNames = ''
-        self._outputNames = ['loc', 'conf', 'iou']
-        self._inputSize = inputSize # [w, h]
+        self._inputSize = tuple(inputSize) # [w, h]
         self._confThreshold = confThreshold
         self._nmsThreshold = nmsThreshold
         self._topK = topK
-        self._keepTopK = keepTopK
-
-        self._min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
-        self._steps = [8, 16, 32, 64]
-        self._variance = [0.1, 0.2]
+        self._backendId = backendId
+        self._targetId = targetId
 
-        # Generate priors
-        self._priorGen()
+        self._model = cv.FaceDetectorYN.create(
+            model=self._modelPath,
+            config="",
+            input_size=self._inputSize,
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK,
+            backend_id=self._backendId,
+            target_id=self._targetId)
 
     @property
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend):
-        self._model.setPreferableBackend(backend)
-
-    def setTarget(self, target):
-        self._model.setPreferableTarget(target)
+    def setBackend(self, backendId):
+        self._backendId = backendId
+        self._model = cv.FaceDetectorYN.create(
+            model=self._modelPath,
+            config="",
+            input_size=self._inputSize,
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK,
+            backend_id=self._backendId,
+            target_id=self._targetId)
+
+    def setTarget(self, targetId):
+        self._targetId = targetId
+        self._model = cv.FaceDetectorYN.create(
+            model=self._modelPath,
+            config="",
+            input_size=self._inputSize,
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK,
+            backend_id=self._backendId,
+            target_id=self._targetId)
 
     def setInputSize(self, input_size):
-        self._inputSize = input_size # [w, h]
-
-        # Regenerate priors
-        self._priorGen()
-
-    def _preprocess(self, image):
-        return cv.dnn.blobFromImage(image)
+        self._model.setInputSize(tuple(input_size))
 
     def infer(self, image):
-        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
-        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
-
-        # Preprocess
-        inputBlob = self._preprocess(image)
-        # Forward
-        self._model.setInput(inputBlob, self._inputNames)
-        outputBlob = self._model.forward(self._outputNames)
-
-        # Postprocess
-        results = self._postprocess(outputBlob)
-
-        return results
-
-    def _postprocess(self, outputBlob):
-        # Decode
-        dets = self._decode(outputBlob)
-
-        # NMS
-        keepIdx = cv.dnn.NMSBoxes(
-            bboxes=dets[:, 0:4].tolist(),
-            scores=dets[:, -1].tolist(),
-            score_threshold=self._confThreshold,
-            nms_threshold=self._nmsThreshold,
-            top_k=self._topK
-        ) # box_num x class_num
-        if len(keepIdx) > 0:
-            dets = dets[keepIdx]
-            dets = np.squeeze(dets, axis=1)
-            return dets[:self._keepTopK]
-        else:
-            return np.empty(shape=(0, 15))
-
-    def _priorGen(self):
-        w, h = self._inputSize
-        feature_map_2th = [int(int((h + 1) / 2) / 2),
-                           int(int((w + 1) / 2) / 2)]
-        feature_map_3th = [int(feature_map_2th[0] / 2),
-                           int(feature_map_2th[1] / 2)]
-        feature_map_4th = [int(feature_map_3th[0] / 2),
-                           int(feature_map_3th[1] / 2)]
-        feature_map_5th = [int(feature_map_4th[0] / 2),
-                           int(feature_map_4th[1] / 2)]
-        feature_map_6th = [int(feature_map_5th[0] / 2),
-                           int(feature_map_5th[1] / 2)]
-
-        feature_maps = [feature_map_3th, feature_map_4th,
-                        feature_map_5th, feature_map_6th]
-
-        priors = []
-        for k, f in enumerate(feature_maps):
-            min_sizes = self._min_sizes[k]
-            for i, j in product(range(f[0]), range(f[1])): # i->h, j->w
-                for min_size in min_sizes:
-                    s_kx = min_size / w
-                    s_ky = min_size / h
-
-                    cx = (j + 0.5) * self._steps[k] / w
-                    cy = (i + 0.5) * self._steps[k] / h
-
-                    priors.append([cx, cy, s_kx, s_ky])
-        self.priors = np.array(priors, dtype=np.float32)
-
-    def _decode(self, outputBlob):
-        loc, conf, iou = outputBlob
-        # get score
-        cls_scores = conf[:, 1]
-        iou_scores = iou[:, 0]
-        # clamp
-        _idx = np.where(iou_scores < 0.)
-        iou_scores[_idx] = 0.
-        _idx = np.where(iou_scores > 1.)
-        iou_scores[_idx] = 1.
-        scores = np.sqrt(cls_scores * iou_scores)
-        scores = scores[:, np.newaxis]
-
-        scale = np.array(self._inputSize)
-
-        # get bboxes
-        bboxes = np.hstack((
-            (self.priors[:, 0:2] + loc[:, 0:2] * self._variance[0] * self.priors[:, 2:4]) * scale,
-            (self.priors[:, 2:4] * np.exp(loc[:, 2:4] * self._variance)) * scale
-        ))
-        # (x_c, y_c, w, h) -> (x1, y1, w, h)
-        bboxes[:, 0:2] -= bboxes[:, 2:4] / 2
-
-        # get landmarks
-        landmarks = np.hstack((
-            (self.priors[:, 0:2] + loc[:, 4: 6] * self._variance[0] * self.priors[:, 2:4]) * scale,
-            (self.priors[:, 0:2] + loc[:, 6: 8] * self._variance[0] * self.priors[:, 2:4]) * scale,
-            (self.priors[:, 0:2] + loc[:, 8:10] * self._variance[0] * self.priors[:, 2:4]) * scale,
-            (self.priors[:, 0:2] + loc[:, 10:12] * self._variance[0] * self.priors[:, 2:4]) * scale,
-            (self.priors[:, 0:2] + loc[:, 12:14] * self._variance[0] * self.priors[:, 2:4]) * scale
-        ))
-
-        dets = np.hstack((bboxes, landmarks, scores))
-        return dets
\ No newline at end of file
+        faces = self._model.detect(image)
+        return faces[1]
\ No newline at end of file
diff --git a/models/face_recognition_sface/demo.py b/models/face_recognition_sface/demo.py
index 5c64b99c..ea7dd8b5 100644
--- a/models/face_recognition_sface/demo.py
+++ b/models/face_recognition_sface/demo.py
@@ -35,14 +35,13 @@ def str2bool(v):
 
 if __name__ == '__main__':
     # Instantiate SFace for face recognition
-    recognizer = SFace(modelPath=args.model)
+    recognizer = SFace(modelPath=args.model, disType=args.dis_type)
     # Instantiate YuNet for face detection
     detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet.onnx',
                      inputSize=[320, 320],
                      confThreshold=0.9,
                      nmsThreshold=0.3,
-                     topK=5000,
-                     keepTopK=750)
+                     topK=5000)
 
     img1 = cv.imread(args.input1)
     img2 = cv.imread(args.input2)
@@ -56,16 +55,5 @@ def str2bool(v):
     assert face2.shape[0] > 0, 'Cannot find a face in {}'.format(args.input2)
 
     # Match
-    distance = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1], args.dis_type)
-    print(distance)
-    if args.dis_type == 0:
-        dis_type = 'Cosine'
-        threshold = 0.363
-        result = 'same identity' if distance >= threshold else 'different identity'
-    elif args.dis_type == 1:
-        dis_type = 'Norm-L2'
-        threshold = 1.128
-        result = 'same identity' if distance <= threshold else 'different identity'
-    else:
-        raise NotImplementedError()
-    print('Using {} distance, threshold {}: {}.'.format(dis_type, threshold, result))
\ No newline at end of file
+    result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
+    print('Result: {}.'.format('same identity' if result else 'different identities'))
\ No newline at end of file
diff --git a/models/face_recognition_sface/sface.py b/models/face_recognition_sface/sface.py
index 81fff0d0..84e2bda2 100644
--- a/models/face_recognition_sface/sface.py
+++ b/models/face_recognition_sface/sface.py
@@ -10,156 +10,60 @@ from _testcapi import FLT_MIN
 
 class SFace:
-    def __init__(self, modelPath):
-        self._model = cv.dnn.readNet(modelPath)
-        self._input_size = [112, 112]
-        self._dst = np.array([
-            [38.2946, 51.6963],
-            [73.5318, 51.5014],
-            [56.0252, 71.7366],
-            [41.5493, 92.3655],
-            [70.7299, 92.2041]
-        ], dtype=np.float32)
-        self._dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
+    def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
+        self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+        self._model = cv.FaceRecognizerSF.create(
+            model=self._modelPath,
+            config="",
+            backend_id=self._backendId,
+            target_id=self._targetId)
+
+        self._disType = disType # 0: cosine similarity, 1: Norm-L2 distance
+        assert self._disType in [0, 1], "0: Cosine similarity, 1: norm-L2 distance, others: invalid"
+
+        self._threshold_cosine = 0.363
+        self._threshold_norml2 = 1.128
 
     @property
     def name(self):
         return self.__class__.__name__
 
-    def setBackend(self, backend_id):
-        self._model.setPreferableBackend(backend_id)
-
-    def setTarget(self, target_id):
-        self._model.setPreferableTarget(target_id)
+    def setBackend(self, backendId):
+        self._backendId = backendId
+        self._model = cv.FaceRecognizerSF.create(
+            model=self._modelPath,
+            config="",
+            backend_id=self._backendId,
+            target_id=self._targetId)
+
+    def setTarget(self, targetId):
+        self._targetId = targetId
+        self._model = cv.FaceRecognizerSF.create(
+            model=self._modelPath,
+            config="",
+            backend_id=self._backendId,
+            target_id=self._targetId)
 
     def _preprocess(self, image, bbox):
-        aligned_image = self._alignCrop(image, bbox)
-        return cv.dnn.blobFromImage(aligned_image)
+        return self._model.alignCrop(image, bbox)
 
     def infer(self, image, bbox):
         # Preprocess
         inputBlob = self._preprocess(image, bbox)
 
         # Forward
-        self._model.setInput(inputBlob)
-        outputBlob = self._model.forward()
-
-        # Postprocess
-        results = self._postprocess(outputBlob)
-
-        return results
+        features = self._model.feature(inputBlob)
+        return features
 
-    def _postprocess(self, outputBlob):
-        return outputBlob / cv.norm(outputBlob)
-
-    def match(self, image1, face1, image2, face2, dis_type=0):
+    def match(self, image1, face1, image2, face2):
         feature1 = self.infer(image1, face1)
         feature2 = self.infer(image2, face2)
 
-        if dis_type == 0: # COSINE
-            return np.sum(feature1 * feature2)
-        elif dis_type == 1: # NORM_L2
-            return cv.norm(feature1, feature2)
-        else:
-            raise NotImplementedError()
-
-    def _alignCrop(self, image, face):
-        # Retrieve landmarks
-        if face.shape[-1] == (4 + 5 * 2):
-            landmarks = face[4:].reshape(5, 2)
-        else:
-            raise NotImplementedError()
-        warp_mat = self._getSimilarityTransformMatrix(landmarks)
-        aligned_image = cv.warpAffine(image, warp_mat, self._input_size, flags=cv.INTER_LINEAR)
-        return aligned_image
-
-    def _getSimilarityTransformMatrix(self, src):
-        # compute the mean of src and dst
-        src_mean = np.array([np.mean(src[:, 0]), np.mean(src[:, 1])], dtype=np.float32)
-        dst_mean = np.array([56.0262, 71.9008], dtype=np.float32)
-        # subtract the means from src and dst
-        src_demean = src.copy()
-        src_demean[:, 0] = src_demean[:, 0] - src_mean[0]
-        src_demean[:, 1] = src_demean[:, 1] - src_mean[1]
-        dst_demean = self._dst.copy()
-        dst_demean[:, 0] = dst_demean[:, 0] - dst_mean[0]
-        dst_demean[:, 1] = dst_demean[:, 1] - dst_mean[1]
-
-        A = np.array([[0., 0.], [0., 0.]], dtype=np.float64)
-        for i in range(5):
-            A[0][0] += dst_demean[i][0] * src_demean[i][0]
-            A[0][1] += dst_demean[i][0] * src_demean[i][1]
-            A[1][0] += dst_demean[i][1] * src_demean[i][0]
-            A[1][1] += dst_demean[i][1] * src_demean[i][1]
-        A = A / 5
-
-        d = np.array([1.0, 1.0], dtype=np.float64)
-        if A[0][0] * A[1][1] - A[0][1] * A[1][0] < 0:
-            d[1] = -1
-
-        T = np.array([
-            [1.0, 0.0, 0.0],
-            [0.0, 1.0, 0.0],
-            [0.0, 0.0, 1.0]
-        ], dtype=np.float64)
-
-        s, u, vt = cv.SVDecomp(A)
-        smax = s[0][0] if s[0][0] > s[1][0] else s[1][0]
-        tol = smax * 2 * FLT_MIN
-        rank = int(0)
-        if s[0][0] > tol:
-            rank += 1
-        if s[1][0] > tol:
-            rank += 1
-        det_u = u[0][0] * u[1][1] - u[0][1] * u[1][0]
-        det_vt = vt[0][0] * vt[1][1] - vt[0][1] * vt[1][0]
-        if rank == 1:
-            if det_u * det_vt > 0:
-                uvt = np.matmul(u, vt)
-                T[0][0] = uvt[0][0]
-                T[0][1] = uvt[0][1]
-                T[1][0] = uvt[1][0]
-                T[1][1] = uvt[1][1]
-            else:
-                temp = d[1]
-                d[1] = -1
-                D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
-                Dvt = np.matmul(D, vt)
-                uDvt = np.matmul(u, Dvt)
-                T[0][0] = uDvt[0][0]
-                T[0][1] = uDvt[0][1]
-                T[1][0] = uDvt[1][0]
-                T[1][1] = uDvt[1][1]
-                d[1] = temp
-        else:
-            D = np.array([[d[0], 0.0], [0.0, d[1]]], dtype=np.float64)
-            Dvt = np.matmul(D, vt)
-            uDvt = np.matmul(u, Dvt)
-            T[0][0] = uDvt[0][0]
-            T[0][1] = uDvt[0][1]
-            T[1][0] = uDvt[1][0]
-            T[1][1] = uDvt[1][1]
-
-        var1 = 0.0
-        var2 = 0.0
-        for i in range(5):
-            var1 += src_demean[i][0] * src_demean[i][0]
-            var2 += src_demean[i][1] * src_demean[i][1]
-        var1 /= 5
-        var2 /= 5
-
-        scale = 1.0 / (var1 + var2) * (s[0][0] * d[0] + s[1][0] * d[1])
-        TS = [
-            T[0][0] * src_mean[0] + T[0][1] * src_mean[1],
-            T[1][0] * src_mean[0] + T[1][1] * src_mean[1]
-        ]
-        T[0][2] = dst_mean[0] - scale * TS[0]
-        T[1][2] = dst_mean[1] - scale * TS[1]
-        T[0][0] *= scale
-        T[0][1] *= scale
-        T[1][0] *= scale
-        T[1][1] *= scale
-        return np.array([
-            [T[0][0], T[0][1], T[0][2]],
-            [T[1][0], T[1][1], T[1][2]]
-        ], dtype=np.float64)
\ No newline at end of file
+        if self._disType == 0: # COSINE
+            cosine_score = self._model.match(feature1, feature2, self._disType)
+            return 1 if cosine_score >= self._threshold_cosine else 0
+        else: # NORM_L2
+            norml2_distance = self._model.match(feature1, feature2, self._disType)
+            return 1 if norml2_distance <= self._threshold_norml2 else 0
\ No newline at end of file
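
Below is a minimal, self-contained sketch of the cv.FaceDetectorYN flow that the patched yunet.py wrapper delegates to, assuming opencv-python>=4.5.4.58 (the version this patch pins); the model and image paths are placeholders, not files shipped with this patch:

import cv2 as cv

# Placeholder paths; point these at a real YuNet ONNX model and a test image.
model_path = 'face_detection_yunet.onnx'
image = cv.imread('test.jpg')
h, w, _ = image.shape

detector = cv.FaceDetectorYN.create(
    model=model_path,
    config="",
    input_size=(w, h),        # (width, height) of the images passed to detect()
    score_threshold=0.9,
    nms_threshold=0.3,
    top_k=5000)

# detect() returns (retval, faces); faces holds one row of 15 floats per face:
# x, y, w, h, five landmark (x, y) pairs, and a confidence score.
_, faces = detector.detect(image)
for face in (faces if faces is not None else []):
    x, y, bw, bh = [int(v) for v in face[:4]]
    cv.rectangle(image, (x, y), (x + bw, y + bh), (0, 255, 0), 2)
cv.imwrite('result.jpg', image)

Because the detection parameters are baked into the FaceDetectorYN handle at creation time, the patched wrapper recreates the handle inside setBackend()/setTarget() instead of calling setPreferableBackend()/setPreferableTarget() on a cv.dnn network as the old code did.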
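A companion sketch of the cv.FaceRecognizerSF flow from the patched sface.py and demo.py, with the same caveat that model and image paths are placeholders; the 0.363 cosine and 1.128 norm-L2 thresholds are the values hard-coded in the patched sface.py:

import cv2 as cv

# Placeholder paths; substitute real models and images.
img1 = cv.imread('person1.jpg')
img2 = cv.imread('person2.jpg')

detector = cv.FaceDetectorYN.create(
    model='face_detection_yunet.onnx', config="",
    input_size=(img1.shape[1], img1.shape[0]),
    score_threshold=0.9, nms_threshold=0.3, top_k=5000)
recognizer = cv.FaceRecognizerSF.create(
    model='face_recognition_sface.onnx', config="")

_, faces1 = detector.detect(img1)
detector.setInputSize((img2.shape[1], img2.shape[0]))
_, faces2 = detector.detect(img2)
assert faces1 is not None and faces2 is not None, 'no face detected'

# As in demo.py, drop the trailing confidence score (face[0][:-1]), let the
# recognizer align the crop by its five landmarks, then extract an embedding.
feat1 = recognizer.feature(recognizer.alignCrop(img1, faces1[0][:-1]))
feat2 = recognizer.feature(recognizer.alignCrop(img2, faces2[0][:-1]))

# dis_type 0 = cosine similarity (same identity if score >= 0.363);
# dis_type 1 = norm-L2 distance (same identity if distance <= 1.128).
cosine_score = recognizer.match(feat1, feat2, 0)
print('same identity' if cosine_score >= 0.363 else 'different identities')

Moving alignment into FaceRecognizerSF.alignCrop() is what lets the patch delete the hand-rolled _getSimilarityTransformMatrix(), the only user of FLT_MIN; note the import of FLT_MIN from the CPython-internal _testcapi module still appears as context at the top of sface.py.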