open-mmlab · lyviva · Apr 20, 2023 · Apr 23, 2023 · Apr 23, 2023 · Apr 23, 2023
diff --git a/projects/6DofPose/demo.py b/projects/6DofPose/demo.py
@@ -0,0 +1,116 @@
+import os
+import numpy as np
+from utils import *
+import argparse
+import torch
+import matplotlib.pyplot as plt
+
+import mmengine
+import mmcv
+
+from mmengine.registry import init_default_scope
+from mmpose.apis import inference_topdown
+from mmpose.apis import init_model as init_pose_estimator
+from mmpose.structures import merge_data_samples
+from mmpose.evaluation.functional import nms
+
+from mmdet.apis import inference_detector, init_detector
+
+
+def predict(image_path,
+            det_config=None,
+            det_checkpoint=None,
+            pose_config=None,
+            pose_checkpoint=None,
+            conf_thres=0.5,
+            nms_thres=0.3):
+    """Detect the object in an image and estimate its 2D keypoints.
+
+    A detector is run first; its boxes are filtered by label, score and NMS
+    and then handed to a top-down pose estimator.
+
+    Args:
+        image_path: Image passed to both the detector and the pose estimator.
+        det_config: Detector config file. ``None`` selects the path this demo
+            was originally written against.
+        det_checkpoint: Detector checkpoint file. ``None`` selects the
+            original path.
+        pose_config: Pose estimator config file. ``None`` selects the original
+            path.
+        pose_checkpoint: Pose estimator checkpoint file. ``None`` selects the
+            original path.
+        conf_thres: Detections must score strictly above this value.
+        nms_thres: Threshold passed to ``nms``.
+
+    Returns:
+        The ``keypoints`` of the merged pose predictions, cast to ``int``.
+    """
+    # Fallbacks keep the behaviour of the original hard-coded demo; pass the
+    # keyword arguments to run it on any other machine.
+    work_dirs = '/home/liuyoufu/code/mmpose-openmmlab/mmpose/work_dirs'
+    if det_config is None:
+        det_config = f'{work_dirs}/rtmdet_tiny_ape/rtmdet_tiny_ape.py'
+    if det_checkpoint is None:
+        det_checkpoint = (
+            f'{work_dirs}/rtmdet_tiny_ape/best_coco_bbox_mAP_epoch_90.pth')
+    if pose_config is None:
+        pose_config = f'{work_dirs}/rtmpose-s_ape/rtmpose-s_ape.py'
+    if pose_checkpoint is None:
+        pose_checkpoint = f'{work_dirs}/rtmpose-s_ape/best_PCK_epoch_240.pth'
+
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
+    # NOTE: both models are rebuilt on every call.
+    detector = init_detector(det_config, det_checkpoint, device=device)
+    pose_estimator = init_pose_estimator(
+        pose_config,
+        pose_checkpoint,
+        device=device,
+        cfg_options={'model': {'test_cfg': {'output_heatmaps': True}}})
+
+    init_default_scope(detector.cfg.get('default_scope', 'mmdet'))
+    detect_result = inference_detector(detector, image_path)
+
+    pred_instance = detect_result.pred_instances.cpu().numpy()
+    # Append the score as a fifth column so nms() can rank the boxes.
+    bboxes = np.concatenate(
+        (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
+    # Keep label 0 only, and only confident detections.
+    keep = np.logical_and(pred_instance.labels == 0,
+                          pred_instance.scores > conf_thres)
+    bboxes = bboxes[keep]
+    # Drop the score column again after NMS.
+    bboxes = bboxes[nms(bboxes, nms_thres)][:, :4].astype('int')
+
+    pose_results = inference_topdown(pose_estimator, image_path, bboxes)
+    data_samples = merge_data_samples(pose_results)
+    keypoints = data_samples.pred_instances.keypoints.astype('int')
+
+    return keypoints
+
+
+def parse_args():
+    """Parse the command-line options of the demo.
+
+    Returns:
+        argparse.Namespace with the attributes ``image_path`` and ``id``.
+    """
+    parser = argparse.ArgumentParser(description='demo')
+    # Both options are needed downstream; marking them required turns a
+    # missing option into a usage error instead of a failure on a None value.
+    parser.add_argument('--image-path', required=True, help='image path')
+    parser.add_argument(
+        '--id', type=str, required=True, help='object id, for example: 01')
+    return parser.parse_args()
+
+
+def main():
+    """Draw the predicted and the ground-truth 3D box of one image.
+
+    The image is expected at ``<root>/data/<id>/rgb/<frame>.png``; the model
+    info is read from ``<root>/models/models_info.yml`` and the per-frame
+    camera/pose files from ``<root>/data/<id>/``. The predicted pose is
+    recovered with PnP from the predicted keypoints; both boxes are projected
+    with the frame's intrinsics and plotted (prediction blue, ground truth
+    green).
+    """
+    args = parse_args()
+    image_path = args.image_path
+    obj_id = args.id
+
+    # Split the image path into the frame file name and the dataset root.
+    # The root is three levels above the rgb directory; walking up with
+    # dirname() avoids assuming a fixed length for the object id.
+    file_name = os.path.basename(image_path)
+    rgb_dir = os.path.dirname(image_path)
+    root_path = os.path.dirname(os.path.dirname(os.path.dirname(rgb_dir)))
+    frame_id = int(file_name.split('.')[0])
+
+    # get model_info_dict, obj_id
+    model_info_dict = mmengine.load(
+        os.path.join(root_path, 'models', 'models_info.yml'))
+    object_path = os.path.join(root_path, 'data', obj_id)
+    info_dict = mmengine.load(os.path.join(object_path, 'info.yml'))
+    gt_dict = mmengine.load(os.path.join(object_path, 'gt.yml'))
+
+    # Camera intrinsics of this frame, looked up by the numeric file name.
+    intrinsic = np.array(info_dict[frame_id]['cam_K']).reshape(3, 3)
+
+    # get corner3D (8*3)
+    corners3D = get_3D_corners(model_info_dict, obj_id)
+
+    # Predicted keypoints, flattened to one (x, y) row per keypoint.
+    keypoint_pr = predict(image_path)
+    corners2D_pr = keypoint_pr.reshape(-1, 2)
+
+    # Compute [R|t] by pnp  =====  pred
+    R_pr, t_pr = pnp(corners3D,
+                     corners2D_pr,
+                     np.array(intrinsic, dtype='float32'))
+    Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
+    proj_corners_pr = np.transpose(
+        compute_projection(corners3D, Rt_pr, intrinsic))
+
+    # Read [R|t] from the annotation  =====  gt
+    # Prefer the entry of the requested object; fall back to the first entry
+    # when none carries a matching ``obj_id``.
+    frame_annos = gt_dict[frame_id]
+    gt = next((anno for anno in frame_annos
+               if anno.get('obj_id') == int(obj_id)), frame_annos[0])
+    R_gt = np.array(gt['cam_R_m2c']).reshape(3, 3)
+    t_gt = np.array(gt['cam_t_m2c']).reshape(3, 1)
+    Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
+    proj_corners_gt = np.transpose(
+        compute_projection(corners3D, Rt_gt, intrinsic))
+
+    image = mmcv.imread(image_path)
+    height, width = image.shape[:2]
+
+    plt.xlim((0, width))
+    plt.ylim((0, height))
+    # mmcv.imread returns BGR by default while matplotlib expects RGB, so the
+    # channel axis is reversed for display.
+    plt.imshow(image[..., ::-1])
+    # Projections
+    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3],
+                     [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]
+    for edge in edges_corners:
+        plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1],
+                 color='b', linewidth=2.0)
+        # Both coordinates come from the ground-truth projection.
+        plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1],
+                 color='g', linewidth=2.0)
+    plt.gca().invert_yaxis()
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/6DofPose/tools/linemod_to_coco.py b/projects/6DofPose/tools/linemod_to_coco.py
@@ -0,0 +1,200 @@
+import os
+import mmengine
+import argparse
+import numpy as np
+import mmcv
+
+def parse_examples(data_file):
+    """Read the example names listed one per line in a text file.
+
+    Args:
+        data_file: Path of the text file.
+
+    Returns:
+        List of whitespace-stripped, non-empty lines, or ``None`` (after
+        printing an error) when ``data_file`` is not an existing file.
+    """
+    if not os.path.isfile(data_file):
+        print(f'Error: file {data_file} does not exist!')
+        return None
+
+    with open(data_file) as fid:
+        # Lines read from a file keep their newline, so emptiness has to be
+        # tested after stripping; otherwise blank lines become '' entries.
+        stripped = (line.strip() for line in fid)
+        data_examples = [name for name in stripped if name]
+
+    return data_examples
+
+def images_info(object_path, data_examples):
+    """Build the COCO ``images`` entries for the selected examples.
+
+    Args:
+        object_path: Directory that contains the ``rgb`` image folder.
+        data_examples: Container of example names (file names without
+            ``.png``) that should be kept.
+
+    Returns:
+        List of dicts with ``file_name``, ``height``, ``width`` and ``id``;
+        ``id`` is the position of the file in the filtered directory listing.
+    """
+    rgb_dir = os.path.join(object_path, 'rgb')
+
+    # Keep the png files whose stem is one of the requested examples.
+    selected = []
+    for name in os.listdir(rgb_dir):
+        if '.png' not in name:
+            continue
+        if name.replace('.png', '') in data_examples:
+            selected.append(name)
+
+    records = []
+    for image_id, name in enumerate(selected):
+        # The image is loaded only to read its size.
+        pixels = mmcv.imread(os.path.join(rgb_dir, name))
+        rows, cols = pixels.shape[0], pixels.shape[1]
+        records.append({
+            'file_name': name,
+            'height': rows,
+            'width': cols,
+            'id': image_id,
+        })
+    return records
+
+def project_points_3D_to_2D(points_3D, rotation_vector, translation_vector,
+                            camera_matrix):
+    """Project one 3D point to pixel coordinates.
+
+    Args:
+        points_3D: Array with 3 elements; reshaped to a (3, 1) column.
+        rotation_vector: Array with 9 elements; despite the name it is
+            reshaped to (3, 3) and applied as a rotation matrix.
+        translation_vector: Array with 3 elements; reshaped to (3, 1).
+        camera_matrix: Matrix applied to the transformed point
+            (presumably the 3x3 intrinsics - confirm against callers).
+
+    Returns:
+        The first two rows of the projected point after division by its last
+        row.
+    """
+    point = points_3D.reshape(3, 1)
+    rotation = rotation_vector.reshape(3, 3)
+    translation = translation_vector.reshape(3, 1)
+
+    in_camera = rotation.dot(point) + translation
+    homogeneous = camera_matrix.dot(in_camera)
+    # Normalise by the last (depth) row to get pixel coordinates.
+    homogeneous /= homogeneous[-1]
+
+    return homogeneous[:2]
+
+def insert_np_cam_calibration(filtered_infos):
+    """Attach a 3x3 NumPy camera matrix to every info dict.
+
+    Args:
+        filtered_infos: Iterable of dicts that each hold a 9-element
+            ``cam_K`` entry. The dicts are modified in place.
+
+    Returns:
+        The same ``filtered_infos`` object, with ``cam_K_np`` added to every
+        dict.
+    """
+    for info in filtered_infos:
+        # The target shape is passed positionally: the ``newshape`` keyword
+        # is deprecated in recent NumPy releases.
+        info['cam_K_np'] = np.reshape(np.array(info['cam_K']), (3, 3))
+
+    return filtered_infos
+
+def get_bbox_from_mask(mask, mask_value=None):
+    """Compute the bounding box of the selected pixels of a mask.
+
+    Args:
+        mask: Array with at least two dimensions; axis 0 is treated as y and
+            axis 1 as x.
+        mask_value: When given, pixels equal to this value are selected;
+            otherwise all non-zero pixels are.
+
+    Returns:
+        ``float32`` array ``[min_x, min_y, max_x - min_x, max_y - min_y]``,
+        or four zeros when no pixel is selected.
+    """
+    if mask_value is None:
+        seg = np.where(mask != 0)
+    else:
+        seg = np.where(mask == mask_value)
+    # An empty selection returns the same type as every other path, so
+    # callers can always index the result as a 4-element array.
+    if seg[0].size <= 0 or seg[1].size <= 0:
+        return np.zeros((4, ), dtype=np.float32)
+    min_x = np.min(seg[1])
+    min_y = np.min(seg[0])
+    max_x = np.max(seg[1])
+    max_y = np.max(seg[0])
+
+    return np.array([min_x, min_y, max_x - min_x, max_y - min_y],
+                    dtype=np.float32)
+
+def annotations_info(object_path, data_examples, gt_dict, info_dict,
+                     model_info_dict, obj_id):
+    """Build the COCO ``annotations`` entries for the selected examples.
+
+    For every selected image the bounding box is taken from the object mask
+    and the eight keypoints are the projected corners of the object's 3D
+    bounding box.
+
+    Args:
+        object_path: Directory that contains the ``rgb`` and ``mask`` folders.
+        data_examples: Container of example names (file names without
+            ``.png``) that should be kept.
+        gt_dict: Mapping from the numeric example id to a list of annotation
+            dicts with ``obj_id``, ``cam_R_m2c`` and ``cam_t_m2c``.
+        info_dict: Mapping from the numeric example id to a dict with
+            ``cam_K``. The selected dicts receive a ``cam_K_np`` entry.
+        model_info_dict: Mapping from the integer object id to a dict with
+            ``min_x/y/z`` and ``size_x/y/z``.
+        obj_id: Object id; converted with ``int()``.
+
+    Returns:
+        List of annotation dicts. ``image_id`` and ``id`` are the position of
+        the image in the filtered directory listing. Images without an
+        annotation for ``obj_id`` are skipped but still consume their
+        position.
+    """
+    all_images_path = os.path.join(object_path, 'rgb')
+    all_filenames = [
+        filename for filename in os.listdir(all_images_path)
+        if '.png' in filename and filename.replace('.png', '') in data_examples
+    ]
+    # Build the mask paths from the mask directory directly; replacing 'rgb'
+    # in the full path would also rewrite any other 'rgb' substring in it.
+    mask_dir = os.path.join(object_path, 'mask')
+    mask_paths = [
+        os.path.join(mask_dir, filename) for filename in all_filenames
+    ]
+
+    example_ids = [int(filename.split('.')[0]) for filename in all_filenames]
+    target_id = int(obj_id)
+    filtered_gts = []
+    for key in example_ids:
+        all_annos = [
+            anno for anno in gt_dict[key] if anno['obj_id'] == target_id
+        ]
+        if not all_annos:
+            print('\nError: No annotation found!')
+            filtered_gts.append(None)
+            continue
+        if len(all_annos) > 1:
+            print('\nWarning: found more than one annotation. '
+                  'using only the first annotation')
+        filtered_gts.append(all_annos[0])
+
+    filtered_infos = [info_dict[key] for key in example_ids]
+    info_list = insert_np_cam_calibration(filtered_infos)
+
+    # The 3D box corners depend only on the object, so they are computed once.
+    model_info = model_info_dict[target_id]
+    min_x = model_info['min_x']
+    min_y = model_info['min_y']
+    min_z = model_info['min_z']
+    max_x = min_x + model_info['size_x']
+    max_y = min_y + model_info['size_y']
+    max_z = min_z + model_info['size_z']
+    corners_3d = np.array([[max_x, max_y, min_z],
+                           [max_x, max_y, max_z],
+                           [max_x, min_y, min_z],
+                           [max_x, min_y, max_z],
+                           [min_x, max_y, min_z],
+                           [min_x, max_y, max_z],
+                           [min_x, min_y, min_z],
+                           [min_x, min_y, max_z]])
+
+    annotations = []
+    # Collect bbox and keypoints per image.
+    for image_id, (gt, info, mask_path) in enumerate(
+            zip(filtered_gts, info_list, mask_paths)):
+        if gt is None:
+            # No pose to project. The position is still consumed so later
+            # ids keep matching the position in the filtered listing.
+            continue
+
+        mask = mmcv.imread(mask_path)
+        bbox = get_bbox_from_mask(mask)
+        if isinstance(bbox, tuple):
+            # Tolerate an (array, flag) pair in case the helper reports an
+            # empty mask that way.
+            bbox = bbox[0]
+
+        annotation = {}
+        annotation['category_id'] = 1
+        annotation['segmentation'] = []
+        annotation['iscrowd'] = 0
+        annotation['image_id'] = image_id
+        # Each image holds a single object, so image_id == id.
+        annotation['id'] = image_id
+        annotation['bbox'] = bbox
+        annotation['area'] = bbox[2] * bbox[3]
+        annotation['num_keypoints'] = 8
+
+        # Keypoints are stored as (x, y, v) triplets. v is 0 for an
+        # unlabelled point, 1 for an occluded point and 2 for a visible
+        # point; all eight corners are written as visible.
+        rotation = np.array(gt['cam_R_m2c'])
+        translation = np.array(gt['cam_t_m2c'])
+        corners = [
+            project_points_3D_to_2D(corner, rotation, translation,
+                                    info['cam_K_np'])
+            for corner in corners_3d
+        ]
+        corners = np.array(corners).reshape(8, 2)
+        visibility = np.array([2] * 8).reshape(8, 1)
+        annotation['keypoints'] = np.hstack((corners, visibility)).reshape(-1)
+
+        annotations.append(annotation)
+    return annotations
+
+def parse_args():
+    """Parse the command-line options of the converter.
+
+    Returns:
+        argparse.Namespace with the attributes ``root``, ``id`` and ``mode``.
+    """
+    parser = argparse.ArgumentParser(description='Create_linemod_json')
+    # All three options are needed to build the input and output paths;
+    # marking them required turns a missing option into a usage error
+    # instead of a failure on a None value.
+    parser.add_argument('--root', required=True, help='root path')
+    parser.add_argument(
+        '--id', type=str, required=True, help='object id, for example: 01')
+    parser.add_argument(
+        '--mode', type=str, required=True, help='mode, for example: train')
+    return parser.parse_args()
+
+def main():
+    """Convert one object's annotations to a COCO-style keypoint json.
+
+    Reads ``<root>/data/<id>/{<mode>.txt, gt.yml, info.yml}`` and
+    ``<root>/models/models_info.yml`` and writes
+    ``<root>/json/linemod_preprocessed_<mode>.json``.
+    """
+    args = parse_args()
+
+    object_path = os.path.join(args.root, f'data/{args.id}/')
+    data_examples = parse_examples(object_path + args.mode + '.txt')
+    if data_examples is None:
+        # parse_examples() has already reported the missing split file.
+        raise SystemExit(1)
+    gt_dict = mmengine.load(object_path + 'gt.yml')
+    info_dict = mmengine.load(object_path + 'info.yml')
+    obj_id = args.id
+    # os.path.join works whether or not --root ends with a path separator.
+    model_info_dict = mmengine.load(
+        os.path.join(args.root, 'models', 'models_info.yml'))
+
+    # images
+    images = images_info(object_path, data_examples)
+
+    # annotations
+    annotations = annotations_info(object_path, data_examples,
+                                   gt_dict, info_dict, model_info_dict,
+                                   obj_id)
+
+    # categories
+    # NOTE(review): the category name is fixed to 'ape' whatever --id is.
+    # Keypoint names are '<x>_<y>_<z>' and follow the corner order written
+    # into the annotations (max x first, then max y, z alternating min/max).
+    categories = [{
+        'supercategory': 'ape',
+        'id': 1,
+        'name': 'ape',
+        'keypoints': [
+            'max_max_min', 'max_max_max',
+            'max_min_min', 'max_min_max',
+            'min_max_min', 'min_max_max',
+            'min_min_min', 'min_min_max'],
+        'skeleton': [[0, 4], [1, 5], [3, 7], [6, 2],
+                     [0, 2], [1, 3], [7, 5], [4, 6],
+                     [0, 1], [7, 6], [5, 4], [2, 3]],
+    }]
+
+    linemod_coco = {
+        'categories': categories,
+        'images': images,
+        'annotations': annotations
+    }
+    out_file = os.path.join(
+        args.root, 'json', 'linemod_preprocessed_' + args.mode + '.json')
+    mmengine.dump(linemod_coco, out_file)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/projects/6DofPose/utils.py b/projects/6DofPose/utils.py
@@ -0,0 +1,64 @@
+import cv2
+import numpy as np
+
+def get_3D_corners(model_info_dict, obj_id):
+    """Return the eight corners of an object's axis-aligned 3D box.
+
+    Args:
+        model_info_dict: Mapping from the integer object id to a dict with
+            ``min_x/y/z`` and ``size_x/y/z``.
+        obj_id: Object id; converted with ``int()`` before the lookup.
+
+    Returns:
+        Array with one ``[x, y, z]`` row per corner. x runs max then min,
+        y runs max then min within each x, and z alternates min, max.
+    """
+    dims = model_info_dict[int(obj_id)]
+    x_lo, y_lo, z_lo = dims['min_x'], dims['min_y'], dims['min_z']
+    x_hi = x_lo + dims['size_x']
+    y_hi = y_lo + dims['size_y']
+    z_hi = z_lo + dims['size_z']
+
+    return np.array([[x, y, z]
+                     for x in (x_hi, x_lo)
+                     for y in (y_hi, y_lo)
+                     for z in (z_lo, z_hi)])
+
+
+def pnp(points_3D, points_2D, cameraMatrix):
+    """Estimate the object pose from matching 3D and 2D points.
+
+    Args:
+        points_3D: Array with one 3D point per row.
+        points_2D: Array with one 2D point per row, in the same order.
+        cameraMatrix: Camera matrix passed to ``cv2.solvePnP``.
+
+    Returns:
+        Tuple ``(R, t)``: the rotation matrix obtained from the solved
+        rotation vector via ``cv2.Rodrigues`` and the translation returned by
+        ``cv2.solvePnP``.
+
+    Raises:
+        ValueError: If the two point sets differ in their number of rows.
+    """
+    # Compare the 3D set against the 2D set (not a set against itself).
+    if points_3D.shape[0] != points_2D.shape[0]:
+        raise ValueError(
+            'points 3D and points 2D must have same number of vertices')
+
+    # Distortion coefficients may be supplied by setting ``pnp.distCoeffs``;
+    # only the "attribute not set" case falls back to zero distortion.
+    try:
+        distCoeffs = pnp.distCoeffs
+    except AttributeError:
+        distCoeffs = np.zeros((8, 1), dtype='float32')
+
+    points_2D = points_2D.astype(np.float32)
+    points_3D = points_3D.astype(np.float32)
+    _, R_exp, t = cv2.solvePnP(points_3D,
+                               points_2D.reshape((-1, 1, 2)),
+                               cameraMatrix,
+                               distCoeffs)
+
+    # Convert the rotation vector to a rotation matrix.
+    R, _ = cv2.Rodrigues(R_exp)
+    return R, t
+
+
+def project_points_3D_to_2D(points_3D, rotation_vector, translation_vector,
+                            camera_matrix):
+    """Map a single 3D point to its 2D image location.
+
+    Args:
+        points_3D: Array with 3 elements; reshaped to a (3, 1) column.
+        rotation_vector: Array with 9 elements; despite the name it is
+            reshaped to (3, 3) and used as a rotation matrix.
+        translation_vector: Array with 3 elements; reshaped to (3, 1).
+        camera_matrix: Matrix applied to the rotated and translated point
+            (presumably the 3x3 intrinsics - confirm against callers).
+
+    Returns:
+        The first two rows of the projected point after division by its last
+        row.
+    """
+    column = points_3D.reshape(3, 1)
+    rot = rotation_vector.reshape(3, 3)
+    shift = translation_vector.reshape(3, 1)
+
+    # Rigid transform into the camera frame, then the camera matrix.
+    projected = camera_matrix.dot(rot.dot(column) + shift)
+    # Divide by the last row to leave homogeneous coordinates.
+    projected /= projected[-1]
+
+    return projected[:2]
+
+
+def compute_projection(points_3D, transformation, internal_calibration):
+    """Project 3D points into the image.
+
+    Args:
+        points_3D: Array with one ``[x, y, z]`` row per point; any number of
+            points is accepted.
+        transformation: Matrix applied to the homogeneous points (the demo
+            passes a 3x4 ``[R|t]``).
+        internal_calibration: Matrix applied after ``transformation`` (the
+            demo passes the 3x3 intrinsics).
+
+    Returns:
+        ``float32`` array with two rows (x and y) and one column per point.
+    """
+    points_3D = points_3D.T
+    # Append a row of ones sized to the actual number of points instead of a
+    # fixed 8, to form homogeneous coordinates.
+    ones = np.ones((1, points_3D.shape[1]))
+    homogeneous = np.concatenate((points_3D, ones))
+
+    camera_projection = (
+        internal_calibration.dot(transformation)).dot(homogeneous)
+    # Perspective division by the third row.
+    projections_2d = np.zeros((2, homogeneous.shape[1]), dtype='float32')
+    projections_2d[0, :] = camera_projection[0, :] / camera_projection[2, :]
+    projections_2d[1, :] = camera_projection[1, :] / camera_projection[2, :]
+    return projections_2d
+