-
Notifications
You must be signed in to change notification settings - Fork 12
/
urso.py
154 lines (121 loc) · 5.36 KB
/
urso.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
'''
Class to handle URSO datasets
'''
import os
import numpy as np
import os.path
import skimage
import pandas as pd
import se3lib
import utils
from dataset import Dataset
class Camera:
    """Intrinsic parameters of the camera, stored as class attributes."""

    # Image size in pixels
    width = 1280   # horizontal
    height = 960   # vertical

    # Fields of view, written in degrees and converted to radians
    fov_x = 90.0 * np.pi / 180
    fov_y = 73.7 * np.pi / 180

    # Focal lengths in pixels, from the half field-of-view tangent.
    # fy is deliberately negated, exactly as in the original definition.
    fx = width / (2 * np.tan(0.5 * fov_x))
    fy = -height / (2 * np.tan(0.5 * fov_y))

    # 3x3 intrinsic matrix with the principal point at the image centre
    K = np.matrix([
        [fx, 0, width / 2],
        [0, fy, height / 2],
        [0, 0, 1],
    ])
# Image mean (RGB): one value per colour channel, in R, G, B order.
# NOTE(review): indentation was lost in this copy of the file, so it is unclear
# whether this constant belongs to the Camera class body or to module scope -
# confirm against the upstream file before re-indenting.
MEAN_PIXEL = np.array([45, 49, 52])
class Urso(Dataset):
    """Dataset wrapper that registers URSO images together with their poses."""

    def load_dataset(self, dataset_dir, config, subset):
        """Load a subset of the dataset.

        Reads '<subset>_images.csv' (one image filename per row, no header
        row) and '<subset>_poses_gt.csv' (columns q1, q2, q3, q4, x, y, z)
        from dataset_dir and registers every image through self.add_image.

        dataset_dir: The root directory of the dataset.
        config: Settings object. This method reads REGRESS_ORI, REGRESS_LOC,
            ORI_BINS_PER_DIM, LOC_BINS_PER_DIM and BETA from it.
        subset: What to load (train, val, test)

        Always returns None. Results are stored on the instance: name,
        camera, num_images, _image_ids, plus ori_histogram_map and
        ori_output_mask when orientation is encoded, and histogram_3D_map
        when location is encoded. Returns early (after printing a message)
        when dataset_dir does not exist or the image list is empty.
        """
        self.name = 'Urso'

        if not os.path.exists(dataset_dir):
            print("Image directory '" + dataset_dir + "' not found.")
            return None

        set_filename = os.path.join(dataset_dir, subset + '_images.csv')
        # The list file has no header row. pandas expresses that as
        # header=None; a negative header value is rejected by current pandas.
        rgb_list_df = pd.read_csv(set_filename, names=['filename'], header=None)
        rgb_list = list(rgb_list_df['filename'])

        # Set camera params
        self.camera = Camera()

        # Bail out before any pose processing so that the encoders and the
        # min()/max() calls below never see empty inputs.
        if not rgb_list:
            print('No files found')
            return None

        print('Loading poses')
        poses = pd.read_csv(os.path.join(dataset_dir, subset + '_poses_gt.csv'))

        nr_instances = len(rgb_list)
        q_array = np.zeros((nr_instances, 4), dtype=np.float32)
        t_array = np.zeros((nr_instances, 3), dtype=np.float32)

        # Enforce injectivity, i.e., keep quaternions only in the north
        # hemisphere (useful for regression): when q4 is negative, all four
        # components are negated so that q4 becomes non-negative.
        for i in range(nr_instances):
            q = np.asarray([poses['q1'][i], poses['q2'][i],
                            poses['q3'][i], poses['q4'][i]])
            q_array[i, :] = -q if poses['q4'][i] < 0 else q
            t_array[i, :] = [poses['x'][i], poses['y'][i], poses['z'][i]]

        # Encode orientation using soft assignment
        if not config.REGRESS_ORI:
            print('Encoding orientations using soft assignment..')
            ori_encoded, ori_histogram_map, ori_output_mask = utils.encode_ori(
                q_array, config.ORI_BINS_PER_DIM, config.BETA,
                np.array([-180, -90, -180]), np.array([180, 90, 180]))
            # Stored on the instance for later use
            self.ori_histogram_map = ori_histogram_map
            self.ori_output_mask = ori_output_mask

        if not config.REGRESS_LOC:
            print('Encoding locations using soft assignment..')

            # Obtain location as: (image_x, image_y, depth)
            img_x_array = poses['y'] / poses['x']  # simultaneously converting to cam ref frame
            img_y_array = poses['z'] / poses['x']  # simultaneously converting to cam ref frame
            z_array = poses['x']

            # Compute location limits based on camera FOV and dataset range.
            # Camera.fov_x / fov_y are already in radians, so the half-angle
            # is a plain division by two (no degree-to-radian factor).
            theta_x = self.camera.fov_x / 2
            theta_y = self.camera.fov_y / 2
            x_max = np.tan(theta_x)
            y_max = np.tan(theta_y)
            z_min = min(z_array)
            z_max = max(z_array)

            loc_encoded, loc_histogram_map = utils.encode_loc(
                np.stack((img_x_array, img_y_array, z_array), axis=1),
                config.LOC_BINS_PER_DIM, config.BETA,
                np.array([-x_max, -y_max, z_min]), np.array([x_max, y_max, z_max]))

            # Store physical structure of histogram for later inference
            self.histogram_3D_map = loc_histogram_map

        K1, K2 = utils.encode_as_keypoints(q_array, t_array, 3.0)

        for i, file_name in enumerate(rgb_list):
            q = q_array[i, :]

            # Convert to angle-axis
            v, theta = se3lib.quat2angleaxis(q)

            # Convert to euler angles
            pyr = np.asarray(se3lib.quat2euler(q))

            # An empty list stands in for an encoding that was not computed
            # because the corresponding quantity is regressed directly.
            ori_encoded_i = [] if config.REGRESS_ORI else ori_encoded[i, :]
            loc_encoded_i = [] if config.REGRESS_LOC else loc_encoded[i, :]

            rgb_path = os.path.join(dataset_dir, file_name)

            self.add_image(
                "URSO",
                image_id=i,
                path=rgb_path,
                keypoints=[K1[i, :], K2[i, :]],
                location=[poses['x'][i], poses['y'][i], poses['z'][i]],
                location_map=loc_encoded_i,
                quaternion=q,
                angleaxis=[v[0] * theta, v[1] * theta, v[2] * theta],
                pyr=pyr,
                ori_map=ori_encoded_i
            )

        self.num_images = len(self.image_info)
        self._image_ids = np.arange(self.num_images)

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,3] Numpy array.

        image_id: Index into self.image_info; the entry's 'path' is read.
        """
        # Import the submodules explicitly: a bare `import skimage` is not
        # guaranteed to expose `skimage.io` / `skimage.color` on every version.
        import skimage.color
        import skimage.io

        # Load image
        image = skimage.io.imread(self.image_info[image_id]['path'])
        # If grayscale. Convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If has an alpha channel, remove it for consistency
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image