Commit d5c0db2

Update iphone dataset capture

1 parent 6ed033d commit d5c0db2

6 files changed: +468 -1 lines changed

DOCUMENTATION.md

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
# Co-SLAM Documentation

### [Paper](https://arxiv.org/pdf/2304.14377.pdf) | [Project Page](https://hengyiwang.github.io/projects/CoSLAM) | [Video](https://hengyiwang.github.io/projects/Co-SLAM/videos/presentation.mp4)

> Co-SLAM: Joint Coordinate and Sparse Parametric Encodings for Neural Real-Time SLAM <br />
> [Hengyi Wang](https://hengyiwang.github.io/), [Jingwen Wang](https://jingwenwang95.github.io/), [Lourdes Agapito](http://www0.cs.ucl.ac.uk/staff/L.Agapito/)<br />
> CVPR 2023
This is the documentation for Co-SLAM. It covers data capture and the details of the different hyper-parameters.

## Create your own dataset using an iPhone/iPad Pro

1. Download [Stray Scanner](https://apps.apple.com/us/app/stray-scanner/id1557051662) from the App Store.
2. Record an RGB-D video.
3. The output folder should be at `Files/On My iPad/Stray Scanner/`.
4. Create your own config file for the dataset and set `dataset: 'iphone'`; check `./configs/iPhone` for details. (Note: the intrinsics given by Stray Scanner should be divided by 7.5 to align them with the RGB-D frame resolution; a conversion sketch is shown after this list.)
5. Create your config file for the specific scene. You can define the scene bound yourself, or use the provided `vis_bound.ipynb` to determine the scene bound.

NOTE: The resolution of the depth map is (256, 192), which is rather small, so the camera tracking of neural SLAM will not be very robust on the iPhone dataset. Using a RealSense for data capture is recommended. Any suggestions on this are welcome.
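As a concrete illustration of step 4, here is a minimal sketch of the intrinsics conversion. It assumes the Stray Scanner export contains a `camera_matrix.csv` with the 3x3 RGB intrinsics (the file name and layout are assumptions, not part of this repository). The RGB stream is 1920x1440 while the depth maps are 256x192, which is where the factor 7.5 comes from.

```python
import numpy as np

def depth_aligned_intrinsics(scan_dir):
    # camera_matrix.csv is assumed to hold the 3x3 intrinsics of the RGB stream
    K = np.loadtxt(f"{scan_dir}/camera_matrix.csv", delimiter=",")
    fx, fy = K[0, 0] / 7.5, K[1, 1] / 7.5   # 1920 / 256 = 1440 / 192 = 7.5
    cx, cy = K[0, 2] / 7.5, K[1, 2] / 7.5
    return fx, fy, cx, cy                    # values for the cam: section of your config

print(depth_aligned_intrinsics("./data/iphone/statue"))
```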
## Parameters

### Tracking

```yaml
iter: 10 # num of iterations for tracking
sample: 1024 # num of samples for tracking
pc_samples: 40960 # num of samples for tracking using the pc loss (not used)
lr_rot: 0.001 # lr for rotation
lr_trans: 0.001 # lr for translation
ignore_edge_W: 20 # ignore the edge of the image (W)
ignore_edge_H: 20 # ignore the edge of the image (H)
iter_point: 0 # num of iterations for tracking using the pc loss (not used)
wait_iters: 100 # stop optimizing if there is no improvement for k iterations
const_speed: True # constant-speed assumption for initializing the pose
best: True # use the pose with the smallest loss (otherwise use the last pose)
```
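The tracking loop these settings control can be pictured roughly as follows. This is a conceptual sketch, not the repository's implementation; `pose_to_params`, `params_to_pose`, `sample_rays`, and `model.render_loss` are hypothetical helpers standing in for the real pose parameterization, pixel sampling, and rendering loss.

```python
import torch

def track_frame(model, frame, prev_pose, prev_prev_pose, cfg):
    # constant-speed initialization: re-apply the last relative motion
    if cfg['const_speed']:
        init_pose = prev_pose @ torch.linalg.inv(prev_prev_pose) @ prev_pose
    else:
        init_pose = prev_pose.clone()

    rot, trans = pose_to_params(init_pose)                      # hypothetical helper
    optim = torch.optim.Adam([{'params': [rot], 'lr': cfg['lr_rot']},
                              {'params': [trans], 'lr': cfg['lr_trans']}])
    best_loss, best_pose, stale = float('inf'), init_pose, 0
    for _ in range(cfg['iter']):
        # sample `sample` pixels, skipping `ignore_edge_W/H` pixels at the borders
        rays = sample_rays(frame, cfg['sample'],
                           cfg['ignore_edge_W'], cfg['ignore_edge_H'])   # hypothetical helper
        loss = model.render_loss(rays, params_to_pose(rot, trans))       # hypothetical helper
        if loss.item() < best_loss:
            best_loss, best_pose, stale = loss.item(), params_to_pose(rot, trans).detach(), 0
        else:
            stale += 1
        if stale >= cfg['wait_iters']:                                   # early stopping
            break
        optim.zero_grad(); loss.backward(); optim.step()
    # `best: True` keeps the lowest-loss pose; otherwise return the last pose
    return best_pose if cfg['best'] else params_to_pose(rot, trans).detach()
```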
### Mapping

```yaml
sample: 2048 # number of pixels used for BA
first_mesh: True # save the first mesh
iters: 10 # num of iterations for BA

# lr for the representation. An interesting observation: if you set
# lr_embed=0.001 and lr_decoder=0.01, the decoder relies more on the
# coordinate encoding, which results in better completion. This is
# suitable for room-scale scenes, but not for TUM RGB-D...
lr_embed: 0.01 # lr for the HashGrid
lr_decoder: 0.01 # lr for the decoder

lr_rot: 0.001 # lr for rotation
lr_trans: 0.001 # lr for translation
keyframe_every: 5 # select a keyframe every 5 frames
map_every: 5 # perform BA every 5 frames
n_pixels: 0.05 # fraction of pixels saved for each frame
first_iters: 500 # num of iterations for first-frame mapping

# As we perform global BA, we need to make sure that 1) in every
# iteration there are samples from the current frame, and 2) we do not
# sample too many pixels on the current frame, which may introduce bias.
# It is suggested to use min_pixels_cur = 0.01 * #samples.
optim_cur: False # for challenging scenes, avoid optimizing the current frame pose during BA
min_pixels_cur: 20 # min pixels sampled from the current frame
map_accum_step: 1 # num of steps for accumulating gradients for the model
pose_accum_step: 5 # num of steps for accumulating gradients for the poses
map_wait_step: 0 # wait n iterations before starting to update the model
filter_depth: False # filter out depth outliers or not
```
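A short sketch of how the global-BA pixel batch could be composed under these constraints; `keyframe_rays` and `cur_frame_rays` are assumed ray buffers, and the helper is illustrative rather than the repository's actual sampler.

```python
import torch

def sample_global_ba_batch(keyframe_rays, cur_frame_rays, cfg):
    # always include at least `min_pixels_cur` rays from the current frame
    n_cur = cfg['min_pixels_cur']                 # e.g. 20, roughly 0.01 * 2048 samples
    n_kf = cfg['sample'] - n_cur                  # the rest comes from stored keyframe pixels
    kf_idx = torch.randint(0, keyframe_rays.shape[0], (n_kf,))
    cur_idx = torch.randint(0, cur_frame_rays.shape[0], (n_cur,))
    return torch.cat([keyframe_rays[kf_idx], cur_frame_rays[cur_idx]], dim=0)
```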
### Parametric encoding

```yaml
enc: 'HashGrid' # type of grid; 'DenseGrid' and 'TiledGrid' are also available, as described in tinycudann
tcnn_encoding: True # use tcnn encoding
hash_size: 19 # hash table size (log2); refer to our paper for the settings used for each dataset
voxel_color: 0.08 # voxel size for the color grid (if applicable)
voxel_sdf: 0.04 # voxel size for the SDF grid (a value larger than 10 is interpreted as the voxel dim instead, i.e. a fixed resolution)
oneGrid: True # use a single grid (no separate color grid)
```
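For reference, here is a minimal sketch of how such an encoding might be instantiated with tinycudann. The number of levels, the features per level, and the way the per-level growth factor is derived from the scene bound and voxel size are illustrative assumptions, not values read from the repository.

```python
import numpy as np
import tinycudann as tcnn

bound = np.array([[-2, 2], [-1.2, 0.1], [-3.5, 1.2]])      # example scene bound
voxel_sdf, hash_size, n_levels, base_res = 0.04, 19, 16, 16
max_res = int(np.max(bound[:, 1] - bound[:, 0]) / voxel_sdf)
per_level_scale = np.exp2(np.log2(max_res / base_res) / (n_levels - 1))

grid_enc = tcnn.Encoding(
    n_input_dims=3,
    encoding_config={
        "otype": "HashGrid",              # or "DenseGrid" / "TiledGrid"
        "n_levels": n_levels,
        "n_features_per_level": 2,
        "log2_hashmap_size": hash_size,   # hash_size: 19
        "base_resolution": base_res,
        "per_level_scale": per_level_scale,
    },
)
```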
### Coordinate encoding

```yaml
enc: 'OneBlob' # type of coordinate encoding
n_bins: 16 # number of bins for OneBlob
```
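A short sketch of the coordinate encoding, again using the tinycudann API; how exactly the coordinate and grid features are combined is an assumption here, sketched only in the final comment.

```python
import torch
import tinycudann as tcnn

oneblob_enc = tcnn.Encoding(n_input_dims=3,
                            encoding_config={"otype": "OneBlob", "n_bins": 16})

pts = torch.rand(4096, 3, device="cuda")   # query points normalized to [0, 1]
coord_feat = oneblob_enc(pts)               # shape (4096, 3 * 16)
# joint encoding = concat(coord_feat, grid_feat) before the decoders,
# where grid_feat would come from the HashGrid sketch above.
```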
### Decoder

```yaml
geo_feat_dim: 15 # dim of the geometric feature passed to the color decoder
hidden_dim: 32 # hidden dim of the SDF MLP
num_layers: 2 # num of layers of the SDF MLP
num_layers_color: 2 # num of layers of the color MLP
hidden_dim_color: 32 # hidden dim of the color MLP
tcnn_network: False # use a tinycudann MLP (True) or a PyTorch MLP (False)
```
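A minimal PyTorch sketch of decoders with these dimensions. The exact input composition (in particular what the color MLP is conditioned on) is an assumption for illustration; the repository's module may differ.

```python
import torch
import torch.nn as nn

class Decoders(nn.Module):
    def __init__(self, in_dim, geo_feat_dim=15, hidden_dim=32):
        super().__init__()
        # num_layers: 2 -> one hidden layer followed by the output layer
        self.sdf_net = nn.Sequential(
            nn.Linear(in_dim, hidden_dim), nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, 1 + geo_feat_dim))
        self.color_net = nn.Sequential(
            nn.Linear(in_dim + geo_feat_dim, hidden_dim), nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, 3), nn.Sigmoid())

    def forward(self, feat):
        h = self.sdf_net(feat)
        sdf, geo_feat = h[..., :1], h[..., 1:]               # SDF value + 15-dim feature
        rgb = self.color_net(torch.cat([feat, geo_feat], dim=-1))
        return sdf, rgb
```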
### Training

```yaml
rgb_weight: 5.0 # weight of the RGB loss
depth_weight: 0.1 # weight of the depth loss
sdf_weight: 1000 # weight of the SDF loss (truncation region)
fs_weight: 10 # weight of the SDF loss (free space)
eikonal_weight: 0 # weight of the eikonal loss (not used)
smooth_weight: 0.001 # weight of the smoothness loss (smaller, as it is applied to features)
smooth_pts: 64 # dim of the randomly sampled grid for smoothness
smooth_vox: 0.1 # voxel size of the randomly sampled grid for smoothness
smooth_margin: 0.05 # margin of the sampled grid
#n_samples: 256
n_samples_d: 96 # num of sample points for rendering
range_d: 0.25 # sampling range for depth-guided sampling [-25cm, 25cm]
n_range_d: 21 # num of depth-guided sample points
n_importance: 0 # num of sample points for importance sampling
perturb: 1 # random perturbation (1: True)
white_bkgd: False
trunc: 0.1 # truncation range (10cm for room-scale scenes, 5cm for TUM RGB-D)
rot_rep: 'quat' # rotation representation (axis-angle does not support identity initialization)
```
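To make the roles of `trunc`, `fs_weight`, and `sdf_weight` concrete, here is a sketch of the standard truncated-SDF supervision these weights refer to. It illustrates the idea rather than reproducing the repository's loss code, and the tensor shapes are assumptions.

```python
import torch

def sdf_losses(z_vals, depth, sdf_pred, trunc=0.1, sdf_weight=1000.0, fs_weight=10.0):
    """z_vals: (N, S) sample depths along each ray; depth: (N, 1) sensor depth;
    sdf_pred: (N, S) predicted SDF values at the samples."""
    front_mask = z_vals < (depth - trunc)            # samples in free space, in front of the surface
    sdf_mask = torch.abs(z_vals - depth) < trunc     # samples inside the truncation band
    fs_loss = ((sdf_pred[front_mask] - 1.0) ** 2).mean()
    target = ((depth - z_vals) / trunc)[sdf_mask]    # normalized signed-distance target
    sdf_loss = ((sdf_pred[sdf_mask] - target) ** 2).mean()
    return fs_weight * fs_loss + sdf_weight * sdf_loss
```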

README.md

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,8 @@ This repository contains the code for the paper Co-SLAM: Joint Coordinate and Sp
 - [x] Code for Co-SLAM [2023-5-12]
 - [x] Code for offline RGB-D reconstruction [click here](https://github.com/HengyiWang/Hybrid-Surf). [2023-5-12]
 - [x] Code for evaluation strategy, performance analysis [click here](https://github.com/JingwenWang95/neural_slam_eval). [2023-5-18]
-- [ ] Tutorials on creating sequences using iPhone/iPad Pro
+- [x] Tutorials on params & creating sequences using iPhone/iPad Pro [click here](./DOCUMENTATION.md). [2023-5-26]
+- [ ] Tutorials on creating sequences using RealSense
 
 ## Installation

configs/IPhone/iphone.yaml

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
dataset: 'iphone'

data:
  downsample: 1
  sc_factor: 1
  translation: 0
  num_workers: 4

mapping:
  sample: 2048
  first_mesh: True
  iters: 10
  lr_embed: 0.01
  lr_decoder: 0.01
  lr_rot: 0.001
  lr_trans: 0.001
  keyframe_every: 5
  map_every: 5
  n_pixels: 0.05
  first_iters: 500
  optim_cur: False
  min_pixels_cur: 20
  map_accum_step: 1
  pose_accum_step: 5
  map_wait_step: 0
  filter_depth: False

tracking:
  iter: 10
  sample: 1024
  pc_samples: 40960
  lr_rot: 0.001
  lr_trans: 0.001
  ignore_edge_W: 20
  ignore_edge_H: 20
  iter_point: 0
  wait_iters: 100
  const_speed: True
  best: True

grid:
  enc: 'HashGrid'
  tcnn_encoding: True
  hash_size: 19
  voxel_color: 0.08
  voxel_sdf: 0.04
  oneGrid: True

pos:
  enc: 'OneBlob'
  n_bins: 16

decoder:
  geo_feat_dim: 15
  hidden_dim: 32
  num_layers: 2
  num_layers_color: 2
  hidden_dim_color: 32
  tcnn_network: False

cam:
  H: 192
  W: 256
  fx: 213.17225333
  fy: 213.17225333
  cx: 126.25554667
  cy: 95.27465333
  png_depth_scale: 1000. # for depth images in png format
  crop_edge: 0
  near: 0
  far: 5
  depth_trunc: 5.

training:
  rgb_weight: 5.0
  depth_weight: 0.1
  sdf_weight: 1000
  fs_weight: 10
  eikonal_weight: 0
  smooth_weight: 0.001 #0.001
  smooth_pts: 64
  smooth_vox: 0.1
  smooth_margin: 0.05
  #n_samples: 256
  n_samples_d: 96
  range_d: 0.25
  n_range_d: 21
  n_importance: 0
  perturb: 1
  white_bkgd: False
  trunc: 0.1
  rot_rep: 'quat'
  rgb_missing: 0.0

mesh:
  resolution: 512
  vis: 500
  voxel_eval: 0.02
  voxel_final: 0.015
  visualisation: False

configs/IPhone/statue.yaml

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
inherit_from: configs/IPhone/iphone.yaml
mapping:
  bound: [[-2,2],[-1.2,0.1],[-3.5,1.2]]
  marching_cubes_bound: [[-2,2],[-1.2,0.1],[-2,1.2]]
data:
  datadir: ./data/iphone/statue
  trainskip: 1
  output: output/statue
  exp_name: demo
cam:
  far: 5
  depth_trunc: 5
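The scene config relies on `inherit_from`. Below is a hypothetical sketch of how such a file could be resolved into a full config by merging it over the base file; the repository's actual loader may differ.

```python
import yaml

def deep_update(base, override):
    # recursively overwrite base values with scene-specific ones
    for k, v in override.items():
        if isinstance(v, dict) and isinstance(base.get(k), dict):
            deep_update(base[k], v)
        else:
            base[k] = v
    return base

def load_config(path):
    with open(path, 'r') as f:
        cfg = yaml.safe_load(f)
    parent = cfg.pop('inherit_from', None)
    if parent is not None:
        return deep_update(load_config(parent), cfg)
    return cfg

cfg = load_config('configs/IPhone/statue.yaml')
print(cfg['mapping']['bound'], cfg['cam']['fx'])
```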

tools/vis_cameras.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
'''
camera extrinsics visualization tools
modified from https://github.com/opencv/opencv/blob/master/samples/python/camera_calibration_show_extrinsics.py
'''

import numpy as np
import cv2 as cv
import open3d as o3d


def inverse_homogeneoux_matrix(M):
    R = M[0:3, 0:3]
    t = M[0:3, 3]
    M_inv = np.identity(4)
    M_inv[0:3, 0:3] = R.T
    M_inv[0:3, 3] = -(R.T).dot(t)

    return M_inv


def draw_cuboid(bound):
    x_min, x_max = bound[0, 0], bound[0, 1]
    y_min, y_max = bound[1, 0], bound[1, 1]
    z_min, z_max = bound[2, 0], bound[2, 1]
    points = [[x_min, y_min, z_min], [x_max, y_min, z_min], [x_max, y_max, z_min], [x_min, y_max, z_min],
              [x_min, y_min, z_max], [x_max, y_min, z_max], [x_max, y_max, z_max], [x_min, y_max, z_max]]
    lines = [[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]]

    colors = [[0, 1, 0] for i in range(len(lines))]
    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)

    return line_set


def draw_camera(cam_width, cam_height, f, extrinsic, color, show_axis=True):
    """
    :param extrinsic: c2w transformation
    :return:
    """
    points = [[0, 0, 0], [-cam_width, -cam_height, f], [cam_width, -cam_height, f],
              [cam_width, cam_height, f], [-cam_width, cam_height, f]]
    lines = [[0, 1], [0, 2], [0, 3], [0, 4], [1, 2], [2, 3], [3, 4], [4, 1]]
    colors = [color for i in range(len(lines))]

    line_set = o3d.geometry.LineSet()
    line_set.points = o3d.utility.Vector3dVector(points)
    line_set.lines = o3d.utility.Vector2iVector(lines)
    line_set.colors = o3d.utility.Vector3dVector(colors)
    line_set.transform(extrinsic)

    if show_axis:
        axis = o3d.geometry.TriangleMesh.create_coordinate_frame()
        axis.scale(min(cam_width, cam_height), np.array([0., 0., 0.]))
        axis.transform(extrinsic)
        return [line_set, axis]
    else:
        return [line_set]


def visualize(extrinsics=None, things_to_draw=[]):

    ######################## plot params ########################
    cam_width = 0.64/2  # Width/2 of the displayed camera.
    cam_height = 0.48/2  # Height/2 of the displayed camera.
    focal_len = 0.20  # focal length of the displayed camera.

    ######################## original code ########################
    vis = o3d.visualization.Visualizer()
    vis.create_window()

    if extrinsics is not None:
        for c in range(extrinsics.shape[0]):
            c2w = extrinsics[c, ...]
            camera = draw_camera(cam_width, cam_height, focal_len, c2w, color=[1, 0, 0])
            for geom in camera:
                vis.add_geometry(geom)

    axis = o3d.geometry.TriangleMesh.create_coordinate_frame()
    vis.add_geometry(axis)
    for geom in things_to_draw:
        vis.add_geometry(geom)
    vis.run()
    vis.destroy_window()
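A possible usage sketch for these tools, assuming the module is importable as `tools.vis_cameras` and using the statue scene bound from `configs/IPhone/statue.yaml`; the camera poses here are placeholders, not real trajectory data.

```python
import numpy as np
from tools.vis_cameras import draw_cuboid, visualize

bound = np.array([[-2, 2], [-1.2, 0.1], [-3.5, 1.2]])   # scene bound from statue.yaml
poses = np.stack([np.eye(4)] * 3)                        # placeholder c2w poses
visualize(extrinsics=poses, things_to_draw=[draw_cuboid(bound)])
```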
