Skip to content

Commit dfc2e11

Browse files
authored
Merge pull request #64 from prs-eth/hypersim_preprocess
hypersim preprocessing scripts
2 parents be30652 + 9c17691 commit dfc2e11

File tree

3 files changed

+240
-0
lines changed

3 files changed

+240
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Hypersim preprocessing
2+
3+
## Download
4+
5+
Download the [Hypersim](https://github.com/apple/ml-hypersim) dataset using [this script](https://github.com/apple/ml-hypersim/blob/20f398f4387aeca73175494d6a2568f37f372150/code/python/tools/dataset_download_images.py).
6+
7+
Download the scene split file from [here](https://github.com/apple/ml-hypersim/blob/main/evermotion_dataset/analysis/metadata_images_split_scene_v1.csv).
8+
9+
## Process dataset
10+
11+
Run the preprocessing script:
12+
13+
```bash
14+
python script/dataset_preprocess/hypersim/preprocess_hypersim.py --split_csv /path/to/metadata_images_split_scene_v1.csv
15+
```
16+
17+
(optional) Tar the processed data, for example:
18+
19+
```bash
20+
cd data/Hypersim/processed/train
21+
tar -cf ../../hypersim_processed_train.tar .
22+
```
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Author: Bingxin Ke
2+
# Last modified: 2024-02-19
3+
4+
5+
from pylab import count_nonzero, clip, np
6+
7+
8+
# Adapted from https://github.com/apple/ml-hypersim/blob/main/code/python/tools/scene_generate_images_tonemap.py
9+
def tone_map(
    rgb,
    entity_id_map,
    gamma=1.0 / 2.2,
    percentile=90,
    brightness_nth_percentile_desired=0.8,
):
    """Tone-map an image into the range ``[0, 1]``.

    A single global scale is chosen so that, after gamma correction, the
    ``percentile``-th brightness percentile of the valid pixels equals
    ``brightness_nth_percentile_desired``. The scaled image is then gamma
    corrected and clipped to ``[0, 1]``.

    Args:
        rgb: Array indexed as ``rgb[:, :, c]`` with the colour channels
            0, 1, 2 in the last axis.
        entity_id_map: Per-pixel entity ids with the same leading shape as
            ``rgb``. Pixels equal to ``-1`` are excluded from the brightness
            estimate; the value ``0`` must not occur.
        gamma: Gamma correction exponent (default ``1 / 2.2``).
        percentile: Brightness percentile of the unmodified image that is
            used as the exposure anchor (default ``90``).
        brightness_nth_percentile_desired: Brightness the anchor percentile
            should have after tone mapping (default ``0.8``).

    Returns:
        Array of the same shape as ``rgb`` with values in ``[0, 1]``.

    Raises:
        AssertionError: If ``entity_id_map`` contains a ``0`` entry.
    """
    assert (entity_id_map != 0).all()

    inv_gamma = 1.0 / gamma
    valid_mask = entity_id_map != -1

    if np.count_nonzero(valid_mask) == 0:
        # No valid pixels to estimate exposure from: leave the image unscaled.
        scale = 1.0
    else:
        # "CCIR601 YIQ" method for computing brightness
        brightness = 0.3 * rgb[:, :, 0] + 0.59 * rgb[:, :, 1] + 0.11 * rgb[:, :, 2]
        brightness_nth_percentile_current = np.percentile(
            brightness[valid_mask], percentile
        )

        # If the anchor brightness is (almost) zero, use scale 0.0 instead of
        # dividing by it.
        eps = 0.0001
        if brightness_nth_percentile_current < eps:
            scale = 0.0
        else:
            # Snavely uses the following expression in the code at
            # https://github.com/snavely/pbrs_tonemapper/blob/master/tonemap_rgbe.py:
            #   scale = np.exp(np.log(brightness_nth_percentile_desired)*inv_gamma
            #                  - np.log(brightness_nth_percentile_current))
            #
            # The expression below is equivalent, but follows more directly from
            #   (scale*brightness_nth_percentile_current)^gamma
            #       = brightness_nth_percentile_desired
            scale = (
                np.power(brightness_nth_percentile_desired, inv_gamma)
                / brightness_nth_percentile_current
            )

    # Negative radiance is clamped to 0 before the power so it cannot yield NaN.
    rgb_color_tm = np.power(np.maximum(scale * rgb, 0), gamma)
    return np.clip(rgb_color_tm, 0, 1)
49+
50+
51+
# According to https://github.com/apple/ml-hypersim/issues/9
52+
def dist_2_depth(width, height, flt_focal, distance):
    """Scale per-pixel distances by ``flt_focal / |ray|``.

    For every pixel a ray ``(x, y, flt_focal)`` is formed, where ``x`` and
    ``y`` are the pixel-centre offsets from the image centre. ``distance`` is
    divided by the Euclidean length of that ray and multiplied by
    ``flt_focal``.

    Args:
        width: Number of image columns.
        height: Number of image rows.
        flt_focal: Z component assigned to every ray.
        distance: Array that broadcasts against a ``(height, width)`` grid.

    Returns:
        ``distance / |ray| * flt_focal``.
    """
    # Pixel-centre offsets from the image centre, stored as float32.
    col_offsets = np.linspace(
        (-0.5 * width) + 0.5, (0.5 * width) - 0.5, width
    ).astype(np.float32)
    row_offsets = np.linspace(
        (-0.5 * height) + 0.5, (0.5 * height) - 0.5, height
    ).astype(np.float32)

    # One (x, y, z) ray per pixel; broadcasting fills rows and columns.
    rays = np.empty((height, width, 3), np.float32)
    rays[:, :, 0] = col_offsets[None, :]
    rays[:, :, 1] = row_offsets[:, None]
    rays[:, :, 2] = flt_focal

    ray_length = np.linalg.norm(rays, 2, 2)
    return distance / ray_length * flt_focal
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Author: Bingxin Ke
2+
# Last modified: 2024-02-19
3+
4+
import argparse
5+
import os
6+
7+
import cv2
8+
import h5py
9+
import numpy as np
10+
import pandas as pd
11+
from hypersim_util import dist_2_depth, tone_map
12+
from tqdm import tqdm
13+
14+
# Image size and focal value handed to dist_2_depth for every frame.
IMG_WIDTH = 1024
IMG_HEIGHT = 768
# NOTE(review): presumably the focal length in pixels of the Hypersim
# renders at the resolution above -- confirm against the dataset docs.
FOCAL_LENGTH = 886.81


def _read_hdf5_dataset(path, dtype):
    """Return the ``"dataset"`` entry of the HDF5 file at *path* cast to *dtype*."""
    with h5py.File(path, "r") as f:
        return np.array(f["dataset"]).astype(dtype)


if "__main__" == __name__:
    # Script entry point: for every split, convert each frame listed in the
    # split CSV into a tone-mapped RGB PNG and a 16-bit depth PNG, then write
    # a file list and a per-frame statistics CSV next to the images.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--split_csv",
        type=str,
        default="data/Hypersim/metadata_images_split_scene_v1.csv",
    )
    parser.add_argument("--dataset_dir", type=str, default="data/Hypersim/raw_data")
    parser.add_argument("--output_dir", type=str, default="data/Hypersim/processed")

    args = parser.parse_args()

    split_csv = args.split_csv
    dataset_dir = args.dataset_dir
    output_dir = args.output_dir

    # Depth is stored in the PNG as uint16 after multiplying by this factor.
    depth_png_scale = 1000.0
    uint16_max = np.iinfo(np.uint16).max

    # %%
    raw_meta_df = pd.read_csv(split_csv)
    meta_df = raw_meta_df[raw_meta_df.included_in_public_release].copy()

    # %%
    for split in ["train", "val", "test"]:
        split_output_dir = os.path.join(output_dir, split)
        # exist_ok so that an interrupted run can simply be restarted.
        os.makedirs(split_output_dir, exist_ok=True)

        split_meta_df = meta_df[meta_df.split_partition_name == split].copy()
        # Per-frame output columns, filled in the loop below. The order here
        # is the column order of the written CSV.
        for column, default in (
            ("rgb_path", None),
            ("rgb_mean", np.nan),
            ("rgb_std", np.nan),
            ("rgb_min", np.nan),
            ("rgb_max", np.nan),
            ("depth_path", None),
            ("depth_mean", np.nan),
            ("depth_std", np.nan),
            ("depth_min", np.nan),
            ("depth_max", np.nan),
            ("invalid_ratio", np.nan),
        ):
            split_meta_df[column] = default

        for i, row in tqdm(split_meta_df.iterrows(), total=len(split_meta_df)):
            # Locate the three input files of this frame.
            frame_tag = f"frame.{row.frame_id:04d}"
            images_dir = os.path.join(dataset_dir, row.scene_name, "images")
            final_dir = os.path.join(images_dir, f"scene_{row.camera_name}_final_hdf5")
            geometry_dir = os.path.join(
                images_dir, f"scene_{row.camera_name}_geometry_hdf5"
            )
            src_rgb_file = os.path.join(final_dir, f"{frame_tag}.color.hdf5")
            src_dist_file = os.path.join(geometry_dir, f"{frame_tag}.depth_meters.hdf5")
            src_entity_file = os.path.join(
                geometry_dir, f"{frame_tag}.render_entity_id.hdf5"
            )
            # Explicit check (not ``assert``) so it also runs under ``python -O``
            # and names the missing file.
            for src_file in (src_rgb_file, src_dist_file, src_entity_file):
                if not os.path.exists(src_file):
                    raise FileNotFoundError(src_file)

            # Load data
            rgb = _read_hdf5_dataset(src_rgb_file, float)
            dist_from_center = _read_hdf5_dataset(src_dist_file, float)
            render_entity_id = _read_hdf5_dataset(src_entity_file, int)

            # Tone map
            rgb_color_tm = tone_map(rgb, render_entity_id)
            rgb_int = (rgb_color_tm * 255).astype(np.uint8)  # [H, W, RGB]

            # Distance -> depth
            plane_depth = dist_2_depth(
                IMG_WIDTH, IMG_HEIGHT, FOCAL_LENGTH, dist_from_center
            )
            valid_mask = render_entity_id != -1

            # Record invalid ratio; invalid pixels get depth 0.
            invalid_ratio = (~valid_mask).sum() / valid_mask.size
            plane_depth[~valid_mask] = 0

            # Save as png
            scene_path = row.scene_name
            os.makedirs(os.path.join(split_output_dir, scene_path), exist_ok=True)

            rgb_name = f"rgb_{row.camera_name}_fr{row.frame_id:04d}.png"
            out_rgb_path = os.path.join(scene_path, rgb_name)
            cv2.imwrite(
                os.path.join(split_output_dir, out_rgb_path),
                cv2.cvtColor(rgb_int, cv2.COLOR_RGB2BGR),
            )

            # Saturate at the uint16 limit before casting: a plain cast wraps
            # around, turning depths beyond 65.535 into small bogus values.
            depth_uint16 = np.clip(
                plane_depth * depth_png_scale, 0, uint16_max
            ).astype(np.uint16)
            depth_name = f"depth_plane_{row.camera_name}_fr{row.frame_id:04d}.png"
            out_depth_path = os.path.join(scene_path, depth_name)
            cv2.imwrite(os.path.join(split_output_dir, out_depth_path), depth_uint16)

            # Meta data
            split_meta_df.at[i, "rgb_path"] = out_rgb_path
            split_meta_df.at[i, "rgb_mean"] = np.mean(rgb_int)
            split_meta_df.at[i, "rgb_std"] = np.std(rgb_int)
            split_meta_df.at[i, "rgb_min"] = np.min(rgb_int)
            split_meta_df.at[i, "rgb_max"] = np.max(rgb_int)

            # Statistics describe what was written to disk, i.e. the
            # quantised depth, not the floating point depth.
            split_meta_df.at[i, "depth_path"] = out_depth_path
            restored_depth = depth_uint16 / depth_png_scale
            split_meta_df.at[i, "depth_mean"] = np.mean(restored_depth)
            split_meta_df.at[i, "depth_std"] = np.std(restored_depth)
            split_meta_df.at[i, "depth_min"] = np.min(restored_depth)
            split_meta_df.at[i, "depth_max"] = np.max(restored_depth)

            split_meta_df.at[i, "invalid_ratio"] = invalid_ratio

        # One "<rgb_path> <depth_path>" line per frame, no trailing newline.
        lines = split_meta_df.apply(
            lambda r: f"{r['rgb_path']} {r['depth_path']}", axis=1
        ).tolist()
        with open(
            os.path.join(split_output_dir, f"filename_list_{split}.txt"), "w"
        ) as f:
            f.write("\n".join(lines))

        split_meta_df.to_csv(
            os.path.join(split_output_dir, f"filename_meta_{split}.csv"), header=True
        )

    print("Preprocess finished")

0 commit comments

Comments
 (0)