Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding multi-camera for the side grayscale fisheye lens and dual-user training support on the Aria glasses #2932

Open
wants to merge 37 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
37a78a3
solved many camera.width issues and AriaImageFrame issues
AntonioMacaronio Feb 20, 2024
0ddfb6b
adding transform.json writing, able to get all image data from VRS file!
AntonioMacaronio Feb 20, 2024
70374b2
adding per image pixel sampling due to different multi-cam feature
AntonioMacaronio Feb 26, 2024
205bcd2
testing many changes to fix extrinsic matrices
AntonioMacaronio Feb 28, 2024
4ea6e55
cleaning up testing
AntonioMacaronio Feb 28, 2024
c4452f6
merge main into branch
AntonioMacaronio Feb 28, 2024
493a103
fixes - anthony's awesome commit
AntonioMacaronio Feb 28, 2024
e63dda1
experimenting with new pinhole idea
AntonioMacaronio Mar 9, 2024
e683148
all 3 camera rendering seems to be doing very well in full effect, ex…
AntonioMacaronio Mar 10, 2024
039d1a4
cleaning up and detailing segfault locations
AntonioMacaronio Mar 20, 2024
c7b16ab
fixing the changes, don't need to upload VRS file
AntonioMacaronio Apr 5, 2024
cf918b4
fixing formatting
AntonioMacaronio Apr 11, 2024
c92f0f9
Merge branch 'main' into aria-side-camera-support
AntonioMacaronio Apr 15, 2024
65eceb7
splatfacto temporary fix, as well as temporary changes to process_pro…
AntonioMacaronio Apr 17, 2024
3a539ac
fixing commit tree with proper changes
AntonioMacaronio Apr 22, 2024
eb6f0f6
contains all print statements and debugging information
AntonioMacaronio Apr 30, 2024
29462ea
cleanup: only full_images_datamanager.py, process_project_aria.py, pi…
AntonioMacaronio Apr 30, 2024
1ec4052
updating branch by merging with main
AntonioMacaronio Apr 30, 2024
b7dd359
testing endline PR
AntonioMacaronio Apr 30, 2024
a10a241
fixing all endlines for PR cleanup
AntonioMacaronio Apr 30, 2024
594b270
fixed shape errors and many bugs, working appearance embedded gsplat …
AntonioMacaronio May 10, 2024
95c7eab
added new variables for multi-user processing
AntonioMacaronio May 24, 2024
3e8a930
supporting dual user aria recordings
AntonioMacaronio May 26, 2024
ead7740
final commit containing Kevin's appearance changes to recreate 5/25/2…
AntonioMacaronio May 26, 2024
b687a53
updating all files for a clean merge
AntonioMacaronio May 26, 2024
fa651df
finishing merged with main
AntonioMacaronio May 26, 2024
9c288b1
formatting and additional code cleanup
AntonioMacaronio May 27, 2024
4edc2e7
sorting imports to pass ruff linter
AntonioMacaronio May 27, 2024
6bed8e1
another cleaning up of imports
AntonioMacaronio May 29, 2024
c92055f
fixed final ruff linting error!
AntonioMacaronio May 29, 2024
6265972
Merge branch 'main' into aria-side-camera-support
AntonioMacaronio May 29, 2024
4121e5d
ran ruff formatter as well as linter
AntonioMacaronio May 29, 2024
b5024c3
fixing many errors involving PyLance
AntonioMacaronio May 29, 2024
3068a9e
repass ruff formatting
AntonioMacaronio May 29, 2024
7020f8c
repass ruff formatting
AntonioMacaronio May 29, 2024
35c9605
ruff formatting rechecking
AntonioMacaronio May 29, 2024
21b4d58
final pyright error
AntonioMacaronio May 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
238 changes: 186 additions & 52 deletions nerfstudio/scripts/datasets/process_project_aria.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,17 @@
import threading
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, cast
from typing import Any, Dict, List, Optional, cast

import numpy as np
import open3d as o3d
import tyro
from PIL import Image

try:
from projectaria_tools.core import mps
from projectaria_tools.core import calibration, mps
from projectaria_tools.core.data_provider import VrsDataProvider, create_vrs_data_provider
from projectaria_tools.core.image import InterpolationMethod
from projectaria_tools.core.mps.utils import filter_points_from_confidence
from projectaria_tools.core.sophus import SE3
except ImportError:
Expand Down Expand Up @@ -68,6 +69,7 @@ class AriaImageFrame:
file_path: str
t_world_camera: SE3
timestamp_ns: float
pinhole_intrinsic: List[int]


@dataclass
Expand All @@ -76,11 +78,10 @@ class TimedPoses:
t_world_devices: List[SE3]


def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibration]:
def get_camera_calibs(provider: VrsDataProvider, name="camera-rgb") -> AriaCameraCalibration:
"""Retrieve the per-camera factory calibration from within the VRS."""

factory_calib = {}
name = "camera-rgb"
device_calib = provider.get_device_calibration()
assert device_calib is not None, "Could not find device calibration"
sensor_calib = device_calib.get_camera_calib(name)
Expand All @@ -101,7 +102,7 @@ def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibrat
t_device_camera=sensor_calib.get_transform_device_camera(),
)

return factory_calib
return factory_calib[name]


def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses:
Expand All @@ -118,24 +119,76 @@ def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses:
)


def undistort_fisheye624(provider: VrsDataProvider, sensor_name: str, index: int):  # -> List[np.ndarray, tuple]:
    """
    Rectify a single frame of a fisheye624 camera stream into a pinhole (linear) image.

    Args:
        provider: data provider opened on the VRS recording.
        sensor_name: label of the camera stream to read from (e.g. "camera-rgb").
        index: index of the frame inside that stream.

    Returns:
        A pair ``(rectified_image, intrinsic)`` where ``intrinsic`` is the list
        ``[f_x, f_y, c_x, c_y]`` describing the pinhole model of the rectified image.

    Raises:
        AssertionError: if the stream or the device calibration cannot be found, or if a
            calibration object is not a ``calibration.CameraCalibration``.
    """
    # Fetch the raw frame for the requested stream / index.
    stream_id = provider.get_stream_id_from_label(sensor_name)
    assert stream_id is not None, f"Could not find stream {sensor_name}"
    frame_record = provider.get_image_data_by_index(stream_id, index)
    raw_frame = frame_record[0].to_numpy_array().astype(np.uint64)

    # Source calibration: the sensor's own calibration as stored on the device.
    device_calibration = provider.get_device_calibration()
    assert device_calibration is not None, "Could not find device calibration"
    fisheye_calib = device_calibration.get_camera_calib(sensor_name)
    assert isinstance(fisheye_calib, calibration.CameraCalibration), "src_calib is not of type CameraCalibration"

    # Destination calibration: a linear camera with the same resolution as the raw frame.
    # The RGB camera gets a longer focal length than the other (side) cameras.
    if sensor_name == "camera-rgb":
        focal = 500
    else:
        focal = 170
    frame_shape = raw_frame.shape
    height, width = frame_shape[0], frame_shape[1]
    pinhole_calib = calibration.get_linear_camera_calibration(width, height, focal, sensor_name)
    assert isinstance(pinhole_calib, calibration.CameraCalibration), "dst_calib is not of type CameraCalibration"

    # Resample the fisheye624 frame onto the pinhole camera.
    undistorted = calibration.distort_by_calibration(
        raw_frame, pinhole_calib, fisheye_calib, InterpolationMethod.BILINEAR
    )

    # The linear camera model (a.k.a pinhole model) is parametrized by 4 coefficients: f_x, f_y, c_x, c_y.
    pinhole_params = [focal, focal, width // 2, height // 2]
    return undistorted, pinhole_params


def generate_circular_mask(numRows: int, numCols: int, radius: float):
    """
    Build a binary mask containing a filled disc centred on the image.

    Pixels whose Euclidean distance to the centre pixel ``(numRows // 2, numCols // 2)`` is at
    most ``radius`` are set to 1 (sampled from); all remaining pixels are 0 (masked out).

    Returns:
        A ``(numRows, numCols)`` array of dtype ``uint8``.
    """
    # Open index grids: a column of row indices and a row of column indices that broadcast together.
    row_idx, col_idx = np.ogrid[:numRows, :numCols]
    row_offset = row_idx - numRows // 2
    col_offset = col_idx - numCols // 2

    # Distance of every pixel from the centre, then threshold straight into a 0/1 mask.
    dist = np.sqrt(row_offset**2 + col_offset**2)
    return (dist <= radius).astype(np.uint8)


def to_aria_image_frame(
provider: VrsDataProvider,
index: int,
name_to_camera: Dict[str, AriaCameraCalibration],
t_world_devices: TimedPoses,
output_dir: Path,
name: str = "camera-rgb",
pinhole: bool = False,
) -> AriaImageFrame:
name = "camera-rgb"

camera_calibration = name_to_camera[name]
aria_camera_calibration = name_to_camera[name]
stream_id = provider.get_stream_id_from_label(name)
assert stream_id is not None, f"Could not find stream {name}"

# Get the image corresponding to this index
image_data = provider.get_image_data_by_index(stream_id, index)
img = Image.fromarray(image_data[0].to_numpy_array())
rectified_img, intrinsic = image_data[0].to_numpy_array(), [0, 0, 0, 0]
if pinhole:
rectified_img, intrinsic = undistort_fisheye624(provider, name, index)
img = Image.fromarray(rectified_img)
capture_time_ns = image_data[1].capture_timestamp_ns

# save the image
file_path = f"{output_dir}/{name}_{capture_time_ns}.jpg"
threading.Thread(target=lambda: img.save(file_path)).start()

Expand All @@ -146,17 +199,31 @@ def to_aria_image_frame(
t_world_device = t_world_devices.t_world_devices[nearest_pose_idx]

# Compute the world to camera transform.
t_world_camera = t_world_device @ camera_calibration.t_device_camera @ T_ARIA_NERFSTUDIO
t_world_camera = t_world_device @ aria_camera_calibration.t_device_camera @ T_ARIA_NERFSTUDIO

return AriaImageFrame(
camera=camera_calibration,
camera=aria_camera_calibration,
file_path=file_path,
t_world_camera=t_world_camera,
timestamp_ns=capture_time_ns,
pinhole_intrinsic=intrinsic,
)


def to_nerfstudio_frame(frame: AriaImageFrame) -> Dict:
def to_nerfstudio_frame(frame: AriaImageFrame, pinhole: bool = False, mask_path: str = "") -> Dict:
if pinhole:
return {
"fl_x": frame.pinhole_intrinsic[0],
"fl_y": frame.pinhole_intrinsic[1],
"cx": frame.pinhole_intrinsic[2],
"cy": frame.pinhole_intrinsic[3],
"w": frame.pinhole_intrinsic[2] * 2,
"h": frame.pinhole_intrinsic[3] * 2,
"file_path": frame.file_path,
"transform_matrix": frame.t_world_camera.to_matrix().tolist(),
"timestamp": frame.timestamp_ns,
"mask_path": mask_path,
}
return {
"fl_x": frame.camera.fx,
"fl_y": frame.camera.fy,
Expand Down Expand Up @@ -184,59 +251,126 @@ class ProcessProjectAria:
"""Path to Project Aria Machine Perception Services (MPS) attachments."""
output_dir: Path
"""Path to the output directory."""
points_file: Optional[Path] = None
"""Path to the point cloud file (usually called semidense_points.csv.gz) if not in the mps_data_dir"""
include_side_cameras: bool = False
"""If True, include and process the images captured by the grayscale side cameras. If False, only uses the main RGB camera's data."""
vrs_file2: Optional[Path] = None
"""Path to the second VRS file if provided"""
mps_data_dir2: Optional[Path] = None
"""Path to the second MPS attachments if provided"""
points_file2: Optional[Path] = None
"""Path to the second point cloud file if provided"""

def main(self) -> None:
"""Generate a nerfstudio dataset from ProjectAria data (VRS) and MPS attachments."""
# Create output directory if it doesn't exist.
self.output_dir = self.output_dir.absolute()
self.output_dir.mkdir(parents=True, exist_ok=True)

provider = create_vrs_data_provider(str(self.vrs_file.absolute()))
assert provider is not None, "Cannot open file"

name_to_camera = get_camera_calibs(provider)

print("Getting poses from closed loop trajectory CSV...")
trajectory_csv = self.mps_data_dir / "closed_loop_trajectory.csv"
t_world_devices = read_trajectory_csv_to_dict(str(trajectory_csv.absolute()))
vrs_mps_points_triplets = [(self.vrs_file, self.mps_data_dir, self.points_file)]
if self.vrs_file2 and self.mps_data_dir2:
vrs_mps_points_triplets.append((self.vrs_file2, self.mps_data_dir2, self.points_file2))

name = "camera-rgb"
stream_id = provider.get_stream_id_from_label(name)

# create an AriaImageFrame for each image in the VRS.
print("Creating Aria frames...")
aria_frames = [
to_aria_image_frame(provider, index, name_to_camera, t_world_devices, self.output_dir)
for index in range(0, provider.get_num_data(stream_id))
]

# create the NerfStudio frames from the AriaImageFrames.
print("Creating NerfStudio frames...")
CANONICAL_RGB_VALID_RADIUS = 707.5
CANONICAL_RGB_WIDTH = 1408
rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (aria_frames[0].camera.width / CANONICAL_RGB_WIDTH)
nerfstudio_frames = {
"camera_model": ARIA_CAMERA_MODEL,
"frames": [to_nerfstudio_frame(frame) for frame in aria_frames],
"fisheye_crop_radius": rgb_valid_radius,
"camera_model": "OPENCV" if self.include_side_cameras else ARIA_CAMERA_MODEL,
"frames": [],
}

# save global point cloud, which is useful for Gaussian Splatting.
points_path = self.mps_data_dir / "global_points.csv.gz"
if not points_path.exists():
# MPS point cloud output was renamed in Aria's December 4th, 2023 update.
# https://facebookresearch.github.io/projectaria_tools/docs/ARK/sw_release_notes#project-aria-updates-aria-mobile-app-v140-and-changes-to-mps
points_path = self.mps_data_dir / "semidense_points.csv.gz"

if points_path.exists():
print("Found global points, saving to PLY...")
points_data = mps.read_global_point_cloud(str(points_path)) # type: ignore
points_data = filter_points_from_confidence(points_data)
points = []

for rec_i, (vrs_file, mps_data_dir, points_file) in enumerate(vrs_mps_points_triplets):
provider = create_vrs_data_provider(str(vrs_file.absolute()))
assert provider is not None, "Cannot open file"

names = ["camera-rgb", "camera-slam-left", "camera-slam-right"]
name_to_camera = {
name: get_camera_calibs(provider, name) for name in names
} # name_to_camera is of type Dict[str, AriaCameraCalibration]

print(f"Getting poses from recording {rec_i + 1}'s closed loop trajectory CSV...")
trajectory_csv = mps_data_dir / "closed_loop_trajectory.csv"
t_world_devices = read_trajectory_csv_to_dict(str(trajectory_csv.absolute()))

stream_ids = [provider.get_stream_id_from_label(name) for name in names]

# create an AriaImageFrame for each image in the VRS.
print(f"Creating Aria frames for recording {rec_i + 1}...")
CANONICAL_RGB_VALID_RADIUS = 707.5 # radius of a circular mask that represents the valid area on the camera's sensor plane. Pixels out of this circular region are considered invalid
CANONICAL_RGB_WIDTH = 1408
if not self.include_side_cameras:
aria_rgb_frames = [
to_aria_image_frame(
provider, index, name_to_camera, t_world_devices, self.output_dir, name=names[0]
)
for index in range(0, provider.get_num_data(stream_ids[0]))
]
print(f"Creating NerfStudio frames for recording {rec_i + 1}...")
nerfstudio_frames["frames"] += [to_nerfstudio_frame(frame) for frame in aria_rgb_frames]
rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (
aria_rgb_frames[0].camera.width / CANONICAL_RGB_WIDTH
) # to handle both high-res 2880 x 2880 aria captures
nerfstudio_frames["fisheye_crop_radius"] = rgb_valid_radius
else:
aria_all3cameras_pinhole_frames = [
[
to_aria_image_frame(
provider,
index,
name_to_camera,
t_world_devices,
self.output_dir,
name=names[i],
pinhole=True,
)
for index in range(0, provider.get_num_data(stream_id))
]
for i, stream_id in enumerate(stream_ids)
]
# generate masks for undistorted images
rgb_width = aria_all3cameras_pinhole_frames[0][0].camera.width
rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (rgb_width / CANONICAL_RGB_WIDTH)
slam_valid_radius = 330.0 # found here: https://github.com/facebookresearch/projectaria_tools/blob/4aee633cb667ab927825dc10477cad0df8393a34/core/calibration/loader/SensorCalibrationJson.cpp#L102C5-L104C18
rgb_mask_nparray, slam_mask_nparray = (
generate_circular_mask(rgb_width, rgb_width, rgb_valid_radius),
generate_circular_mask(480, 640, slam_valid_radius),
)
rgb_mask_filepath, slam_mask_filepath = (
f"{self.output_dir}/rgb_mask.jpg",
f"{self.output_dir}/slam_mask.jpg",
)
Image.fromarray(rgb_mask_nparray).save(rgb_mask_filepath)
Image.fromarray(slam_mask_nparray).save(slam_mask_filepath)

print(f"Creating NerfStudio frames for recording {rec_i + 1}...")
mask_filepaths = [rgb_mask_filepath, slam_mask_filepath, slam_mask_filepath]
pinhole_frames = [
to_nerfstudio_frame(frame, pinhole=True, mask_path=mask_filepath)
for i, mask_filepath in enumerate(mask_filepaths)
for frame in aria_all3cameras_pinhole_frames[i]
]
nerfstudio_frames["frames"] += pinhole_frames

if points_file:
points_path = points_file
else:
points_path = mps_data_dir / "global_points.csv.gz"
if not points_path.exists():
# MPS point cloud output was renamed in Aria's December 4th, 2023 update.
# https://facebookresearch.github.io/projectaria_tools/docs/ARK/sw_release_notes#project-aria-updates-aria-mobile-app-v140-and-changes-to-mps
points_path = mps_data_dir / "semidense_points.csv.gz"

if points_path.exists():
print(f"Found global points for recording {rec_i+1}")
points_data = mps.read_global_point_cloud(str(points_path)) # type: ignore
points_data = filter_points_from_confidence(points_data)
points += [cast(Any, it).position_world for it in points_data]

if points:
print("Saving found points to PLY...")
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(np.array([cast(Any, it).position_world for it in points_data]))
pcd.points = o3d.utility.Vector3dVector(np.array(points))
ply_file_path = self.output_dir / "global_points.ply"
o3d.io.write_point_cloud(str(ply_file_path), pcd)

nerfstudio_frames["ply_file_path"] = "global_points.ply"
else:
print("No global points found!")
Expand Down