#!/usr/bin/env python3
# --------------------------------------------------------
# Script to pre-process the Aria Synthetic Environments (ASE) dataset
# into a ScanNet-style layout (color / depth / pose / intrinsic).
# Usage:
# 1. Prepare the codebase and environment for projectaria_tools
# 2. Copy this script to the project root directory
# 3. Run the script
# --------------------------------------------------------
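# Expected on-disk layout (derived from the paths used below; adjust if your data lives elsewhere):
#   ase_raw/<scene_id>/rgb/vignette*.jpg
#   ase_raw/<scene_id>/depth/depth*.png
#   ase_raw/<scene_id>/trajectory.csv
# Output, per scene:
#   ase_processed/<scene_id>/{color,depth,pose,intrinsic}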
import random
from pathlib import Path

import cv2
import numpy as np
import open3d as o3d
from PIL import Image
from tqdm import tqdm

from projectaria_tools.core import calibration
from projectaria_tools.core.image import InterpolationMethod
from projectaria_tools.projects import ase
from projects.AriaSyntheticEnvironment.tutorial.code_snippets.readers import read_trajectory_file


def save_pointcloud(points_3d_array, rgb, pcd_name):
    # Per-point RGB values, one row per 3D point
    rgb_values_flat = rgb

    # Check that the number of points matches the number of RGB values
    assert points_3d_array.shape[0] == rgb_values_flat.shape[0], "The number of points must match the number of RGB values"

    # Create an Open3D point cloud object
    pcd = o3d.geometry.PointCloud()

    # Assign the 3D points to the point cloud object
    pcd.points = o3d.utility.Vector3dVector(points_3d_array)

    # Assign the colors to the point cloud (normalized to [0, 1])
    pcd.colors = o3d.utility.Vector3dVector(rgb_values_flat / 255.0)

    # Save the point cloud in PCD format
    output_file_path = pcd_name + '.pcd'
    o3d.io.write_point_cloud(output_file_path, pcd)

    print(f"Point cloud saved to {output_file_path}")


def unproject(camera_params, undistorted_depth, undistorted_rgb):
    # camera_params is (fx, fy, cx, cy) of the rectified (and rotated) pinhole camera
    fx, fy, cx, cy = camera_params

    # Get the height and width of the depth image
    height, width = undistorted_depth.shape

    # Generate per-pixel (u, v) coordinates
    y, x = np.indices((height, width))
    pixel_coords = np.stack((x, y), axis=-1).reshape(-1, 2)

    # Flatten the depth and RGB images into per-pixel arrays
    depth_values_flat = undistorted_depth.flatten()
    rgb_values_flat = undistorted_rgb.reshape(-1, 3)

    # Accumulate 3D points and their colors
    points_3d = []
    valid_rgb = []

    for pixel_coord, depth, rgb in zip(pixel_coords, depth_values_flat, rgb_values_flat):
        # Back-project the pixel through the pinhole model to get a viewing ray
        X = (pixel_coord[0] - cx) / fx  # X = (u - cx) / fx
        Y = (pixel_coord[1] - cy) / fy  # Y = (v - cy) / fy
        direction_vector = np.array([X, Y, 1], dtype=np.float32)

        # Scale the normalized ray by the depth value (divide by 1000: millimetres -> metres)
        direction_vector_normalized = direction_vector / np.linalg.norm(direction_vector)
        point_3d = direction_vector_normalized * (depth / 1000)

        # Store the computed 3D point and the corresponding color
        points_3d.append(point_3d)
        valid_rgb.append(rgb)

    # Convert the lists to numpy arrays
    points_3d_array = np.array(points_3d)
    points_rgb = np.array(valid_rgb)
    return points_3d_array, points_rgb

def distance_to_depth(K, dist, uv=None):
    # Convert Euclidean distance along the viewing ray into planar z-depth:
    # dist = z * ||inv(K) @ [u, v, 1]^T||, so z = dist / ||inv(K) @ [u, v, 1]^T||
    if uv is None and len(dist.shape) >= 2:
        # Build a pixel grid matching the distance image
        uv = np.stack(np.meshgrid(np.arange(dist.shape[1]), np.arange(dist.shape[0])), -1)
        uv = uv.reshape(-1, 2)
        dist = dist.reshape(-1)
    if isinstance(dist, np.ndarray):
        uvh = np.concatenate([uv, np.ones((len(uv), 1))], -1)  # (N, 3)
        uvh = uvh.T  # (3, N)
        temp_point = np.linalg.inv(K) @ uvh  # (3, N)
        temp_point = temp_point.T  # (N, 3)
        z = dist / np.linalg.norm(temp_point, axis=1)
    else:
        # Torch path (dist is a torch.Tensor; K is expected to be a tensor as well)
        import torch
        if isinstance(uv, np.ndarray):
            uv = torch.from_numpy(uv).to(dist)
        uvh = torch.cat([uv, torch.ones(len(uv), 1).to(uv)], -1)
        temp_point = (torch.inverse(K) @ uvh.T).T
        z = dist / torch.linalg.norm(temp_point, dim=1)
    return z
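
# Note: distance_to_depth() is only exercised by the commented-out debug line in
# aria_export_to_scannet() below; a hedged usage sketch, assuming the rectified depth
# stores distance along the ray rather than planar z-depth:
#   z_depth = distance_to_depth(K, rotated_depth).reshape(rotated_depth.shape)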

def transform_3d_points(transform, points):
    # Apply a 4x4 homogeneous transform to an (N, 3) array of points
    N = len(points)
    points_h = np.concatenate([points, np.ones((N, 1))], axis=1)
    transformed_points_h = (transform @ points_h.T).T
    transformed_points = transformed_points_h[:, :-1]
    return transformed_points


def aria_export_to_scannet(scene_id, seed):
    random.seed(int(seed + scene_id))
    src_folder = Path("ase_raw/"+str(scene_id))
    trgt_folder = Path("ase_processed/"+str(scene_id))
    trgt_folder.mkdir(parents=True, exist_ok=True)
    SCENE_ID = src_folder.stem
    print("SCENE_ID:", SCENE_ID)

    scene_max_depth = 0
    scene_min_depth = np.inf
    Path(trgt_folder, "intrinsic").mkdir(exist_ok=True)
    Path(trgt_folder, "pose").mkdir(exist_ok=True)
    Path(trgt_folder, "depth").mkdir(exist_ok=True)
    Path(trgt_folder, "color").mkdir(exist_ok=True)

    rgb_dir = src_folder / "rgb"
    depth_dir = src_folder / "depth"
    # Load camera calibration
    device = ase.get_ase_rgb_calibration()
    # Load the trajectory using read_trajectory_file() 
    trajectory_path = src_folder / "trajectory.csv"
    trajectory = read_trajectory_file(trajectory_path)
    all_points_3d = []
    all_rgb = []
    num_frames = len(list(rgb_dir.glob("*.jpg")))
    # Path('./debug').mkdir(exist_ok=True)
    for frame_idx in tqdm(range(num_frames)):
        frame_id = str(frame_idx).zfill(7)
        rgb_path = rgb_dir / f"vignette{frame_id}.jpg"
        depth_path = depth_dir / f"depth{frame_id}.png"
        depth = Image.open(depth_path)  # uint16
        rgb = cv2.imread(str(rgb_path), cv2.IMREAD_UNCHANGED)
        depth = np.array(depth)
        scene_min_depth = min(depth.min(), scene_min_depth)
        # Pixels stored as the maximum uint16 value encode "infinite" depth;
        # treat them as invalid by setting them to 0 in place
        inf_value = np.iinfo(depth.dtype).max
        depth[depth == inf_value] = 0
        T_world_from_device = trajectory["Ts_world_from_device"][frame_idx]  # device-to-world
        assert device.get_image_size()[0] == 704
        # https://facebookresearch.github.io/projectaria_tools/docs/data_utilities/advanced_code_snippets/image_utilities
        focal_length = device.get_focal_lengths()[0]
        pinhole = calibration.get_linear_camera_calibration(
            512,
            512,
            focal_length,
            "camera-rgb",
            device.get_transform_device_camera()  # needed so pinhole_cw90 below carries the correct device-to-camera transform
            )
        # Rectify the fisheye RGB image into the linear pinhole model
        rectified_rgb = calibration.distort_by_calibration(np.array(rgb), pinhole, device, InterpolationMethod.BILINEAR)
        # distort_by_calibration needs a float image for depth, so convert uint16 -> float32 first
        depth = depth.astype(np.float32)
        rectified_depth = calibration.distort_by_calibration(depth, pinhole, device)
        
        # Rotate 90 degrees clockwise so the images are upright (matches rotate_camera_calib_cw90deg below)
        rotated_image = np.rot90(rectified_rgb, k=3)
        rotated_depth = np.rot90(rectified_depth, k=3)

        cv2.imwrite(str(Path(trgt_folder, "color", f"{frame_id}.jpg")), rotated_image)
        # # TODO: check this
        # plt.imsave(Path(f"./debug/debug_undistort_{frame_id}.png"), np.uint16(rotated_depth), cmap="plasma")
        # Get rotated image calibration
        pinhole_cw90 = calibration.rotate_camera_calib_cw90deg(pinhole)
        principal = pinhole_cw90.get_principal_point()
        cx, cy = principal[0], principal[1]
        focal_lengths = pinhole_cw90.get_focal_lengths()
        fx, fy = focal_lengths 
        K = np.array([ # camera-to-pixel
            [fx, 0, cx],
            [0, fy, cy],
            [0, 0, 1.0]])

        # Compose camera-to-world: T_world_from_device @ T_device_from_camera (note the right-multiplication)
        T_device_from_camera = pinhole_cw90.get_transform_device_camera().to_matrix()
        cam2world = T_world_from_device @ T_device_from_camera
        
        # save depth
        rotated_depth = np.uint16(rotated_depth)
        depth_image = Image.fromarray(rotated_depth, mode='I;16')
        depth_image.save(str(Path(trgt_folder, "depth", f"{frame_id}.png")))
        # for debug; load depth and convert to pointcloud
        # depth_image = np.array(Image.open(str(Path(trgt_folder, "depth", f"{frame_id}.png"))), dtype=np.uint16)
        # points_3d_array, points_rgb = unproject((fx, fy, cx, cy), depth_image, rotated_image)
        # points_3d_world = transform_3d_points(cam2world, points_3d_array)
        # all_points_3d.append(points_3d_world)
        # all_rgb.append(points_rgb)
        # distance-to-depth
        # rotated_depth = distance_to_depth(K, rotated_depth).reshape((rotated_depth.shape[0], rotated_depth.shape[1]))#.reshape((dpt.shape[0], dpt.shape[1]))        

        Path(trgt_folder, "intrinsic", "intrinsic_color.txt").write_text(f"""{K[0][0]} {K[0][1]} {K[0][2]} 0.00\n{K[1][0]} {K[1][1]} {K[1][2]} 0.00\n{K[2][0]} {K[2][1]} {K[2][2]} 0.00\n0.00 0.00 0.00 1.00""")
        Path(trgt_folder, "pose", f"{frame_id}.txt").write_text(f"""{cam2world[0, 0]} {cam2world[0, 1]} {cam2world[0, 2]} {cam2world[0, 3]}\n{cam2world[1, 0]} {cam2world[1, 1]} {cam2world[1, 2]} {cam2world[1, 3]}\n{cam2world[2, 0]} {cam2world[2, 1]} {cam2world[2, 2]} {cam2world[2, 3]}\n0.00 0.00 0.00 1.00""")
    


if __name__ == "__main__":
    seed = 42
    # Process ASE scenes 0-499
    for scene_id in tqdm(range(0, 500)):
        aria_export_to_scannet(scene_id=scene_id, seed=seed)