본문 바로가기

Deep Learning

Image Frustum to Global 3D

반응형
# Build a per-camera frustum of (pixel x, pixel y, depth) samples on the
# downsampled feature grid, unproject it with the inverse of `I`, move it with
# the inverse of `E`, and min-max normalise the first three coordinates.
# NOTE(review): `I` / `E` look like camera intrinsics (b n 3 3) and extrinsics
# (b n 4 4) -- confirm against the caller.
h = self.cfg['image']['h']
w = self.cfg['image']['w']
n_cam, dim, downsampled_h, downsampled_w = feat.size()

# Depth hypotheses: 1, 2, ..., 64 -- one slice per value.
depth_grid = torch.arange(1, 65, 1, dtype=torch.float)
n_depth_slices = depth_grid.shape[0]
grid_shape = (n_depth_slices, downsampled_h, downsampled_w)
depth_grid = depth_grid[:, None, None].expand(*grid_shape)

# Pixel coordinates in full-resolution image units, sampled at feature-map
# resolution and broadcast over every depth slice.
x_grid = torch.linspace(0, w - 1, downsampled_w, dtype=torch.float)
x_grid = x_grid[None, None, :].expand(*grid_shape)
y_grid = torch.linspace(0, h - 1, downsampled_h, dtype=torch.float)
y_grid = y_grid[None, :, None].expand(*grid_shape)

frustum = torch.stack((x_grid, y_grid, depth_grid), dim=-1)  # D H W 3
D, H, W, C = frustum.size()
frustum = rearrange(frustum, 'd h w c -> d c (h w)')  # D 3 (H W)

# Scale the pixel coordinates by depth, giving rows (x * d, y * d, d), so the
# 3x3 inverse below can be applied directly.
depth_channel = frustum[:, 2:3, :]
pixel_channels = frustum[:, :2, :]
frustum = torch.cat((pixel_channels * depth_channel, depth_channel), dim=1)  # D 3 (H W)

# One copy of the frustum per entry of feat's leading dimension.
# NOTE(review): the batch dimension here is 1 and n_cam is feat.size(0);
# verify this still broadcasts against Kinv when the batch size is > 1.
frustum = frustum.unsqueeze(0).unsqueeze(0).repeat(1, n_cam, 1, 1, 1)  # b n D 3 (H W)

# The inverses are computed on the CPU, where the frustum tensors above were
# created, and then tiled across the depth axis.
Kinv = I.unsqueeze(2).to('cpu').inverse().repeat(1, 1, D, 1, 1)  # b n D 3 3
Rinv = E.unsqueeze(2).to('cpu').inverse().repeat(1, 1, D, 1, 1)  # b n D 4 4

camera = torch.matmul(Kinv, frustum)  # b n D 3 (H W)
# Append a row of ones (homogeneous coordinate) along the channel axis; only
# the two trailing dimensions need to be listed in the pad spec.
camera = F.pad(camera, (0, 0, 0, 1), value=1)  # b n D 4 (H W)

ego = torch.matmul(Rinv, camera)  # b n D 4 (H W)

# Min-max normalise channels 0, 1 and 2 independently to [0, 1]; channel 3
# (the padded ones) is left untouched.
# NOTE(review): min/max are taken over the whole tensor, i.e. shared across
# batch, cameras and depth slices -- confirm that is intended.
for axis in range(3):
    coords = ego[:, :, :, axis]
    lo = coords.min()
    hi = coords.max()
    ego[:, :, :, axis] = (coords - lo) / (hi - lo)

# Fold depth and channel into one axis and match feat's dtype/device.
ego = rearrange(ego, 'b n d c (h w) -> (b n) (d c) h w', h=H, w=W)
ego = ego.to(feat)