Skip to content

Commit

Permalink
nuScenes KITTI 2d boxes (nutonomy#158)
Browse files Browse the repository at this point in the history
* Convert 3d boxes to 2d for kitti

* Bug fix with swapped sign

* Ignore boxes that are behind the camera

* Fix wrong type hint

* Fix order of statements

* Return none if no corners in image

* Added assertion for debugging and shortened output precision

* Fix cropping and ignore boxes that don't fall into the image

* Tools to plot 2d boxes

* Debug output

* Fleshed out comments

* Added disclaimer to 3d-2d projection
  • Loading branch information
holger-motional authored May 20, 2019
1 parent 5ec8a39 commit 57de64c
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 43 deletions.
9 changes: 7 additions & 2 deletions python-sdk/nuscenes/scripts/export_2d_annotations_as_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
# Licensed under the Creative Commons [see license.txt]

"""
Export 2D annotations (xmin, ymin,xmax, ymax) from re-projections of our annotated 3D bounding boxes to a .json file.
Export 2D annotations (xmin, ymin, xmax, ymax) from re-projections of our annotated 3D bounding boxes to a .json file.
Note: Projecting tight 3d boxes to 2d generally leads to non-tight boxes.
Furthermore it is non-trivial to determine whether a box falls into the image, rather than behind or around it.
Finally some of the objects may be occluded by other objects, in particular when the lidar can see them, but the
cameras cannot.
"""

from nuscenes.nuscenes import NuScenes
Expand Down Expand Up @@ -138,7 +143,7 @@ def get_2d_boxes(sample_data_token: str, visibilities: List[str]) -> List[Ordere
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]

# Applying the re-projection algorithm post-processing step..
# Applying the re-projection algorithm post-processing step.
corner_coords = view_points(corners_3d, camera_intrinsic, True).T[:, :2].tolist()
final_coords = post_process_coords(corner_coords)

Expand Down
33 changes: 25 additions & 8 deletions python-sdk/nuscenes/scripts/export_kitti.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
To launch these scripts run:
- python export_kitti.py nuscenes_gt_to_kitti --nusc_kitti_dir ~/nusc_kitti
- python export_kitti.py render_kitti --nusc_kitti_dir ~/nusc_kitti
- python export_kitti.py render_kitti --nusc_kitti_dir ~/nusc_kitti --render_2d False
- python export_kitti.py kitti_res_to_nuscenes --nusc_kitti_dir ~/nusc_kitti
Note: The parameter --render_2d specifies whether to draw 2d or 3d boxes.
To work with the original KITTI dataset, use these parameters:
--nusc_kitti_dir /data/sets/kitti --split training
Expand Down Expand Up @@ -90,6 +91,7 @@ def nuscenes_gt_to_kitti(self) -> None:
"""
kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
imsize = (1600, 900)

token_idx = 0 # Start tokens from 0.

Expand Down Expand Up @@ -216,7 +218,7 @@ def nuscenes_gt_to_kitti(self) -> None:
# Truncated: Set all objects to 0 which means untruncated.
truncated = 0.0

# Occluded: Hard-coded: Full visibility.
# Occluded: Set all objects to full visibility as this information is not available in nuScenes.
occluded = 0

# Convert nuScenes category to nuScenes detection challenge category.
Expand All @@ -226,20 +228,35 @@ def nuscenes_gt_to_kitti(self) -> None:
if detection_name is None:
continue

# Convert to KITTI 3d and 2d box and KITTI output format.
# Convert from nuScenes to KITTI box format.
box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)
box_cam_kitti.score = 0 # Set dummy score so we can use this file as result.
output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, truncation=truncated,
occlusion=occluded)

# Project 3d box to 2d box in image, ignore box if it does not fall inside.
bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
if bbox_2d is None:
continue

# Set dummy score so we can use this file as result.
box_cam_kitti.score = 0

# Convert box to output string format.
output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
truncation=truncated, occlusion=occluded)

# Write to disk.
label_file.write(output + '\n')

def render_kitti(self) -> None:
def render_kitti(self, render_2d: bool) -> None:
"""
Renders the annotations in the KITTI dataset from a lidar and a camera view.
:param render_2d: Whether to render 2d boxes (only works for camera data).
"""
if render_2d:
print('Rendering 2d boxes from KITTI format')
else:
print('Rendering 3d boxes projected from 3d KITTI format')

# Load the KITTI dataset.
kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

Expand All @@ -253,7 +270,7 @@ def render_kitti(self) -> None:
for sensor in ['lidar', 'camera']:
out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor))
print('Rendering file to disk: %s' % out_path)
kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path)
kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)
plt.close() # Close the windows to avoid a warning of too many open windows.

def kitti_res_to_nuscenes(self, meta: Dict[str, bool] = None) -> None:
Expand Down
124 changes: 91 additions & 33 deletions python-sdk/nuscenes/utils/kitti.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os
from os import path as osp
from typing import List, Tuple, Any
from typing import List, Tuple, Any, Union

import matplotlib.pyplot as plt
import numpy as np
Expand Down Expand Up @@ -153,13 +153,14 @@ def box_nuscenes_to_kitti(box: Box, velo_to_cam_rot: Quaternion,
return box

@staticmethod
def project_kitti_box_to_image(box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) -> Tuple[int, int, int, int]:
def project_kitti_box_to_image(box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) \
-> Union[None, Tuple[int, int, int, int]]:
"""
Projects 3D box into KITTI image FOV.
:param box: 3D box in KITTI reference frame.
:param p_left: <np.float: 3, 4>. Projection matrix.
:param imsize: (width , height). Image size.
:return: (xmin, ymin, xmax, ymax). Bounding box in image plane.
:param imsize: (width, height). Image size.
:return: (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.
"""

# Create a new box.
Expand All @@ -169,13 +170,25 @@ def project_kitti_box_to_image(box: Box, p_left: np.ndarray, imsize: Tuple[int,
# We use the true center, so we need to adjust half height in negative y direction.
box.translate(np.array([0, -box.wlh[2] / 2, 0]))

# Project corners to 2d to get bbox in pixel coords.
# Keep only the corners that lie in front of the camera (z > 0); bail out if there are none.
corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
if len(corners) == 0:
return None

# Project corners that are in front of the camera to 2d to get bbox in pixel coords.
imcorners = view_points(corners, p_left, normalize=True)[:2]
bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

# Crop bbox to prevent it extending outside image
bbox_crop = (max(0, bbox[0]), max(0, bbox[1]), min(imsize[0], bbox[2]), min(imsize[1], bbox[3]))
# Crop bbox to prevent it extending outside image.
bbox_crop = tuple(max(0, b) for b in bbox)
bbox_crop = (min(imsize[0], bbox_crop[0]),
min(imsize[0], bbox_crop[1]),
min(imsize[0], bbox_crop[2]),
min(imsize[1], bbox_crop[3]))

# Detect if a cropped box is empty.
if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
return None

return bbox_crop

Expand Down Expand Up @@ -275,7 +288,6 @@ def get_boxes(self,
return boxes

with open(KittiDB.get_filepath(token, 'label_2', root=self.root), 'r') as f:

for line in f:
# Parse this line into box information.
parsed_line = self.parse_label_line(line)
Expand Down Expand Up @@ -333,18 +345,51 @@ def get_boxes(self,

return boxes

def get_boxes_2d(self,
                 token: str,
                 filter_classes: List[str] = None) -> Tuple[
    List[Tuple[float, float, float, float]],
    List[str]
]:
    """
    Read the 2d boxes stored in the 'label_2' file of a sample.
    Lines labelled 'DontCare' or 'Misc' are always skipped.
    :param token: KITTI token.
    :param filter_classes: Optionally keep only boxes whose class name is in this list.
    :return: A list of boxes in KITTI format (xmin, ymin, xmax, ymax) and a list of the class names.
    """
    ignored_names = {'DontCare', 'Misc'}
    label_path = KittiDB.get_filepath(token, 'label_2', root=self.root)

    kept_boxes = []
    kept_names = []
    with open(label_path, 'r') as label_file:
        for raw_line in label_file:
            # Parse this line into box information.
            label = self.parse_label_line(raw_line)
            class_name = label['name']
            if class_name in ignored_names:
                continue

            box_2d = label['bbox_camera']

            # Optional: Filter classes.
            if filter_classes is not None and class_name not in filter_classes:
                continue

            kept_boxes.append(box_2d)
            kept_names.append(class_name)

    return kept_boxes, kept_names


@staticmethod
def box_to_string(name: str,
box: Box,
bbox: Tuple[float, float, float, float] = (-1.0, -1.0, -1.0, -1.0),
bbox_2d: Tuple[float, float, float, float] = (-1.0, -1.0, -1.0, -1.0),
truncation: float = -1.0,
occlusion: int = -1,
alpha: float = -10.0) -> str:
"""
Convert box in KITTI image frame to official label string format.
:param name: KITTI name of the box.
:param box: Box class in KITTI image frame.
:param bbox: Optional, 2D bounding box obtained by projected Box into image. Otherwise set to KITTI default.
:param bbox_2d: Optional, 2D bounding box obtained by projected Box into image (xmin, ymin, xmax, ymax).
Otherwise set to KITTI default.
:param truncation: Optional truncation, otherwise set to KITTI default.
:param occlusion: Optional occlusion, otherwise set to KITTI default.
:param alpha: Optional alpha, otherwise set to KITTI default.
Expand All @@ -356,13 +401,13 @@ def box_to_string(name: str,

# Prepare output.
name += ' '
trunc = '{:.3f} '.format(truncation)
trunc = '{:.2f} '.format(truncation)
occ = '{:d} '.format(occlusion)
a = '{:.3f} '.format(alpha)
bb = '{:.3f} {:.3f} {:.3f} {:.3f} '.format(bbox[0], bbox[1], bbox[2], bbox[3]) # bbox (xmin, ymin, xmax, ymax).
hwl = '{:.3f} {:.3f} {:.3f} '.format(box.wlh[2], box.wlh[0], box.wlh[1]) # height, width, length.
xyz = '{:.3f} {:.3f} {:.3f} '.format(box.center[0], box.center[1], box.center[2]) # x, y, z.
y = '{:.3f}'.format(yaw) # Yaw angle.
a = '{:.2f} '.format(alpha)
bb = '{:.2f} {:.2f} {:.2f} {:.2f} '.format(bbox_2d[0], bbox_2d[1], bbox_2d[2], bbox_2d[3])
hwl = '{:.2} {:.2f} {:.2f} '.format(box.wlh[2], box.wlh[0], box.wlh[1]) # height, width, length.
xyz = '{:.2f} {:.2f} {:.2f} '.format(box.center[0], box.center[1], box.center[2]) # x, y, z.
y = '{:.2f}'.format(yaw) # Yaw angle.
s = ' {:.4f}'.format(box.score) # Classification score.

output = name + trunc + occ + a + bb + hwl + xyz + y
Expand Down Expand Up @@ -407,7 +452,8 @@ def render_sample_data(self,
box_linewidth: int = 2,
filter_classes: List[str] = None,
max_dist: float = None,
out_path: str = None) -> None:
out_path: str = None,
render_2d: bool = False) -> None:
"""
Render sample data onto axis. Visualizes lidar in nuScenes lidar frame and camera in camera frame.
:param token: KITTI token.
Expand All @@ -422,6 +468,7 @@ def render_sample_data(self,
:param filter_classes: Optionally filter the classes to render.
:param max_dist: Maximum distance in m to still draw a box.
:param out_path: Optional path to save the rendered figure to disk.
:param render_2d: Whether to render 2d boxes (only works for camera data).
"""
# Default settings.
if color_func is None:
Expand Down Expand Up @@ -456,7 +503,6 @@ def render_sample_data(self,
box.render(ax, view=view_3d, colors=(color, color, 'k'), linewidth=box_linewidth)

elif sensor_modality == 'camera':
transforms = self.get_transforms(token, self.root)
im_path = KittiDB.get_filepath(token, 'image_2', root=self.root)
im = Image.open(im_path)

Expand All @@ -469,21 +515,33 @@ def render_sample_data(self,
ax.set_ylim(im.size[1], 0)

if with_anns:
for box in boxes:
# Undo the transformations in get_boxes() to get back to the camera frame.
box.rotate(self.kitti_to_nu_lidar_inv) # In KITTI lidar frame.
box.rotate(Quaternion(matrix=transforms['velo_to_cam']['R']))
box.translate(transforms['velo_to_cam']['T']) # In KITTI camera frame, un-rectified.
box.rotate(Quaternion(matrix=transforms['r0_rect'])) # In KITTI camera frame, rectified.

# Filter boxes outside the image (relevant when visualizing nuScenes data in KITTI format).
if not box_in_image(box, transforms['p_left'][:3, :3], im.size, vis_level=BoxVisibility.ANY):
continue

# Render.
color = np.array(color_func(box.name)) / 255
box.render(ax, view=transforms['p_left'][:3, :3], normalize=True, colors=(color, color, 'k'),
linewidth=box_linewidth)
if render_2d:
# Use KITTI's 2d boxes.
boxes_2d, names = self.get_boxes_2d(token, filter_classes=filter_classes)
for box, name in zip(boxes_2d, names):
color = np.array(color_func(name)) / 255
ax.plot([box[0], box[0]], [box[1], box[3]], color=color, linewidth=box_linewidth)
ax.plot([box[2], box[2]], [box[1], box[3]], color=color, linewidth=box_linewidth)
ax.plot([box[0], box[2]], [box[1], box[1]], color=color, linewidth=box_linewidth)
ax.plot([box[0], box[2]], [box[3], box[3]], color=color, linewidth=box_linewidth)
else:
# Project 3d boxes to 2d.
transforms = self.get_transforms(token, self.root)
for box in boxes:
# Undo the transformations in get_boxes() to get back to the camera frame.
box.rotate(self.kitti_to_nu_lidar_inv) # In KITTI lidar frame.
box.rotate(Quaternion(matrix=transforms['velo_to_cam']['R']))
box.translate(transforms['velo_to_cam']['T']) # In KITTI camera frame, un-rectified.
box.rotate(Quaternion(matrix=transforms['r0_rect'])) # In KITTI camera frame, rectified.

# Filter boxes outside the image (relevant when visualizing nuScenes data in KITTI format).
if not box_in_image(box, transforms['p_left'][:3, :3], im.size, vis_level=BoxVisibility.ANY):
continue

# Render.
color = np.array(color_func(box.name)) / 255
box.render(ax, view=transforms['p_left'][:3, :3], normalize=True, colors=(color, color, 'k'),
linewidth=box_linewidth)
else:
raise ValueError("Unrecognized modality {}.".format(sensor_modality))

Expand Down

0 comments on commit 57de64c

Please sign in to comment.