# nuScenes dev-kit.
# Code written by Sergi Adipraja Widjaja, 2019.

"""
Export 2D annotations (xmin, ymin, xmax, ymax) from re-projections of our annotated 3D bounding boxes to a .json file.

Note: Projecting tight 3d boxes to 2d generally leads to non-tight boxes.
Furthermore, it is non-trivial to determine whether a box falls into the image, rather than behind or around it.
Finally, some of the objects may be occluded by other objects, in particular when the lidar can see them, but the
cameras cannot.
"""
import argparse
import json
import os
from collections import OrderedDict
from typing import List, Tuple, Union

import numpy as np
from pyquaternion.quaternion import Quaternion
from shapely.geometry import MultiPoint, box
from tqdm import tqdm

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points

def post_process_coords(corner_coords: List,
                        imsize: Tuple[int, int] = (1600, 900)) -> Union[Tuple[float, float, float, float], None]:
    """
    Get the intersection of the convex hull of the reprojected bbox corners and the image canvas, return None if no
    intersection.
    :param corner_coords: Corner coordinates of reprojected bounding box.
    :param imsize: Size of the image canvas.
    :return: Intersection of the convex hull of the 2D box corners and the image canvas.
    """
    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
    img_canvas = box(0, 0, imsize[0], imsize[1])

    if polygon_from_2d_box.intersects(img_canvas):
        img_intersection = polygon_from_2d_box.intersection(img_canvas)
        intersection_coords = np.array([coord for coord in img_intersection.exterior.coords])

        min_x = min(intersection_coords[:, 0])
        min_y = min(intersection_coords[:, 1])
        max_x = max(intersection_coords[:, 0])
        max_y = max(intersection_coords[:, 1])

        return min_x, min_y, max_x, max_y
    else:
        return None
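
# Illustrative behaviour (numbers are hypothetical): on the default 1600x900
# canvas, corners projecting to [(-100, 50), (200, 50), (200, 400), (-100, 400)]
# are clipped at the left image border, yielding (0.0, 50.0, 200.0, 400.0);
# if the hull lies entirely off-canvas, the function returns None.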

def generate_record(ann_rec: dict,
                    x1: float,
                    y1: float,
                    x2: float,
                    y2: float,
                    sample_data_token: str,
                    filename: str) -> OrderedDict:
    """
    Generate one 2D annotation record given various information on top of the 2D bounding box coordinates.
    :param ann_rec: Original 3d annotation record.
    :param x1: Minimum value of the x coordinate.
    :param y1: Minimum value of the y coordinate.
    :param x2: Maximum value of the x coordinate.
    :param y2: Maximum value of the y coordinate.
    :param sample_data_token: Sample data token.
    :param filename: The corresponding image file where the annotation is present.
    :return: A sample 2D annotation record.
    """
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token

    relevant_keys = [
        'attribute_tokens',
        'category_name',
        'instance_token',
        'next',
        'num_lidar_pts',
        'num_radar_pts',
        'prev',
        'sample_annotation_token',
        'sample_data_token',
        'visibility_token',
    ]
    for key, value in ann_rec.items():
        if key in relevant_keys:
            repro_rec[key] = value

    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    return repro_rec
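
# Sketch of one resulting record (all field values here are made up for
# illustration):
#
#   OrderedDict([('sample_data_token', '<token>'),
#                ('category_name', 'vehicle.car'),
#                ('instance_token', '<token>'),
#                ...,
#                ('bbox_corners', [726.4, 451.2, 965.8, 612.9]),
#                ('filename', 'samples/CAM_FRONT/<image>.jpg')])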

def get_2d_boxes(sample_data_token: str, visibilities: List[str]) -> List[OrderedDict]:
    """
    Get the 2D annotation records for a given `sample_data_token`.
    :param sample_data_token: Sample data token belonging to a camera keyframe.
    :param visibilities: Visibility filter.
    :return: List of 2D annotation records that belong to the input `sample_data_token`.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec['sensor_modality'] == 'camera', 'Error: get_2d_boxes only works for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError('The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    # Get all the annotations with the specified visibilities.
    ann_recs = [nusc.get('sample_annotation', token) for token in s_rec['anns']]
    ann_recs = [ann_rec for ann_rec in ann_recs if (ann_rec['visibility_token'] in visibilities)]

    repro_recs = []

    for ann_rec in ann_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic, True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y, sample_data_token, sd_rec['filename'])
        repro_recs.append(repro_rec)

    return repro_recs
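
# The two translate/rotate pairs above apply inverse rigid-body transforms in
# sequence: global frame -> ego-vehicle frame (via ego_pose), then ego frame ->
# camera frame (via calibrated_sensor). Corners are then filtered to those with
# positive depth (z > 0) before being projected with the camera intrinsics.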

def main(args):
    """Generates 2D re-projections of the 3D bounding boxes present in the dataset."""
    print("Generating 2D reprojections of the nuScenes dataset")

    # Get tokens for all camera images.
    sample_data_camera_tokens = [s['token'] for s in nusc.sample_data if (s['sensor_modality'] == 'camera') and
                                 s['is_key_frame']]

    # For debugging purposes: Only produce the first n images.
    if args.image_limit != -1:
        sample_data_camera_tokens = sample_data_camera_tokens[:args.image_limit]

    # Loop through the records and apply the re-projection algorithm.
    reprojections = []
    for token in tqdm(sample_data_camera_tokens):
        reprojection_records = get_2d_boxes(token, args.visibilities)
        reprojections.extend(reprojection_records)

    # Save to a .json file.
    dest_path = os.path.join(args.dataroot, args.version)
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    with open(os.path.join(args.dataroot, args.version, args.filename), 'w') as fh:
        json.dump(reprojections, fh, sort_keys=True, indent=4)

    print("Saved the 2D re-projections under {}".format(os.path.join(args.dataroot, args.version, args.filename)))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Export 2D annotations from reprojections to a .json file.',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataroot', type=str, default='/data/sets/nuscenes', help="Path where nuScenes is saved.")
    parser.add_argument('--version', type=str, default='v1.0-trainval', help='Dataset version.')
    parser.add_argument('--filename', type=str, default='image_annotations.json', help='Output filename.')
    parser.add_argument('--visibilities', type=str, default=['', '1', '2', '3', '4'],
                        help='Visibility bins, the higher the number the higher the visibility.', nargs='+')
    parser.add_argument('--image_limit', type=int, default=-1, help='Number of images to process or -1 to process all.')
    args = parser.parse_args()

    nusc = NuScenes(dataroot=args.dataroot, version=args.version)

    main(args)
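
# A minimal sketch of consuming the exported file afterwards (assumes the
# default --dataroot, --version and --filename were used for the export above):
#
#   import json
#   with open('/data/sets/nuscenes/v1.0-trainval/image_annotations.json') as f:
#       records = json.load(f)
#   print(len(records), records[0]['bbox_corners'])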