8
8
from ...ops .roiaware_pool3d import roiaware_pool3d_utils
9
9
from ...utils import common_utils
10
10
from ..dataset import DatasetTemplate
11
+ from pyquaternion import Quaternion
12
+ from PIL import Image
11
13
12
14
13
15
class NuScenesDataset (DatasetTemplate ):
@@ -17,6 +19,13 @@ def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logg
17
19
dataset_cfg = dataset_cfg , class_names = class_names , training = training , root_path = root_path , logger = logger
18
20
)
19
21
self .infos = []
22
+ self .camera_config = self .dataset_cfg .get ('CAMERA_CONFIG' , None )
23
+ if self .camera_config is not None :
24
+ self .use_camera = self .camera_config .get ('USE_CAMERA' , True )
25
+ self .camera_image_config = self .camera_config .IMAGE
26
+ else :
27
+ self .use_camera = False
28
+
20
29
self .include_nuscenes_data (self .mode )
21
30
if self .training and self .dataset_cfg .get ('BALANCED_RESAMPLING' , False ):
22
31
self .infos = self .balanced_infos_resampling (self .infos )
@@ -108,6 +117,41 @@ def get_lidar_with_sweeps(self, index, max_sweeps=1):
108
117
points = np .concatenate ((points , times ), axis = 1 )
109
118
return points
110
119
120
def crop_image(self, input_dict):
    """Resize and crop every camera image to the configured final size.

    Each image is first rescaled by a factor taken from the camera image
    config and then cropped to ``FINAL_DIM``. The crop always keeps the
    bottom ``fH`` rows of the resized image (the crop box ends at the
    resized height). Horizontally, the crop offset is drawn at random in
    training mode and centered otherwise.

    Args:
        input_dict: dict that must contain
            ``"ori_shape"``: ``(W, H)`` of the original images, and
            ``"camera_imgs"``: iterable of image objects exposing
            ``resize(size)`` and ``crop(box)`` (PIL-style API).

    Returns:
        The same ``input_dict``, with ``"camera_imgs"`` replaced by the
        list of resized-and-cropped images and ``"img_process_infos"`` set
        to one ``[resize, crop, False, 0]`` entry per image.
    """
    W, H = input_dict["ori_shape"]
    is_training = bool(self.training)

    # These values do not depend on the individual image, so read them once.
    fH, fW = self.camera_image_config.FINAL_DIM
    if is_training:
        resize_lim = self.camera_image_config.RESIZE_LIM_TRAIN
    else:
        resize_lim = self.camera_image_config.RESIZE_LIM_TEST

    img_process_infos = []
    crop_images = []
    for img in input_dict["camera_imgs"]:
        # Training draws a fresh random scale per image; evaluation uses the
        # deterministic midpoint of the configured limits.
        if is_training:
            resize = np.random.uniform(*resize_lim)
        else:
            resize = np.mean(resize_lim)

        resize_dims = (int(W * resize), int(H * resize))
        newW, newH = resize_dims

        # Vertical offset is chosen so the crop box ends at the bottom edge.
        crop_h = newH - fH
        # Horizontal slack is clamped at 0 when the resized image is
        # narrower than the target width.
        slack_w = max(0, newW - fW)
        if is_training:
            crop_w = int(np.random.uniform(0, slack_w))
        else:
            crop_w = int(slack_w / 2)
        crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)

        # resize and crop image
        crop_images.append(img.resize(resize_dims).crop(crop))
        # The trailing False / 0 are constant here; presumably placeholders
        # for flip and rotation augmentation -- TODO confirm with consumers.
        img_process_infos.append([resize, crop, False, 0])

    input_dict['img_process_infos'] = img_process_infos
    input_dict['camera_imgs'] = crop_images
    return input_dict
154
+
111
155
def __len__ (self ):
112
156
if self ._merge_all_iters_to_one_epoch :
113
157
return len (self .infos ) * self .total_epochs
@@ -137,6 +181,60 @@ def __getitem__(self, index):
137
181
'gt_names' : info ['gt_names' ] if mask is None else info ['gt_names' ][mask ],
138
182
'gt_boxes' : info ['gt_boxes' ] if mask is None else info ['gt_boxes' ][mask ]
139
183
})
184
+ if self .use_camera :
185
+ input_dict ["image_paths" ] = []
186
+ input_dict ["lidar2camera" ] = []
187
+ input_dict ["lidar2image" ] = []
188
+ input_dict ["camera2ego" ] = []
189
+ input_dict ["camera_intrinsics" ] = []
190
+ input_dict ["camera2lidar" ] = []
191
+
192
+ for _ , camera_info in info ["cams" ].items ():
193
+ input_dict ["image_paths" ].append (camera_info ["data_path" ])
194
+
195
+ # lidar to camera transform
196
+ lidar2camera_r = np .linalg .inv (camera_info ["sensor2lidar_rotation" ])
197
+ lidar2camera_t = (
198
+ camera_info ["sensor2lidar_translation" ] @ lidar2camera_r .T
199
+ )
200
+ lidar2camera_rt = np .eye (4 ).astype (np .float32 )
201
+ lidar2camera_rt [:3 , :3 ] = lidar2camera_r .T
202
+ lidar2camera_rt [3 , :3 ] = - lidar2camera_t
203
+ input_dict ["lidar2camera" ].append (lidar2camera_rt .T )
204
+
205
+ # camera intrinsics
206
+ camera_intrinsics = np .eye (4 ).astype (np .float32 )
207
+ camera_intrinsics [:3 , :3 ] = camera_info ["camera_intrinsics" ]
208
+ input_dict ["camera_intrinsics" ].append (camera_intrinsics )
209
+
210
+ # lidar to image transform
211
+ lidar2image = camera_intrinsics @ lidar2camera_rt .T
212
+ input_dict ["lidar2image" ].append (lidar2image )
213
+
214
+ # camera to ego transform
215
+ camera2ego = np .eye (4 ).astype (np .float32 )
216
+ camera2ego [:3 , :3 ] = Quaternion (
217
+ camera_info ["sensor2ego_rotation" ]
218
+ ).rotation_matrix
219
+ camera2ego [:3 , 3 ] = camera_info ["sensor2ego_translation" ]
220
+ input_dict ["camera2ego" ].append (camera2ego )
221
+
222
+ # camera to lidar transform
223
+ camera2lidar = np .eye (4 ).astype (np .float32 )
224
+ camera2lidar [:3 , :3 ] = camera_info ["sensor2lidar_rotation" ]
225
+ camera2lidar [:3 , 3 ] = camera_info ["sensor2lidar_translation" ]
226
+ input_dict ["camera2lidar" ].append (camera2lidar )
227
+ # read image
228
+ filename = input_dict ["image_paths" ]
229
+ images = []
230
+ for name in filename :
231
+ images .append (Image .open (str (self .root_path / name )))
232
+
233
+ input_dict ["camera_imgs" ] = images
234
+ input_dict ["ori_shape" ] = images [0 ].size
235
+
236
+ # resize and crop image
237
+ input_dict = self .crop_image (input_dict )
140
238
141
239
data_dict = self .prepare_data (data_dict = input_dict )
142
240
@@ -251,7 +349,7 @@ def create_groundtruth_database(self, used_classes=None, max_sweeps=10):
251
349
pickle .dump (all_db_infos , f )
252
350
253
351
254
- def create_nuscenes_info (version , data_path , save_path , max_sweeps = 10 ):
352
+ def create_nuscenes_info (version , data_path , save_path , max_sweeps = 10 , with_cam = False ):
255
353
from nuscenes .nuscenes import NuScenes
256
354
from nuscenes .utils import splits
257
355
from . import nuscenes_utils
@@ -308,6 +406,7 @@ def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
308
406
parser .add_argument ('--cfg_file' , type = str , default = None , help = 'specify the config of dataset' )
309
407
parser .add_argument ('--func' , type = str , default = 'create_nuscenes_infos' , help = '' )
310
408
parser .add_argument ('--version' , type = str , default = 'v1.0-trainval' , help = '' )
409
+ parser .add_argument ('--with_cam' , action = 'store_true' , default = False , help = 'use camera or not' )
311
410
args = parser .parse_args ()
312
411
313
412
if args .func == 'create_nuscenes_infos' :
@@ -319,6 +418,7 @@ def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
319
418
data_path = ROOT_DIR / 'data' / 'nuscenes' ,
320
419
save_path = ROOT_DIR / 'data' / 'nuscenes' ,
321
420
max_sweeps = dataset_cfg .MAX_SWEEPS ,
421
+ with_cam = args .with_cam
322
422
)
323
423
324
424
nuscenes_dataset = NuScenesDataset (
0 commit comments