# kitti.py
from __future__ import division

import os
import os.path

import cv2
import numpy as np
import torch.utils.data as data

import utils
from utils import box3d_corner_to_center_batch, anchors_center_to_corner, corner_to_standup_box2d_batch
from data_aug import aug_data
from box_overlaps import bbox_overlaps


class KittiDataset(data.Dataset):
    def __init__(self, cfg, root='./KITTI', set='train', type='velodyne_train'):
self.type = type
self.root = root
self.data_path = os.path.join(root, 'training')
self.lidar_path = os.path.join(self.data_path, "crop/")
self.image_path = os.path.join(self.data_path, "image_2/")
self.calib_path = os.path.join(self.data_path, "calib/")
self.label_path = os.path.join(self.data_path, "label_2/")
with open(os.path.join(self.data_path, '%s.txt' % set)) as f:
self.file_list = f.read().splitlines()
self.T = cfg.T
self.vd = cfg.vd
self.vh = cfg.vh
self.vw = cfg.vw
self.xrange = cfg.xrange
self.yrange = cfg.yrange
self.zrange = cfg.zrange
self.anchors = cfg.anchors.reshape(-1,7)
self.feature_map_shape = (int(cfg.H / 2), int(cfg.W / 2))
self.anchors_per_position = cfg.anchors_per_position
self.pos_threshold = cfg.pos_threshold
self.neg_threshold = cfg.neg_threshold
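
        # NOTE: the cfg field semantics are not documented in this file; the
        # reading below follows the usual VoxelNet conventions and should be
        # treated as an assumption:
        #   T                    - maximum number of points kept per voxel
        #   vd, vh, vw           - voxel size along z (depth), y (height), x (width)
        #   xrange/yrange/zrange - extent of the cropped point cloud (velodyne frame)
        #   anchors              - (x, y, z, h, w, l, rz) boxes, anchors_per_position per cell
        #   pos/neg_threshold    - bird's-eye-view IoU cutoffs used by cal_target()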

    def cal_target(self, gt_box3d):
        # Input:
        #   gt_box3d: (N, 8, 3) ground-truth boxes given as 3D corners
        # Uses (from self):
        #   feature_map_shape: (w, l)
        #   anchors: (w * l * 2, 7), laid out as (x, y, z, h, w, l, rz)
        # Output:
        #   pos_equal_one: (w, l, 2) positive-anchor mask
        #   neg_equal_one: (w, l, 2) negative-anchor mask
        #   targets:       (w, l, 14) regression targets, 7 values per anchor
        # Note: anchor/gt IoU is computed on the bird's-eye view.
anchors_d = np.sqrt(self.anchors[:, 4] ** 2 + self.anchors[:, 5] ** 2)
pos_equal_one = np.zeros((*self.feature_map_shape, 2))
neg_equal_one = np.zeros((*self.feature_map_shape, 2))
targets = np.zeros((*self.feature_map_shape, 14))
gt_xyzhwlr = box3d_corner_to_center_batch(gt_box3d)
anchors_corner = anchors_center_to_corner(self.anchors)
anchors_standup_2d = corner_to_standup_box2d_batch(anchors_corner)
# BOTTLENECK
gt_standup_2d = corner_to_standup_box2d_batch(gt_box3d)
iou = bbox_overlaps(
np.ascontiguousarray(anchors_standup_2d).astype(np.float32),
np.ascontiguousarray(gt_standup_2d).astype(np.float32),
)
        id_highest = np.argmax(iou.T, axis=1)  # iou is (num_anchors, num_gt); pick the best anchor for each gt box
id_highest_gt = np.arange(iou.T.shape[0])
mask = iou.T[id_highest_gt, id_highest] > 0
id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]
        # anchors whose IoU with some gt box exceeds pos_threshold are positive
        id_pos, id_pos_gt = np.where(iou > self.pos_threshold)
        # anchors whose IoU with every gt box falls below neg_threshold are negative
        id_neg = np.where(np.sum(iou < self.neg_threshold,
                                 axis=1) == iou.shape[1])[0]
id_pos = np.concatenate([id_pos, id_highest])
id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt])
# TODO: uniquify the array in a more scientific way
id_pos, index = np.unique(id_pos, return_index=True)
id_pos_gt = id_pos_gt[index]
id_neg.sort()
        # compute the regression targets and set the positive mask
index_x, index_y, index_z = np.unravel_index(
id_pos, (*self.feature_map_shape, self.anchors_per_position))
pos_equal_one[index_x, index_y, index_z] = 1
        # ATTENTION: index_z must be an ndarray so that "index_z * 7" yields per-anchor channel offsets
targets[index_x, index_y, np.array(index_z) * 7] = \
(gt_xyzhwlr[id_pos_gt, 0] - self.anchors[id_pos, 0]) / anchors_d[id_pos]
targets[index_x, index_y, np.array(index_z) * 7 + 1] = \
(gt_xyzhwlr[id_pos_gt, 1] - self.anchors[id_pos, 1]) / anchors_d[id_pos]
targets[index_x, index_y, np.array(index_z) * 7 + 2] = \
(gt_xyzhwlr[id_pos_gt, 2] - self.anchors[id_pos, 2]) / self.anchors[id_pos, 3]
targets[index_x, index_y, np.array(index_z) * 7 + 3] = np.log(
gt_xyzhwlr[id_pos_gt, 3] / self.anchors[id_pos, 3])
targets[index_x, index_y, np.array(index_z) * 7 + 4] = np.log(
gt_xyzhwlr[id_pos_gt, 4] / self.anchors[id_pos, 4])
targets[index_x, index_y, np.array(index_z) * 7 + 5] = np.log(
gt_xyzhwlr[id_pos_gt, 5] / self.anchors[id_pos, 5])
targets[index_x, index_y, np.array(index_z) * 7 + 6] = (
gt_xyzhwlr[id_pos_gt, 6] - self.anchors[id_pos, 6])
index_x, index_y, index_z = np.unravel_index(
id_neg, (*self.feature_map_shape, self.anchors_per_position))
neg_equal_one[index_x, index_y, index_z] = 1
        # ensure the best-matching anchors are never also marked negative
index_x, index_y, index_z = np.unravel_index(
id_highest, (*self.feature_map_shape, self.anchors_per_position))
neg_equal_one[index_x, index_y, index_z] = 0
return pos_equal_one, neg_equal_one, targets
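
    # --- Illustrative sketch, not part of the original file -----------------
    # cal_target() encodes each positive anchor's target as
    # (dx/d, dy/d, dz/h_a, log(h/h_a), log(w/w_a), log(l/l_a), dr), where d is
    # the anchor's bird's-eye-view diagonal. A detector would invert that
    # mapping at inference time; the helper below is a hedged sketch of the
    # inverse, assuming `deltas` and `anchors` are (N, 7) arrays laid out as
    # (x, y, z, h, w, l, r).
    def _decode_deltas_sketch(self, deltas, anchors):
        anchors_d = np.sqrt(anchors[:, 4] ** 2 + anchors[:, 5] ** 2)
        boxes = np.zeros_like(anchors)
        boxes[:, 0] = deltas[:, 0] * anchors_d + anchors[:, 0]      # x, scaled by diagonal
        boxes[:, 1] = deltas[:, 1] * anchors_d + anchors[:, 1]      # y, scaled by diagonal
        boxes[:, 2] = deltas[:, 2] * anchors[:, 3] + anchors[:, 2]  # z, scaled by anchor height
        boxes[:, 3:6] = np.exp(deltas[:, 3:6]) * anchors[:, 3:6]    # h, w, l from log ratios
        boxes[:, 6] = deltas[:, 6] + anchors[:, 6]                  # rotation offset
        return boxes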

    def preprocess(self, lidar):
        # shuffle the points so that truncating each voxel to T points keeps a random subset
        np.random.shuffle(lidar)
        voxel_coords = ((lidar[:, :3] - np.array([self.xrange[0], self.yrange[0], self.zrange[0]])) /
                        (self.vw, self.vh, self.vd)).astype(np.int32)
        # convert to (D, H, W)
        voxel_coords = voxel_coords[:, [2, 1, 0]]
        voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0,
                                                        return_inverse=True, return_counts=True)
voxel_features = []
for i in range(len(voxel_coords)):
voxel = np.zeros((self.T, 7), dtype=np.float32)
pts = lidar[inv_ind == i]
if voxel_counts[i] > self.T:
pts = pts[:self.T, :]
voxel_counts[i] = self.T
            # augment each point with its offset from the voxel centroid:
            # (x, y, z, r) -> (x, y, z, r, x - cx, y - cy, z - cz)
            voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)
voxel_features.append(voxel)
return np.array(voxel_features), voxel_coords
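
    # Worked example of the voxel index math above (illustrative numbers only,
    # assuming xrange=(0, 70.4), yrange=(-40, 40), zrange=(-3, 1) and
    # vw=0.2, vh=0.2, vd=0.4, the usual VoxelNet car settings): a point at
    # (x, y, z) = (10.55, -3.1, -1.0) lands in voxel
    #   (int((10.55 - 0) / 0.2), int((-3.1 + 40) / 0.2), int((-1.0 + 3) / 0.4)) -> (52, 184, 5),
    # which is then reordered to (D, H, W) = (5, 184, 52).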

    def __getitem__(self, i):
        lidar_file = os.path.join(self.lidar_path, self.file_list[i] + '.bin')
        calib_file = os.path.join(self.calib_path, self.file_list[i] + '.txt')
        label_file = os.path.join(self.label_path, self.file_list[i] + '.txt')
        image_file = os.path.join(self.image_path, self.file_list[i] + '.png')
calib = utils.load_kitti_calib(calib_file)
Tr = calib['Tr_velo2cam']
gt_box3d = utils.load_kitti_label(label_file, Tr)
lidar = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
if self.type == 'velodyne_train':
image = cv2.imread(image_file)
# data augmentation
lidar, gt_box3d = aug_data(lidar, gt_box3d)
# specify a range
lidar, gt_box3d = utils.get_filtered_lidar(lidar, gt_box3d)
# voxelize
voxel_features, voxel_coords = self.preprocess(lidar)
# bounding-box encoding
pos_equal_one, neg_equal_one, targets = self.cal_target(gt_box3d)
return voxel_features, voxel_coords, pos_equal_one, neg_equal_one, targets, image, calib, self.file_list[i]
        elif self.type == 'velodyne_test':
            raise NotImplementedError('velodyne_test mode is not implemented yet')
        else:
            raise ValueError('invalid dataset type: %r' % self.type)

def __len__(self):
return len(self.file_list)
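

# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original file). The `config` import and
# its field names are assumptions based on how KittiDataset reads `cfg`. Note
# that each sample yields a variable number of voxels, so batching through a
# torch DataLoader would additionally need a custom collate_fn that stacks or
# pads the per-sample voxel arrays.
if __name__ == '__main__':
    from config import config as cfg  # assumed companion config module

    dataset = KittiDataset(cfg, root='./KITTI', set='train', type='velodyne_train')
    voxel_features, voxel_coords, pos, neg, targets, image, calib, name = dataset[0]
    print(name, voxel_features.shape, voxel_coords.shape, pos.shape, targets.shape)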