-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyolo2mot.py
207 lines (176 loc) · 9.95 KB
/
yolo2mot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Converting from our GT format (modified YOLO format, where object_id added):
# [Class ID, Object ID, x, y, w, h] (relative coordinates)
# to MOT challenge format that cheind/py-motmetrics accepts:
# [Frame Index, Object ID, x1, y1, w, h, Confidence, 3D x, 3D y, 3D z] (absolute coordinates)
# Note: x and y are center position of bbox,
# x1 and y1 are the top-left corner bbox,
# x2 and y2 are the bottom-right corner of bbox
# w is width, h is height
# The MOT challenge format has no class_id column, so this script does the class filtering itself.
# With "all", every class that exists in the ground truth is kept and the object IDs are
# renumbered into one shared range, regardless of class_id.
# Alternatively, pass a specific class_id to keep only the objects of that class.
# load libraries
import glob
import numpy as np
from tqdm import tqdm
import argparse
import re
import os
import pdb # for debugging
def normalize(value, minValue, maxValue, a, b):
    """Linearly rescale ``value`` from the range [minValue, maxValue] to [a, b].

    ``value`` may be a plain number or a numpy array; the arithmetic is applied
    element-wise in the latter case.
    """
    source_span = maxValue - minValue
    target_span = b - a
    fraction = (value - minValue) / source_span  # position of value inside the source range
    return fraction * target_span + a
# Sort strings in natural order, e.g., "0" -> "2" -> "10" instead of "0" -> "10" -> "2".
def natural_sort(l):
    """Return the strings of ``l`` sorted in natural (human) order.

    Runs of ASCII digits are compared by numeric value and everything else is
    compared case-insensitively, so "f2" sorts before "f10".

    Args:
        l: iterable of strings.

    Returns:
        A new sorted list; the input is not modified.
    """
    def alphanum_key(key):
        # re.split with a single capturing group always alternates
        # text, digits, text, ...: the odd positions are exactly the [0-9]+ runs.
        # Deciding by position (instead of str.isdigit) keeps every key
        # type-aligned and never feeds int() something it cannot parse
        # (str.isdigit is also true for characters such as a superscript two).
        chunks = re.split(r'([0-9]+)', key)
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(chunks)
        ]

    return sorted(l, key=alphanum_key)
# inputs for the command
def parse_args(argv=None):
    """Parse the command-line options of the converter.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv``.

    Returns:
        argparse.Namespace with ``input_dir_gt``, ``output_dir_gt``,
        ``class_id_filter``, ``img_w`` and ``img_h``.

    Every option is required: the conversion cannot run without any of them,
    so a missing one is reported by argparse right away instead of surfacing
    later as an unrelated error.
    """
    parser = argparse.ArgumentParser(description='Inputs for data conversion')
    parser.add_argument("--input_dir_gt", required=True,
                        help="Enter the ground truth input directory path", type=str)
    parser.add_argument("--output_dir_gt", required=True,
                        help="Enter the converted ground truth output directory path", type=str)
    parser.add_argument("--class_id_filter", required=True,
                        help="Select either class ID to filter or \"all\" classes", type=str)
    parser.add_argument("--img_w", required=True,
                        help="Enter the width in pixel of video image frame", type=int)
    parser.add_argument("--img_h", required=True,
                        help="Enter the height in pixel of video image frame", type=int)
    return parser.parse_args(argv)
# One MOT-challenge row: frame, object id, x1, y1, w, h, confidence, 3D x, 3D y, 3D z
MOT_ROW_FORMAT = "%d,%d,%s,%s,%s,%s,%d,%d,%d,%d"


def frame_index_from_path(path):
    """Return the 1-based frame index encoded in a label file name.

    The index is the text between the last "_" of the file name and the first
    following "."; it is 0-based on disk, so 1 is added. Only the file name is
    inspected, so underscores or dots in directory names cannot interfere.
    """
    file_name = os.path.basename(path)
    return int(file_name.split("_")[-1].split(".")[0]) + 1


def load_gt_rows(path):
    """Load one label file as a float array of shape (n, 6).

    Columns are [class_id, object_id, x, y, w, h]. A file with a single row is
    returned as shape (1, 6) and a file without any row as shape (0, 6), so
    callers never have to special-case scalars.

    Raises:
        ValueError: if the rows do not have exactly six columns.
    """
    import warnings

    with warnings.catch_warnings():
        # A frame without objects is a legitimate empty file, not worth a warning.
        warnings.filterwarnings("ignore", message="genfromtxt: Empty input file")
        rows = np.atleast_2d(np.genfromtxt(path))
    if rows.size == 0:
        return np.empty((0, 6))
    if rows.shape[1] != 6:
        raise ValueError(
            f"{path}: expected 6 columns (class_id, object_id, x, y, w, h), "
            f"got {rows.shape[1]}"
        )
    return rows


def remap_object_ids(class_ids, object_ids, id_map):
    """Map every (class_id, object_id) pair to one sequential ID starting at 1.

    ``id_map`` is updated in place and must be shared across frames so that an
    object seen again keeps the ID it was given the first time.

    Returns:
        Float array with the new object ID of every row.
    """
    remapped = [
        # len(id_map) + 1 is evaluated before the insertion, i.e. the next free ID
        id_map.setdefault(pair, len(id_map) + 1)
        for pair in zip(class_ids.tolist(), object_ids.tolist())
    ]
    return np.array(remapped, dtype=float)


def yolo_rows_to_mot(rows, object_id, frame, img_w, img_h):
    """Build the (n, 10) MOT array for the label rows of one frame.

    Relative centre-based boxes are scaled to pixels and converted to a
    top-left corner plus width and height. ``object_id`` holds the already
    converted object IDs, one per row of ``rows``.
    """
    _, _, x, y, w, h = rows.T
    # conversion of bbox parameters from relative coordinates to absolute coordinates
    x = normalize(x, 0, 1, 0, img_w)
    y = normalize(y, 0, 1, 0, img_h)
    w = normalize(w, 0, 1, 0, img_w)
    h = normalize(h, 0, 1, 0, img_h)
    count = len(rows)
    return np.column_stack((
        np.full(count, frame),
        object_id,
        x - w / 2,  # x1: left edge
        y - h / 2,  # y1: top edge
        w,
        h,
        np.ones(count),  # for ground truth, confidence score is 1
        np.full((count, 3), -1),  # not 3d, so the 3D position is -1
    ))


def convert_gt_to_mot(input_dir_gt, output_path_gt, class_id_filter, img_w, img_h):
    """Convert every ``*.txt`` label file of ``input_dir_gt`` into one MOT file.

    Args:
        input_dir_gt: directory holding one label file per frame.
        output_path_gt: path of the MOT file to write; an existing file is replaced.
        class_id_filter: "all" to keep every class (object IDs are then
            renumbered into one shared range), or a class ID to keep only that
            class (object IDs are then shifted by 1 to start from 1 not 0).
        img_w: frame width in pixels.
        img_h: frame height in pixels.

    Raises:
        ValueError: if ``class_id_filter`` is neither "all" nor an integer, or
            if a label file is malformed.
    """
    keep_all = class_id_filter == "all"
    if keep_all:
        print("Converting GT file in MOT format for \"all\" classes ...")
    else:
        print(f"Converting GT file in MOT format for class_id {class_id_filter} ...")
        class_id_filter = int(class_id_filter)  # integer needed to compare against the class column

    alltxt_gt = natural_sort(glob.glob(f"{input_dir_gt}/*.txt"))
    id_map = dict()  # (class_id, old object_id) -> new object_id, shared by all frames

    # 'w' truncates a previous result, so every run starts from an empty file
    with open(output_path_gt, 'w') as output_file:
        for txt in tqdm(alltxt_gt):
            frame = frame_index_from_path(txt)
            rows = load_gt_rows(txt)
            if not keep_all:
                rows = rows[rows[:, 0] == class_id_filter]  # filtering by the class id
            if len(rows) == 0:
                continue  # nothing to report for this frame
            if keep_all:
                object_id = remap_object_ids(rows[:, 0], rows[:, 1], id_map)
            else:
                object_id = rows[:, 1] + 1
            # save (appending) into the output file
            np.savetxt(output_file, yolo_rows_to_mot(rows, object_id, frame, img_w, img_h),
                       delimiter=',', fmt=MOT_ROW_FORMAT)


if __name__ == '__main__':
    args = parse_args()
    convert_gt_to_mot(
        args.input_dir_gt,  # directory that contains our ground truth
        f"{args.output_dir_gt}/gt.txt",  # file path to the single converted GT file
        args.class_id_filter,
        args.img_w,
        args.img_h,
    )