#!/usr/bin/env python
"""
Parse XML files containing tracklet info for the KITTI dataset (raw data section)
(http://cvlibs.net/datasets/kitti/raw_data.php)

No guarantees that this code is correct, usage is at your own risk!

created by Christian Herdtweck, Max Planck Institute for Biological Cybernetics
(christian.herdtweck@tuebingen.mpg.de)

requires numpy!

example usage:
    import parse_xml as xmlParser
    kittiDir = '/path/to/kitti/data'
    drive = '2011_09_26_drive_0001'
    xmlParser.example(kittiDir, drive)
or simply on command line:
    python parse_xml.py
"""
# Version History:
# 4/7/12 Christian Herdtweck: seems to work with a few random test xml tracklet files;
# converts file contents to ElementTree and then to list of Tracklet objects;
# Tracklet objects have str and iter functions
# 5/7/12 ch: added constants for state, occlusion, truncation and added consistency checks
# 30/1/14 ch: create example function from example code
from sys import argv as cmdLineArgs
from xml.etree.ElementTree import ElementTree
import numpy as np
import itertools
from warnings import warn
STATE_UNSET = 0
STATE_INTERP = 1
STATE_LABELED = 2
stateFromText = {'0':STATE_UNSET, '1':STATE_INTERP, '2':STATE_LABELED}
OCC_UNSET = 255 # -1 as uint8
OCC_VISIBLE = 0
OCC_PARTLY = 1
OCC_FULLY = 2
occFromText = {'-1':OCC_UNSET, '0':OCC_VISIBLE, '1':OCC_PARTLY, '2':OCC_FULLY}
TRUNC_UNSET = 255 # -1 as uint8, but in xml files the value '99' is used!
TRUNC_IN_IMAGE = 0
TRUNC_TRUNCATED = 1
TRUNC_OUT_IMAGE = 2
TRUNC_BEHIND_IMAGE = 3
truncFromText = {'99':TRUNC_UNSET, '0':TRUNC_IN_IMAGE, '1':TRUNC_TRUNCATED, \
'2':TRUNC_OUT_IMAGE, '3': TRUNC_BEHIND_IMAGE}
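# Illustrative note (added; not part of the original file): the tables above translate the
# raw text values found in the xml <poses> items into the numeric constants defined here, e.g.
#   occFromText['-1'] == OCC_UNSET      # occlusion '-1' in the xml -> 255
#   truncFromText['99'] == TRUNC_UNSET  # truncation '99' in the xml -> 255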
class Tracklet(object):
    """ representation of an annotated object track

    Tracklets are created in function parseXML and can most conveniently be used as follows:

    for trackletObj in parseXML(trackletFile):
        for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
            ... your code here ...
        #end: for all frames
    #end: for all tracklets

    absoluteFrameNumber is in range [firstFrame, firstFrame+nFrames[
    amtOcclusion and amtBorders could be None

    You can of course also directly access the fields objectType (string), size (len-3 ndarray), firstFrame/nFrames (int),
    trans/rots (nFrames x 3 float ndarrays), states/truncs (len-nFrames uint8 ndarrays), occs (nFrames x 2 uint8 ndarray),
    and for some tracklets amtOccs (nFrames x 2 float ndarray) and amtBorders (nFrames x 3 float ndarray). The last two
    can be None if the xml file did not include these fields in poses
    """

    objectType = None
    size = None  # len-3 float array: (height, width, length)
    firstFrame = None
    trans = None   # n x 3 float array (x, y, z)
    rots = None    # n x 3 float array (x, y, z)
    states = None  # len-n uint8 array of states
    occs = None    # n x 2 uint8 array (occlusion, occlusion_kf)
    truncs = None  # len-n uint8 array of truncation
    amtOccs = None     # None or (n x 2) float array (amt_occlusion, amt_occlusion_kf)
    amtBorders = None  # None or (n x 3) float array (amt_border_l / _r / _kf)
    nFrames = None

    def __init__(self):
        """ create Tracklet with no info set """
        self.size = np.nan * np.ones(3, dtype=float)

    def __str__(self):
        """ return human-readable string representation of tracklet object

        called implicitly in
            print(trackletObj)
        or in
            text = str(trackletObj)
        """
        return '[Tracklet over {0} frames for {1}]'.format(self.nFrames, self.objectType)

    def __iter__(self):
        """ returns an iterator that yields a tuple of all the available data for each frame

        called whenever code iterates over a tracklet object, e.g. in
            for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
                ... do something ...
        or
            trackDataIter = iter(trackletObj)
        """
        if self.amtOccs is None:
            return zip(self.trans, self.rots, self.states, self.occs, self.truncs,
                       itertools.repeat(None), itertools.repeat(None),
                       range(self.firstFrame, self.firstFrame + self.nFrames))
        else:
            return zip(self.trans, self.rots, self.states, self.occs, self.truncs,
                       self.amtOccs, self.amtBorders,
                       range(self.firstFrame, self.firstFrame + self.nFrames))
#end: class Tracklet
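# Minimal usage sketch (illustrative only, not from the original module): iterating a parsed
# Tracklet and guarding against the optional amt_* fields, which are None whenever the xml
# did not contain them for that tracklet.
#
#   for trans, rot, state, occ, trunc, amtOcc, amtBorders, frame in trackletObj:
#       if amtOcc is None:
#           pass  # amt_occlusion / amt_border_* were absent for this tracklet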
def parseXML(trackletFile):
    r""" parse tracklet xml file and convert results to list of Tracklet objects

    :param trackletFile: name of a tracklet xml file
    :returns: list of Tracklet objects read from xml file
    """

    # convert tracklet XML data to a tree structure
    eTree = ElementTree()
    print('parsing tracklet file', trackletFile)
    with open(trackletFile) as f:
        eTree.parse(f)

    # now convert output to list of Tracklet objects
    trackletsElem = eTree.find('tracklets')
    tracklets = []
    trackletIdx = 0
    nTracklets = None
    for trackletElem in trackletsElem:
        #print('track:', trackletElem.tag)
        if trackletElem.tag == 'count':
            nTracklets = int(trackletElem.text)
            print('file contains', nTracklets, 'tracklets')
        elif trackletElem.tag == 'item_version':
            pass
        elif trackletElem.tag == 'item':
            #print('tracklet {0} of {1}'.format(trackletIdx, nTracklets))
            # a tracklet
            newTrack = Tracklet()
            isFinished = False
            hasAmt = False
            frameIdx = None
            for info in trackletElem:
                #print('trackInfo:', info.tag)
                if isFinished:
                    raise ValueError('more info on element after finished!')
                if info.tag == 'objectType':
                    newTrack.objectType = info.text
                elif info.tag == 'h':
                    newTrack.size[0] = float(info.text)
                elif info.tag == 'w':
                    newTrack.size[1] = float(info.text)
                elif info.tag == 'l':
                    newTrack.size[2] = float(info.text)
                elif info.tag == 'first_frame':
                    newTrack.firstFrame = int(info.text)
                elif info.tag == 'poses':
                    # this info is the possibly long list of poses
                    for pose in info:
                        #print('trackInfoPose:', pose.tag)
                        if pose.tag == 'count':  # this should come before the others
                            if newTrack.nFrames is not None:
                                raise ValueError('there are several pose lists for a single track!')
                            elif frameIdx is not None:
                                raise ValueError('?!')
                            newTrack.nFrames = int(pose.text)
                            newTrack.trans = np.nan * np.ones((newTrack.nFrames, 3), dtype=float)
                            newTrack.rots = np.nan * np.ones((newTrack.nFrames, 3), dtype=float)
                            newTrack.states = np.nan * np.ones(newTrack.nFrames, dtype='uint8')
                            newTrack.occs = np.nan * np.ones((newTrack.nFrames, 2), dtype='uint8')
                            newTrack.truncs = np.nan * np.ones(newTrack.nFrames, dtype='uint8')
                            newTrack.amtOccs = np.nan * np.ones((newTrack.nFrames, 2), dtype=float)
                            newTrack.amtBorders = np.nan * np.ones((newTrack.nFrames, 3), dtype=float)
                            frameIdx = 0
                        elif pose.tag == 'item_version':
                            pass
                        elif pose.tag == 'item':
                            # pose in one frame
                            if frameIdx is None:
                                raise ValueError('pose item came before number of poses!')
                            for poseInfo in pose:
                                #print('trackInfoPoseInfo:', poseInfo.tag)
                                if poseInfo.tag == 'tx':
                                    newTrack.trans[frameIdx, 0] = float(poseInfo.text)
                                elif poseInfo.tag == 'ty':
                                    newTrack.trans[frameIdx, 1] = float(poseInfo.text)
                                elif poseInfo.tag == 'tz':
                                    newTrack.trans[frameIdx, 2] = float(poseInfo.text)
                                elif poseInfo.tag == 'rx':
                                    newTrack.rots[frameIdx, 0] = float(poseInfo.text)
                                elif poseInfo.tag == 'ry':
                                    newTrack.rots[frameIdx, 1] = float(poseInfo.text)
                                elif poseInfo.tag == 'rz':
                                    newTrack.rots[frameIdx, 2] = float(poseInfo.text)
                                elif poseInfo.tag == 'state':
                                    newTrack.states[frameIdx] = stateFromText[poseInfo.text]
                                elif poseInfo.tag == 'occlusion':
                                    newTrack.occs[frameIdx, 0] = occFromText[poseInfo.text]
                                elif poseInfo.tag == 'occlusion_kf':
                                    newTrack.occs[frameIdx, 1] = occFromText[poseInfo.text]
                                elif poseInfo.tag == 'truncation':
                                    newTrack.truncs[frameIdx] = truncFromText[poseInfo.text]
                                elif poseInfo.tag == 'amt_occlusion':
                                    newTrack.amtOccs[frameIdx, 0] = float(poseInfo.text)
                                    hasAmt = True
                                elif poseInfo.tag == 'amt_occlusion_kf':
                                    newTrack.amtOccs[frameIdx, 1] = float(poseInfo.text)
                                    hasAmt = True
                                elif poseInfo.tag == 'amt_border_l':
                                    newTrack.amtBorders[frameIdx, 0] = float(poseInfo.text)
                                    hasAmt = True
                                elif poseInfo.tag == 'amt_border_r':
                                    newTrack.amtBorders[frameIdx, 1] = float(poseInfo.text)
                                    hasAmt = True
                                elif poseInfo.tag == 'amt_border_kf':
                                    newTrack.amtBorders[frameIdx, 2] = float(poseInfo.text)
                                    hasAmt = True
                                else:
                                    raise ValueError('unexpected tag in poses item: {0}!'.format(poseInfo.tag))
                            frameIdx += 1
                        else:
                            raise ValueError('unexpected pose info: {0}!'.format(pose.tag))
                elif info.tag == 'finished':
                    isFinished = True
                else:
                    raise ValueError('unexpected tag in tracklets: {0}!'.format(info.tag))
            #end: for all fields in current tracklet

            # some final consistency checks on new tracklet
            if not isFinished:
                warn('tracklet {0} was not finished!'.format(trackletIdx))
            if newTrack.nFrames is None:
                warn('tracklet {0} contains no information!'.format(trackletIdx))
            elif frameIdx != newTrack.nFrames:
                warn('tracklet {0} is supposed to have {1} frames, but parser found {2}!'.format(
                    trackletIdx, newTrack.nFrames, frameIdx))
            if np.abs(newTrack.rots[:, :2]).sum() > 1e-16:
                warn('track contains rotation other than yaw!')

            # if amtOccs / amtBorders are not set, set them to None
            if not hasAmt:
                newTrack.amtOccs = None
                newTrack.amtBorders = None

            # add new tracklet to list
            tracklets.append(newTrack)
            trackletIdx += 1
        else:
            raise ValueError('unexpected tracklet info')
    #end: for tracklet list items

    print('loaded', trackletIdx, 'tracklets')

    # final consistency check
    if trackletIdx != nTracklets:
        warn('according to xml information the file has {0} tracklets, but parser found {1}!'.format(nTracklets, trackletIdx))

    return tracklets
#end: function parseXML
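# Illustrative sketch (not from the original module) of calling parseXML directly and
# filtering the returned list of Tracklet objects; the file path and the 'Car' label are
# assumptions made only for this example.
#
#   tracklets = parseXML('/path/to/kitti/2011_09_26_drive_0001/tracklet_labels.xml')
#   cars = [t for t in tracklets if t.objectType == 'Car']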
def example(kittiDir=None, drive=None):
    from os.path import join, expanduser
    import readline  # makes input() line editing behave more nicely
    # from xmlParser import parseXML, TRUNC_IN_IMAGE, TRUNC_TRUNCATED

    DEFAULT_DRIVE = '2011_09_26_drive_0001'
    twoPi = 2. * np.pi

    # get dir names
    if kittiDir is None:
        kittiDir = expanduser(input('please enter kitti base dir (e.g. ~/path/to/kitti): ').strip())
    if drive is None:
        drive = input('please enter drive name (default {0}): '.format(DEFAULT_DRIVE)).strip()
        if len(drive) == 0:
            drive = DEFAULT_DRIVE

    # read tracklets from file
    myTrackletFile = join(kittiDir, drive, 'tracklet_labels.xml')
    tracklets = parseXML(myTrackletFile)

    # loop over tracklets
    for iTracklet, tracklet in enumerate(tracklets):
        print('tracklet {0: 3d}: {1}'.format(iTracklet, tracklet))

        # this part is inspired by kitti object development kit matlab code: computeBox3D
        h, w, l = tracklet.size
        trackletBox = np.array([  # in velodyne coordinates around zero point and without orientation yet
            [-l/2, -l/2,  l/2, l/2, -l/2, -l/2,  l/2, l/2],
            [ w/2, -w/2, -w/2, w/2,  w/2, -w/2, -w/2, w/2],
            [ 0.0,  0.0,  0.0, 0.0,    h,    h,    h,   h]])
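        # added note: each column of trackletBox is one box corner; the first four columns
        # form the bottom face (z = 0) and the last four the top face (z = h), with x
        # spanning +-l/2 and y spanning +-w/2 around the box origin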
        # loop over all data in tracklet
        for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber \
                in tracklet:

            # determine if object is in the image; otherwise continue
            if truncation not in (TRUNC_IN_IMAGE, TRUNC_TRUNCATED):
                continue

            # re-create 3D bounding box in velodyne coordinate system
            yaw = rotation[2]  # other rotations are 0 in all xml files I checked
            assert np.abs(rotation[:2]).sum() == 0, 'object rotations other than yaw given!'
            rotMat = np.array([
                [np.cos(yaw), -np.sin(yaw), 0.0],
                [np.sin(yaw),  np.cos(yaw), 0.0],
                [        0.0,          0.0, 1.0]])
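            # added note: rotMat rotates about the vertical (z) axis by the yaw angle,
            # i.e. the box is only rotated within the ground plane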
            cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T

            # calc yaw as seen from the camera (i.e. 0 degree = facing away from cam), as opposed to
            # car-centered yaw (i.e. 0 degree = same orientation as car).
            # makes quite a difference for objects in periphery!
            # Result is in [0, 2pi]
            x, y, z = translation
            yawVisual = (yaw - np.arctan2(y, x)) % twoPi
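            # added note: cornerPosInVelo and yawVisual are computed here only to
            # demonstrate the typical per-frame geometry; this example does not use
            # them any further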
#end: for all frames in track
#end: for all tracks
#end: function example
# when somebody runs this file as a script:
# run example if no arg or only 'example' was given as arg
# otherwise run parseXML
if __name__ == "__main__":
    # cmdLineArgs[0] is the name of this script
    if len(cmdLineArgs) < 2:
        example()
    elif (len(cmdLineArgs) == 2) and (cmdLineArgs[1] == 'example'):
        example()
    else:
        parseXML(*cmdLineArgs[1:])