"""
Implements an ORB-based object tracker as specified by the paper
"Object Tracking Based on ORB and Temporal-Spacial Constraint" by Shuang Wu
(IEEE Student Member), Yawen Fan, Shibao Zheng (IEEE Member), and Hua Yang
(IEEE Member).
Authors: Alberto Serrano, Stephen Kim
"""
import cv2
from vWriter import VideoWriterWrapper
# Define global variables
M = (0, 0)        # most recent motion vector (dx, dy)
centers = []      # history of search-frame centers, used to draw the path
vidWriter = None  # optional writer for recording the annotated output
def startVideoWriter(frame_width, frame_height):
    # Width and height are taken as parameters here; they must match the
    # dimensions of the scene images passed through the tracker.
    global vidWriter
    vidWriter = VideoWriterWrapper(frame_width, frame_height)
def endVideoWriter():
    global vidWriter
    vidWriter.cleanup()
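# Recording is optional. A typical session (a sketch; the resolution values
# below are placeholders) wraps the tracking loop with the two helpers above,
# so videoDrawMatches() can record each annotated frame:
#
#   startVideoWriter(1280, 720)  # match the scene's width and height
#   ...run the tracking loop...
#   endVideoWriter()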
"""
given two frames, previous and current frame, determine the search frame.
This function directly reflects formulas (9), (10), and (11) in the paper. What
gets returned in a 4-tuple representing S_i+1 from the paper
:param: prev (4-tuple) specifying a frame conforming to the following:
1. x coordinate of the center of the frame
2. y coordinate of the center of the frame
3. width of the frame
4. height of the frame
:param: curr (4-tuple) specifying a frame conforming to the following:
1. x coordinate of the center of the frame
2. y coordinate of the center of the frame
3. width of the frame
4. height of the frame
:param: ap (int) specifying an alpha constant; used to increase the search
frame's width and height
:return: (4-tuple) specifying the new search frame
"""
def getSearchFrame(prev, curr, ap = 5):
w = curr[2]
h = curr[3]
w_ = w + ap
h_ = h + ap
m_x = curr[0] - prev[0]
m_y = curr[1] - prev[1]
u_i = curr[0] + m_x
v_i = curr[1] + m_y
return(u_i, v_i, w_, h_)
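# Worked example: with prev centered at (100, 100) and curr at (104, 103),
# both 50x40, the motion vector is (4, 3), so the predicted search frame is
# centered at (108, 106) and padded to 55x45:
#   getSearchFrame((100, 100, 50, 40), (104, 103, 50, 40)) -> (108, 106, 55, 45)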
# [c_x, c_y, w, h] -> [x, y, w, h]
"""
given a frame, return a bounding box
:param: frame (4-tuple) specifying a frame conforming to the following:
1. x coordinate of the center of the frame
2. y coordinate of the center of the frame
3. width of the frame
4. height of the frame
:return: (4-tuple) specifying a bounding box conforming to the following:
1. x coordinate for top left of the bounding box
2. y coordinate for top left of the bounding box
3. width of the frame
4. height of the frame
"""
def bboxFromFrame(frame):
x = frame[0]
y = frame[1]
w = frame[2]
h = frame[3]
return (int(x - (w/2)), int(y - (h/2)), w, h)
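# Worked example: a 50x40 frame centered at (108, 106) has its top-left
# corner at (83, 86):
#   bboxFromFrame((108, 106, 50, 40)) -> (83, 86, 50, 40)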
"""
Processes an image with two frames of the same object at different times to
determine the most probable location of the next frame for the object. Function
returns two 4-tuples describing the previous frame (which is just the current
frame) and the next frame
:param: cur (nd np.array, an image) image corresponding to cframe
:param: nxt (nd np.array, an image) image corresponding to s_i, the search frame
:param: pframe (4-tuple) specifying the current frame conforming to the
following
1. x coordinate of the center of the frame
2. y coordinate of the center of the frame
3. width of the frame
4. height of the frame
:param: cframe (4-tuple) specifying the current frame conforming to the
following
1. x coordinate of the center of the frame
2. y coordinate of the center of the frame
3. width of the frame
4. height of the frame
:return: two 4-tuples; specifying the next previous and current frames
"""
def processLiveFeed(cur, nxt, pframe, cframe):
    global M
    orb = cv2.ORB_create(1000, 1.2)
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # Predict where to look in the next image.
    s_i = getSearchFrame(pframe, cframe)
    framebbox = bboxFromFrame(cframe)
    sbbox = bboxFromFrame(s_i)
    # Compute features within the region for the current frame
    x, y, w, h = framebbox
    kp1, des1 = orb.detectAndCompute(cur[y:y+h, x:x+w], None)
    # Compute features within the search region of the next frame
    s_x, s_y, s_w, s_h = sbbox
    kp2, des2 = orb.detectAndCompute(nxt[s_y:s_y+s_h, s_x:s_x+s_w], None)
    a = (x, y)
    b = (s_x, s_y)
    frame_i = cur[y:y+h, x:x+w]
    frame_ipp = nxt[s_y:s_y+s_h, s_x:s_x+s_w]
    if des1 is not None and des2 is not None:
        matches = bf.match(des1, des2)
        matches = sorted(matches, key=lambda val: val.distance)
        M = videoDrawMatches(frame_i, a, kp1, frame_ipp, b, kp2, matches,
                             0, framebbox, cur, s_i)
    # If either region produced no descriptors, M keeps its previous value,
    # so the tracker coasts on the last observed motion vector.
    nframe = (cframe[0] + int(M[0]), cframe[1] + int(M[1]), cframe[2], cframe[3])
    return cframe, nframe
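# A minimal driver sketch (hypothetical names: `frames` is any sequence of
# BGR scene images and `init` is the object's initial frame in
# (center x, center y, w, h) form; on the first call the previous and
# current frames can simply coincide):
#
#   pframe = cframe = init
#   for cur, nxt in zip(frames, frames[1:]):
#       pframe, cframe = processLiveFeed(cur, nxt, pframe, cframe)
#       cv2.waitKey(1)  # let the "Object tracking" window refresh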
"""
Probably gonna change in future commits but heres some documentation:
img1 - cropped image of "cur"
img1_coord - bounding box of "cur"
kp1 - keypoints of img1
img2 - cropped image of "nxt"
img2_coord - bounding box of "nxt"
kp2 - keypoints of img2
matches - matches between img1 and img2
"""
def videoDrawMatches(img1, img1_coord, kp1, img2, img2_coord, kp2, matches,
                     counter, bbox, out, s_i, n_key=3):
    global vidWriter
    # Average the scene coordinates of the top k matched keypoints in each
    # image; the difference of the two averages is the motion vector.
    k = min(n_key, len(matches))
    C1_x = C1_y = 0
    C2_x = C2_y = 0
    for mat in matches[:k]:
        # Get the matching keypoints for each of the images
        img1_idx = mat.queryIdx
        img2_idx = mat.trainIdx
        (x1, y1) = get_real_coordinate(kp1[img1_idx].pt, img1_coord)
        (x2, y2) = get_real_coordinate(kp2[img2_idx].pt, img2_coord)
        C1_x += x1
        C1_y += y1
        C2_x += x2
        C2_y += y2
        # Draw a circle around each matched keypoint in the current frame
        cv2.circle(out, (int(x1), int(y1)), 4, (0, 0, 255), 1)
    if k > 0:
        C1_x /= k
        C1_y /= k
        C2_x /= k
        C2_y /= k
    # For debugging
    # print("Query: (" + str(C1_x) + ",", str(C1_y) + ")")
    # print("Train: (" + str(C2_x) + ",", str(C2_y) + ")")
    # print("Motion: (" + str(C2_x - C1_x) + ",", str(C2_y - C1_y) + ")")
    # print()
    # Draw the history of search-frame centers
    centers.append((s_i[0], s_i[1]))
    for c_x, c_y in centers:
        cv2.circle(out, (round(c_x), round(c_y)), 1, (0, 255, 0), 4)
    # Draw the current bounding box
    p1 = (int(bbox[0]), int(bbox[1]))
    p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
    cv2.rectangle(out, p1, p2, (255, 0, 0), 2, 1)
    # if you want to see ORB keypoints and matches at each iteration
    # img3 = cv2.drawMatches(img1, kp1, img2, kp2, matches[:n_key], None, flags=2)
    # cv2.imshow("Top " + str(n_key) + " ORB Keypoints and matches", img3)
    # if you want to see the whole scene and watch the bounding box move
    cv2.imshow("Object tracking", out)
    if vidWriter is not None:
        vidWriter.write(out)
    return C2_x - C1_x, C2_y - C1_y
"""
Takes a coordinate from within a cropped image (e.g. a keypoint coordinate), a
frame specifying where the cropped image exists in relation to the scene, and
returns the "true" coordinate as it would exist in the scene.
:param: coord (2-tuple) - specifies (x,y) coordinate from within a cropped image
:param: frame (4-tuple) - specifies (x,y,w,h) bounding box that represents the
cropped image in a scene
:return: (2-tuple) the x,y coordinate in terms of the scene
This function proves useful in calculating the motion vector. Since ORB
keypoints and descriptors are determined based on a cropped version of a larger
scene (for performance reasons) when calculating the motion vector between two
images, the keypoints need to be converted back to the real coordinates in the
scene, otherwise the motion vectors are calculating relative distances which
don't accurately reflect the change in centers
"""
def get_real_coordinate(coord,frame):
return frame[0]+coord[0], frame[1]+coord[1]
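# Worked example: a keypoint at (10, 12) inside a crop whose top-left corner
# sits at (83, 86) in the scene maps to (93, 98):
#   get_real_coordinate((10, 12), (83, 86)) -> (93, 98)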
def main():
    # liveFeedMatches() is not defined in this module; it is assumed to be
    # provided elsewhere as the driver that feeds frames into processLiveFeed().
    liveFeedMatches()
if __name__ == "__main__":
    main()