import utils.network as net_utils
import cfgs.config as cfg
from layers.reorg.reorg_layer import ReorgLayer
- from utils.cython_bbox import bbox_ious, bbox_intersections, bbox_overlaps, anchor_intersections
+ from utils.cython_bbox import bbox_ious, anchor_intersections
from utils.cython_yolo import yolo_to_bbox
+ from functools import partial

from multiprocessing import Pool
@@ -25,17 +26,21 @@ def _make_layers(in_channels, net_cfg):
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                out_channels, ksize = item
-                 layers.append(net_utils.Conv2d_BatchNorm(in_channels, out_channels, ksize, same_padding=True))
-                 # layers.append(net_utils.Conv2d(in_channels, out_channels, ksize, same_padding=True))
+                 layers.append(net_utils.Conv2d_BatchNorm(in_channels,
+                                                          out_channels,
+                                                          ksize,
+                                                          same_padding=True))
+                 # layers.append(net_utils.Conv2d(in_channels, out_channels,
+                 #                                ksize, same_padding=True))
                in_channels = out_channels

    return nn.Sequential(*layers), in_channels


- def _process_batch(data):
-     W, H = cfg.out_size
-     inp_size = cfg.inp_size
-     out_size = cfg.out_size
+ def _process_batch(data, size_index):
+     W, H = cfg.multi_scale_out_size[size_index]
+     inp_size = cfg.multi_scale_inp_size[size_index]
+     out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

@@ -61,7 +66,8 @@ def _process_batch(data):
        np.ascontiguousarray(bbox_pred_np, dtype=np.float),
        anchors,
        H, W)
-     bbox_np = bbox_np[0]  # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
+     # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
+     bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

@@ -89,8 +95,10 @@ def _process_batch(data):
    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx
    target_boxes[:, 1] = cy - np.floor(cy)  # cy
-     target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
-     target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th
+     target_boxes[:, 2] = \
+         (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
+     target_boxes[:, 3] = \
+         (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

    # for each gt boxes, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
@@ -105,12 +113,14 @@ def _process_batch(data):
    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
-             print cell_ind
+             print('cell inds size {}'.format(len(cell_inds)))
+             print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

-         iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]  # 0 ~ 1, should be close to 1
-         _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)
+         # 0 ~ 1, should be close to 1
+         iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
+         _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        # _ious[cell_ind, a, :] = anchor_ious[a, i]
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

@@ -154,13 +164,15 @@ def __init__(self):
        self.conv3, c3 = _make_layers(c2, net_cfgs[6])

        stride = 2
-         self.reorg = ReorgLayer(stride=2)  # stride*stride times the channels of conv1s
+         # stride*stride times the channels of conv1s
+         self.reorg = ReorgLayer(stride=2)
        # cat [conv1s, conv3]
        self.conv4, c4 = _make_layers((c1 * (stride * stride) + c3), net_cfgs[7])

        # linear
        out_channels = cfg.num_anchors * (cfg.num_classes + 5)
        self.conv5 = net_utils.Conv2d(c4, out_channels, 1, 1, relu=False)
+         self.global_average_pool = nn.AvgPool2d((1, 1))

        # train
        self.bbox_loss = None
@@ -172,65 +184,83 @@ def __init__(self):
    def loss(self):
        return self.bbox_loss + self.iou_loss + self.cls_loss

-     def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None):
+     def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None,
+                 size_index=0):
        conv1s = self.conv1s(im_data)
        conv2 = self.conv2(conv1s)
        conv3 = self.conv3(conv2)
        conv1s_reorg = self.reorg(conv1s)
        cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
        conv4 = self.conv4(cat_1_3)
        conv5 = self.conv5(conv4)  # batch_size, out_channels, h, w
+         global_average_pool = self.global_average_pool(conv5)

        # for detection
-         # bsize, c, h, w -> bsize, h, w, c -> bsize, h x w, num_anchors, 5+num_classes
-         bsize, _, h, w = conv5.size()
+         # bsize, c, h, w -> bsize, h, w, c ->
+         #                   bsize, h x w, num_anchors, 5+num_classes
+         bsize, _, h, w = global_average_pool.size()
        # assert bsize == 1, 'detection only support one image per batch'
-         conv5_reshaped = conv5.permute(0, 2, 3, 1).contiguous().view(bsize, -1, cfg.num_anchors, cfg.num_classes + 5)
+         global_average_pool_reshaped = \
+             global_average_pool.permute(0, 2, 3, 1).contiguous().view(bsize,
+                 -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa

        # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
-         xy_pred = F.sigmoid(conv5_reshaped[:, :, :, 0:2])
-         wh_pred = torch.exp(conv5_reshaped[:, :, :, 2:4])
+         xy_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
+         wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
        bbox_pred = torch.cat([xy_pred, wh_pred], 3)
-         iou_pred = F.sigmoid(conv5_reshaped[:, :, :, 4:5])
+         iou_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

-         score_pred = conv5_reshaped[:, :, :, 5:].contiguous()
-         prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1])).view_as(score_pred)
+         score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
+         prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1])).view_as(score_pred)  # noqa

        # for training
        if self.training:
            bbox_pred_np = bbox_pred.data.cpu().numpy()
            iou_pred_np = iou_pred.data.cpu().numpy()
-             _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = self._build_target(
-                 bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_np)
+             _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
+                 self._build_target(bbox_pred_np,
+                                    gt_boxes,
+                                    gt_classes,
+                                    dontcare,
+                                    iou_pred_np,
+                                    size_index)

            _boxes = net_utils.np_to_variable(_boxes)
            _ious = net_utils.np_to_variable(_ious)
            _classes = net_utils.np_to_variable(_classes)
-             box_mask = net_utils.np_to_variable(_box_mask, dtype=torch.FloatTensor)
-             iou_mask = net_utils.np_to_variable(_iou_mask, dtype=torch.FloatTensor)
-             class_mask = net_utils.np_to_variable(_class_mask, dtype=torch.FloatTensor)
+             box_mask = net_utils.np_to_variable(_box_mask,
+                                                 dtype=torch.FloatTensor)
+             iou_mask = net_utils.np_to_variable(_iou_mask,
+                                                 dtype=torch.FloatTensor)
+             class_mask = net_utils.np_to_variable(_class_mask,
+                                                   dtype=torch.FloatTensor)

            num_boxes = sum((len(boxes) for boxes in gt_boxes))

            # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
            box_mask = box_mask.expand_as(_boxes)

-             self.bbox_loss = nn.MSELoss(size_average=False)(bbox_pred * box_mask, _boxes * box_mask) / num_boxes
-             self.iou_loss = nn.MSELoss(size_average=False)(iou_pred * iou_mask, _ious * iou_mask) / num_boxes
+             self.bbox_loss = nn.MSELoss(size_average=False)(bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
+             self.iou_loss = nn.MSELoss(size_average=False)(iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

            class_mask = class_mask.expand_as(prob_pred)
-             self.cls_loss = nn.MSELoss(size_average=False)(prob_pred * class_mask, _classes * class_mask) / num_boxes
+             self.cls_loss = nn.MSELoss(size_average=False)(prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

        return bbox_pred, iou_pred, prob_pred

-     def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_np):
+     def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare,
+                       iou_pred_np, size_index):
        """
-         :param bbox_pred: shape: (bsize, h x w, num_anchors, 4) : (sig(tx), sig(ty), exp(tw), exp(th))
+         :param bbox_pred: shape: (bsize, h x w, num_anchors, 4) :
+                           (sig(tx), sig(ty), exp(tw), exp(th))
        """

        bsize = bbox_pred_np.shape[0]

-         targets = self.pool.map(_process_batch, ((bbox_pred_np[b], gt_boxes[b], gt_classes[b], dontcare[b], iou_pred_np[b]) for b in range(bsize)))
+         targets = self.pool.map(partial(_process_batch, size_index=size_index),
+                                 ((bbox_pred_np[b], gt_boxes[b],
+                                   gt_classes[b], dontcare[b], iou_pred_np[b])
+                                  for b in range(bsize)))

        _boxes = np.stack(tuple((row[0] for row in targets)))
        _ious = np.stack(tuple((row[1] for row in targets)))
@@ -244,27 +274,28 @@ def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_n
    def load_from_npz(self, fname, num_conv=None):
        dest_src = {'conv.weight': 'kernel', 'conv.bias': 'biases',
                    'bn.weight': 'gamma', 'bn.bias': 'biases',
-                     'bn.running_mean': 'moving_mean', 'bn.running_var': 'moving_variance'}
+                     'bn.running_mean': 'moving_mean',
+                     'bn.running_var': 'moving_variance'}
        params = np.load(fname)
        own_dict = self.state_dict()
-         keys = own_dict.keys()
+         keys = list(own_dict.keys())

        for i, start in enumerate(range(0, len(keys), 5)):
-             if num_conv is not None and i >= num_conv:
+             if num_conv is not None and i >= num_conv:
                break
            end = min(start + 5, len(keys))
            for key in keys[start:end]:
                list_key = key.split('.')
                ptype = dest_src['{}.{}'.format(list_key[-2], list_key[-1])]
                src_key = '{}-convolutional/{}:0'.format(i, ptype)
-                 print(src_key, own_dict[key].size(), params[src_key].shape)
+                 print((src_key, own_dict[key].size(), params[src_key].shape))
                param = torch.from_numpy(params[src_key])
                if ptype == 'kernel':
                    param = param.permute(3, 2, 0, 1)
                own_dict[key].copy_(param)

+
if __name__ == '__main__':
    net = Darknet19()
    # net.load_from_npz('models/yolo-voc.weights.npz')
    net.load_from_npz('models/darknet19.weights.npz', num_conv=18)
-
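Below is a minimal, hypothetical sketch of how the new size_index argument could be driven from a training loop: a new scale is drawn every few batches (YOLOv2 switches input resolution every 10 batches), the data pipeline is assumed to resize images to cfg.multi_scale_inp_size[size_index], and the same index is passed to forward() so that _build_target/_process_batch compute targets at the matching cfg.multi_scale_out_size entry. The load_batches() generator, the darknet module path, and the optimizer settings are placeholders rather than part of the code above.

import random

import torch

import cfgs.config as cfg
import utils.network as net_utils
from darknet import Darknet19  # assumed module name for the file shown above


def load_batches():
    """Hypothetical stand-in for the project's data loader.

    Yields (im_data, gt_boxes, gt_classes, dontcare) batches; a real loader
    would resize im_data to the currently selected cfg.multi_scale_inp_size
    entry before batching.
    """
    raise NotImplementedError


net = Darknet19()
net.train()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)

size_index = 0
for step, (im_data, gt_boxes, gt_classes, dontcare) in enumerate(load_batches()):
    if step % 10 == 0:
        # pick a new input resolution (YOLOv2-style multi-scale training)
        size_index = random.randint(0, len(cfg.multi_scale_inp_size) - 1)

    im_data = net_utils.np_to_variable(im_data)

    # in training mode forward() stores bbox_loss / iou_loss / cls_loss on the
    # module; the same size_index flows into _build_target/_process_batch so
    # targets are built at cfg.multi_scale_out_size[size_index]
    net(im_data, gt_boxes, gt_classes, dontcare, size_index)
    loss = net.bbox_loss + net.iou_loss + net.cls_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()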