
Commit 5fbb892

Merge pull request longcw#42 from SnowMasaya/multi_scale
Multi scale Training
2 parents 0f6def2 + 7268683 commit 5fbb892

File tree

25 files changed, +650 −395 lines changed


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -100,3 +100,6 @@ ENV/
 models
 models/*
 data/*
+
+VOCdevkit/*
+src/*
README.md

Lines changed: 6 additions & 1 deletion
@@ -82,13 +82,18 @@ and set the path in `yolo2-pytorch/cfgs/exps/darknet19_exp1.py`.
 7. (optional) Training with TensorBoard.
 
 To use the TensorBoard, install Crayon (https://github.com/torrvision/crayon)
+How to use the crayon
+```
+docker pull alband/crayon
+docker run -d -p 8888:8888 -p 8889:8889 --name crayon alband/crayon
+```
+
 and set `use_tensorboard = True` in `yolo2-pytorch/cfgs/config.py`.
 
 
 6. Run the training program: `python train.py`.
 
 
-
 ### Evaluation
 
 Set the path of the `trained_model` in `yolo2-pytorch/cfgs/config.py`.
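Editorial note: the README addition covers only the server side; the training code talks to that server through the pycrayon client. A minimal client-side sketch (assuming the pycrayon package is installed; the experiment name is illustrative, not from this commit):

```python
from pycrayon import CrayonClient

cc = CrayonClient(hostname="localhost", port=8889)  # client API port mapped above
exp = cc.create_experiment("yolo2_multi_scale")     # illustrative name
exp.add_scalar_value("train_loss", 0.42, step=1)    # appears in TensorBoard
```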

cfgs/config.py

Lines changed: 25 additions & 2 deletions
@@ -1,6 +1,6 @@
 import os
-from config_voc import *
-from exps.darknet19_exp1 import *
+from .config_voc import *  # noqa
+from .exps.darknet19_exp1 import *  # noqa
 
 
 def mkdir(path, max_depth=3):
@@ -14,6 +14,28 @@ def mkdir(path, max_depth=3):
 
 # input and output size
 ############################
+multi_scale_inp_size = [np.array([320, 320], dtype=np.int),
+                        np.array([352, 352], dtype=np.int),
+                        np.array([384, 384], dtype=np.int),
+                        np.array([416, 416], dtype=np.int),
+                        np.array([448, 448], dtype=np.int),
+                        np.array([480, 480], dtype=np.int),
+                        np.array([512, 512], dtype=np.int),
+                        np.array([544, 544], dtype=np.int),
+                        np.array([576, 576], dtype=np.int),
+                        # np.array([608, 608], dtype=np.int),
+                        ]  # w, h
+multi_scale_out_size = [multi_scale_inp_size[0] / 32,
+                        multi_scale_inp_size[1] / 32,
+                        multi_scale_inp_size[2] / 32,
+                        multi_scale_inp_size[3] / 32,
+                        multi_scale_inp_size[4] / 32,
+                        multi_scale_inp_size[5] / 32,
+                        multi_scale_inp_size[6] / 32,
+                        multi_scale_inp_size[7] / 32,
+                        multi_scale_inp_size[8] / 32,
+                        # multi_scale_inp_size[9] / 32,
+                        ]  # w, h
 inp_size = np.array([416, 416], dtype=np.int)  # w, h
 out_size = inp_size / 32
 
@@ -28,6 +50,7 @@ def _to_color(indx, base):
     g = 2 - (indx % base2) % base
     return b * 127, r * 127, g * 127
 
+
 base = int(np.ceil(pow(num_classes, 1. / 3)))
 colors = [_to_color(x, base) for x in range(num_classes)]
 
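Side note (not part of the commit): the two hand-maintained lists above could be generated programmatically, and floor division would keep the derived output sizes integral. A minimal sketch, assuming the same numpy import as the config:

```python
import numpy as np

# Same nine square scales, 320..576 in steps of 32 (608 stays disabled,
# matching the commented-out entry in the commit).
multi_scale_inp_size = [np.array([s, s], dtype=np.int)
                        for s in range(320, 608, 32)]  # w, h
# Floor-divide by the network stride so feature-map sizes stay integers.
multi_scale_out_size = [s // 32 for s in multi_scale_inp_size]
```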
cfgs/config_voc.py

Lines changed: 3 additions & 2 deletions
@@ -12,6 +12,7 @@
                'sheep', 'sofa', 'train', 'tvmonitor')
 num_classes = len(label_names)
 
-anchors = np.asarray([(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)], dtype=np.float)
+anchors = np.asarray([(1.08, 1.19), (3.42, 4.41),
+                      (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)],
+                     dtype=np.float)
 num_anchors = len(anchors)
-
darknet.py

Lines changed: 70 additions & 39 deletions
@@ -6,8 +6,9 @@
 import utils.network as net_utils
 import cfgs.config as cfg
 from layers.reorg.reorg_layer import ReorgLayer
-from utils.cython_bbox import bbox_ious, bbox_intersections, bbox_overlaps, anchor_intersections
+from utils.cython_bbox import bbox_ious, anchor_intersections
 from utils.cython_yolo import yolo_to_bbox
+from functools import partial
 
 from multiprocessing import Pool
 
@@ -25,17 +26,21 @@ def _make_layers(in_channels, net_cfg):
                 layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
             else:
                 out_channels, ksize = item
-                layers.append(net_utils.Conv2d_BatchNorm(in_channels, out_channels, ksize, same_padding=True))
-                # layers.append(net_utils.Conv2d(in_channels, out_channels, ksize, same_padding=True))
+                layers.append(net_utils.Conv2d_BatchNorm(in_channels,
+                                                         out_channels,
+                                                         ksize,
+                                                         same_padding=True))
+                # layers.append(net_utils.Conv2d(in_channels, out_channels,
+                #                                ksize, same_padding=True))
                 in_channels = out_channels
 
     return nn.Sequential(*layers), in_channels
 
 
-def _process_batch(data):
-    W, H = cfg.out_size
-    inp_size = cfg.inp_size
-    out_size = cfg.out_size
+def _process_batch(data, size_index):
+    W, H = cfg.multi_scale_out_size[size_index]
+    inp_size = cfg.multi_scale_inp_size[size_index]
+    out_size = cfg.multi_scale_out_size[size_index]
 
     bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data
 
@@ -61,7 +66,8 @@ def _process_batch(data):
         np.ascontiguousarray(bbox_pred_np, dtype=np.float),
         anchors,
         H, W)
-    bbox_np = bbox_np[0]  # bbox_np = (hw, num_anchors, (x1, y1, x2, y2)) range: 0 ~ 1
+    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2)) range: 0 ~ 1
+    bbox_np = bbox_np[0]
     bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y
 
@@ -89,8 +95,10 @@ def _process_batch(data):
     target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float)
     target_boxes[:, 0] = cx - np.floor(cx)  # cx
     target_boxes[:, 1] = cy - np.floor(cy)  # cy
-    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
-    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th
+    target_boxes[:, 2] = \
+        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
+    target_boxes[:, 3] = \
+        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th
 
     # for each gt boxes, match the best anchor
     gt_boxes_resize = np.copy(gt_boxes_b)
@@ -105,12 +113,14 @@ def _process_batch(data):
     ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
     for i, cell_ind in enumerate(cell_inds):
         if cell_ind >= hw or cell_ind < 0:
-            print cell_ind
+            print('cell inds size {}'.format(len(cell_inds)))
+            print('cell over {} hw {}'.format(cell_ind, hw))
             continue
         a = anchor_inds[i]
 
-        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]  # 0 ~ 1, should be close to 1
-        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)
+        # 0 ~ 1, should be close to 1
+        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
+        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
         # _ious[cell_ind, a, :] = anchor_ious[a, i]
         _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]
 
@@ -154,13 +164,15 @@ def __init__(self):
         self.conv3, c3 = _make_layers(c2, net_cfgs[6])
 
         stride = 2
-        self.reorg = ReorgLayer(stride=2)  # stride*stride times the channels of conv1s
+        # stride*stride times the channels of conv1s
+        self.reorg = ReorgLayer(stride=2)
         # cat [conv1s, conv3]
         self.conv4, c4 = _make_layers((c1*(stride*stride) + c3), net_cfgs[7])
 
         # linear
         out_channels = cfg.num_anchors * (cfg.num_classes + 5)
         self.conv5 = net_utils.Conv2d(c4, out_channels, 1, 1, relu=False)
+        self.global_average_pool = nn.AvgPool2d((1, 1))
 
         # train
         self.bbox_loss = None
@@ -172,65 +184,83 @@ def __init__(self):
     def loss(self):
         return self.bbox_loss + self.iou_loss + self.cls_loss
 
-    def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None):
+    def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None,
+                size_index=0):
         conv1s = self.conv1s(im_data)
         conv2 = self.conv2(conv1s)
         conv3 = self.conv3(conv2)
         conv1s_reorg = self.reorg(conv1s)
         cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
         conv4 = self.conv4(cat_1_3)
         conv5 = self.conv5(conv4)  # batch_size, out_channels, h, w
+        global_average_pool = self.global_average_pool(conv5)
 
         # for detection
-        # bsize, c, h, w -> bsize, h, w, c -> bsize, h x w, num_anchors, 5+num_classes
-        bsize, _, h, w = conv5.size()
+        # bsize, c, h, w -> bsize, h, w, c ->
+        # bsize, h x w, num_anchors, 5+num_classes
+        bsize, _, h, w = global_average_pool.size()
         # assert bsize == 1, 'detection only support one image per batch'
-        conv5_reshaped = conv5.permute(0, 2, 3, 1).contiguous().view(bsize, -1, cfg.num_anchors, cfg.num_classes + 5)
+        global_average_pool_reshaped = \
+            global_average_pool.permute(0, 2, 3, 1).contiguous().view(bsize,
+                -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa
 
         # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
-        xy_pred = F.sigmoid(conv5_reshaped[:, :, :, 0:2])
-        wh_pred = torch.exp(conv5_reshaped[:, :, :, 2:4])
+        xy_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
+        wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
         bbox_pred = torch.cat([xy_pred, wh_pred], 3)
-        iou_pred = F.sigmoid(conv5_reshaped[:, :, :, 4:5])
+        iou_pred = F.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])
 
-        score_pred = conv5_reshaped[:, :, :, 5:].contiguous()
-        prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1])).view_as(score_pred)
+        score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
+        prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1])).view_as(score_pred)  # noqa
 
         # for training
         if self.training:
             bbox_pred_np = bbox_pred.data.cpu().numpy()
             iou_pred_np = iou_pred.data.cpu().numpy()
-            _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = self._build_target(
-                bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_np)
+            _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
+                self._build_target(bbox_pred_np,
+                                   gt_boxes,
+                                   gt_classes,
+                                   dontcare,
+                                   iou_pred_np,
+                                   size_index)
 
             _boxes = net_utils.np_to_variable(_boxes)
             _ious = net_utils.np_to_variable(_ious)
             _classes = net_utils.np_to_variable(_classes)
-            box_mask = net_utils.np_to_variable(_box_mask, dtype=torch.FloatTensor)
-            iou_mask = net_utils.np_to_variable(_iou_mask, dtype=torch.FloatTensor)
-            class_mask = net_utils.np_to_variable(_class_mask, dtype=torch.FloatTensor)
+            box_mask = net_utils.np_to_variable(_box_mask,
+                                                dtype=torch.FloatTensor)
+            iou_mask = net_utils.np_to_variable(_iou_mask,
+                                                dtype=torch.FloatTensor)
+            class_mask = net_utils.np_to_variable(_class_mask,
+                                                  dtype=torch.FloatTensor)
 
             num_boxes = sum((len(boxes) for boxes in gt_boxes))
 
             # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
             box_mask = box_mask.expand_as(_boxes)
 
-            self.bbox_loss = nn.MSELoss(size_average=False)(bbox_pred * box_mask, _boxes * box_mask) / num_boxes
-            self.iou_loss = nn.MSELoss(size_average=False)(iou_pred * iou_mask, _ious * iou_mask) / num_boxes
+            self.bbox_loss = nn.MSELoss(size_average=False)(bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
+            self.iou_loss = nn.MSELoss(size_average=False)(iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa
 
             class_mask = class_mask.expand_as(prob_pred)
-            self.cls_loss = nn.MSELoss(size_average=False)(prob_pred * class_mask, _classes * class_mask) / num_boxes
+            self.cls_loss = nn.MSELoss(size_average=False)(prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa
 
         return bbox_pred, iou_pred, prob_pred
 
-    def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_np):
+    def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare,
+                      iou_pred_np, size_index):
         """
-        :param bbox_pred: shape: (bsize, h x w, num_anchors, 4) : (sig(tx), sig(ty), exp(tw), exp(th))
+        :param bbox_pred: shape: (bsize, h x w, num_anchors, 4) :
+                          (sig(tx), sig(ty), exp(tw), exp(th))
         """
 
         bsize = bbox_pred_np.shape[0]
 
-        targets = self.pool.map(_process_batch, ((bbox_pred_np[b], gt_boxes[b], gt_classes[b], dontcare[b], iou_pred_np[b]) for b in range(bsize)))
+        targets = self.pool.map(partial(_process_batch, size_index=size_index),
+                                ((bbox_pred_np[b], gt_boxes[b],
+                                  gt_classes[b], dontcare[b], iou_pred_np[b])
+                                 for b in range(bsize)))
 
         _boxes = np.stack(tuple((row[0] for row in targets)))
         _ious = np.stack(tuple((row[1] for row in targets)))
@@ -244,27 +274,28 @@ def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare, iou_pred_n
     def load_from_npz(self, fname, num_conv=None):
         dest_src = {'conv.weight': 'kernel', 'conv.bias': 'biases',
                     'bn.weight': 'gamma', 'bn.bias': 'biases',
-                    'bn.running_mean': 'moving_mean', 'bn.running_var': 'moving_variance'}
+                    'bn.running_mean': 'moving_mean',
+                    'bn.running_var': 'moving_variance'}
         params = np.load(fname)
         own_dict = self.state_dict()
-        keys = own_dict.keys()
+        keys = list(own_dict.keys())
 
         for i, start in enumerate(range(0, len(keys), 5)):
-            if num_conv is not None and i>= num_conv:
+            if num_conv is not None and i >= num_conv:
                 break
             end = min(start+5, len(keys))
            for key in keys[start:end]:
                 list_key = key.split('.')
                 ptype = dest_src['{}.{}'.format(list_key[-2], list_key[-1])]
                 src_key = '{}-convolutional/{}:0'.format(i, ptype)
-                print(src_key, own_dict[key].size(), params[src_key].shape)
+                print((src_key, own_dict[key].size(), params[src_key].shape))
                 param = torch.from_numpy(params[src_key])
                 if ptype == 'kernel':
                     param = param.permute(3, 2, 0, 1)
                 own_dict[key].copy_(param)
 
+
 if __name__ == '__main__':
     net = Darknet19()
     # net.load_from_npz('models/yolo-voc.weights.npz')
     net.load_from_npz('models/darknet19.weights.npz', num_conv=18)
-
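Two implementation notes on this file (editorial, not part of the commit). First, `nn.AvgPool2d((1, 1))` with its default stride of 1 averages over 1x1 windows, so `global_average_pool` is an identity mapping here; the change affects naming, not the math. Second, the move to `functools.partial` is what threads the extra `size_index` argument through `multiprocessing.Pool.map`, which hands each worker exactly one positional argument per work item; unlike a lambda, a partial object also pickles cleanly. A minimal self-contained sketch of the pattern, with illustrative names:

```python
from functools import partial
from multiprocessing import Pool


def process(data, size_index):
    # Stand-in for _process_batch: one work item plus a bound extra arg.
    return data * size_index


if __name__ == '__main__':
    with Pool(2) as pool:
        # partial binds size_index, so each worker effectively calls
        # process(item, size_index=3).
        results = pool.map(partial(process, size_index=3), [1, 2, 4])
    print(results)  # [3, 6, 12]
```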
datasets/imdb.py

Lines changed: 23 additions & 12 deletions
@@ -1,7 +1,9 @@
 import os
-import PIL
 import numpy as np
 from multiprocessing import Pool
+from functools import partial
+import cfgs.config as cfg
+import cv2
 
 
 def mkdir(path, max_depth=3):
@@ -13,8 +15,15 @@ def mkdir(path, max_depth=3):
         os.mkdir(path)
 
 
+def image_resize(im, size_index):
+    w, h = cfg.multi_scale_inp_size[size_index]
+    im = cv2.resize(im, (w, h))
+    return im
+
+
 class ImageDataset(object):
-    def __init__(self, name, datadir, batch_size, im_processor, processes=3, shuffle=True, dst_size=None):
+    def __init__(self, name, datadir, batch_size, im_processor,
+                 processes=3, shuffle=True, dst_size=None):
         self._name = name
         self._data_dir = datadir
         self._batch_size = batch_size
@@ -38,29 +47,33 @@ def __init__(self, name, datadir, batch_size, im_processor, processes=3, shuffle
         self.gen = None
         self._im_processor = im_processor
 
-    def next_batch(self):
-        batch = {'images': [], 'gt_boxes': [], 'gt_classes': [], 'dontcare': [], 'origin_im': []}
+    def next_batch(self, size_index):
+        batch = {'images': [], 'gt_boxes': [], 'gt_classes': [],
+                 'dontcare': [], 'origin_im': []}
         i = 0
         while i < self.batch_size:
             try:
-                images, gt_boxes, classes, dontcare, origin_im = self.gen.next()
+                images, gt_boxes, classes, dontcare, origin_im = next(self.gen)
+                images = image_resize(images, size_index)
                 batch['images'].append(images)
                 batch['gt_boxes'].append(gt_boxes)
                 batch['gt_classes'].append(classes)
                 batch['dontcare'].append(dontcare)
                 batch['origin_im'].append(origin_im)
                 i += 1
-            except (StopIteration, AttributeError):
+            except (StopIteration, AttributeError, TypeError):
                 indexes = np.arange(len(self.image_names), dtype=np.int)
                 if self._shuffle:
                     np.random.shuffle(indexes)
-                self.gen = self.pool.imap(self._im_processor,
-                                          ([self.image_names[i], self.get_annotation(i), self.dst_size] for i in indexes),
+                self.gen = self.pool.imap(partial(self._im_processor,
+                                                  size_index=size_index),
+                                          ([self.image_names[i],
+                                            self.get_annotation(i),
+                                            self.dst_size] for i in indexes),
                                           chunksize=self.batch_size)
                 self._epoch += 1
-                print('epoch {} start...'.format(self._epoch))
+                print(('epoch {} start...'.format(self._epoch)))
         batch['images'] = np.asarray(batch['images'])
-
         return batch
 
     def close(self):
@@ -132,5 +145,3 @@ def batch_size(self):
     @property
     def batch_per_epoch(self):
         return self.num_images // self.batch_size
-
-
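Taken together, the dataset now resizes each image to the scale selected by `size_index` before batching, and `next(self.gen)` keeps the generator usable under Python 3. The driver that actually picks the scale lives in train.py, which is not among the excerpted hunks; presumably it re-rolls the index every few batches in the spirit of the YOLOv2 paper. A hypothetical sketch of that call pattern (names like `imdb`, `net`, and `num_batches` are illustrative):

```python
import numpy as np
import cfgs.config as cfg

# Hypothetical driver loop: re-roll the scale periodically, then pass
# the same size_index to both the dataset and the network.
for step in range(num_batches):          # num_batches defined elsewhere
    if step % 10 == 0:
        size_index = np.random.randint(len(cfg.multi_scale_inp_size))
    batch = imdb.next_batch(size_index)  # imdb: an ImageDataset instance
    # bbox_pred, iou_pred, prob_pred = net(im_data, gt_boxes,
    #                                      gt_classes, dontcare, size_index)
```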
0 commit comments