Commit

Merge remote-tracking branch 'origin/master'

HaydenFaulkner committed Jan 21, 2020
2 parents 7ff982f + 547d62b commit 26ee424
Showing 10 changed files with 238 additions and 221 deletions.
140 changes: 77 additions & 63 deletions datasets/combined.py
@@ -2,7 +2,7 @@
 
 from gluoncv.data.base import VisionDataset
 import os
 
+import mxnet as mx
 from nltk.corpus import wordnet as wn
 
 
@@ -28,7 +28,10 @@ def __init__(self, datasets, root=os.path.join('datasets', 'combined'), class_tr
         self._root = os.path.expanduser(root)
         self._class_tree = class_tree
         self._samples = self._load_samples()
-        _, _, self._dataset_class_map = self._get_classes()
+        _, _, self._dataset_class_map, self._parents = self._get_classes()
+
+        self.class_levels = self.get_levels()
+        self.leaves = self.get_leaves()
 
     def __str__(self):
         return '\n\n' + self.__class__.__name__ + '\n' + self.stats()[0] + '\n'
@@ -45,22 +48,34 @@ def _get_classes(self):
         if self._class_tree:
             with open(os.path.join('datasets', 'trees', 'filtered_det.tree'), 'r') as f:
                 lines = f.readlines()
-                lines = [l.rstrip().split for l in lines]
+                lines = [l.rstrip().split() for l in lines]
+            parents = dict()
             for cls in lines:
                 classes_wn.append(cls[0])
                 classes.append(id_to_name(cls[0]))
+                parents[cls[0]] = cls[1]
 
-        for dataset_idx, dataset in enumerate(self._datasets):
-            dataset_class_map = list()
-            for wn_cls in dataset.wn_classes:
-                if wn_cls not in classes_wn:
-                    classes_wn.append(wn_cls)
-                    classes.append(cls)
-                dataset_class_map.append(classes_wn.index(wn_cls))
-            dataset_class_maps.append(dataset_class_map)
-        return classes, classes_wn, dataset_class_maps
+            # handle swapping of ids
+            with open(os.path.join('datasets', 'trees', 'new_classes.txt'), 'r') as f:
+                lines = f.readlines()
+                lines = [l.rstrip().split() for l in lines]
+            swap_ids = dict()
+            for ids in lines:
+                swap_ids[ids[0]] = ids[1]
+
+        for dataset_idx, dataset in enumerate(self._datasets):
+            dataset_class_map = list()
+            for cls in dataset.wn_classes:
+                if cls not in classes_wn:
+                    if self._class_tree:  # take into account where a swap needs to be done to the id
+                        assert cls in swap_ids, '%s not in swap_ids, should be added to new_classes.txt' % cls
+                        cls = swap_ids[cls]
+                    else:
+                        classes_wn.append(cls)
+                        classes.append(id_to_name(cls))
+
+                dataset_class_map.append(classes_wn.index(cls))
+            dataset_class_maps.append(dataset_class_map)
+        return classes, classes_wn, dataset_class_maps, parents
 
     @property
@@ -73,6 +88,32 @@ def wn_classes(self):
         """Category names."""
         return self._get_classes()[1]
 
+    def get_levels(self):
+        levels = list()
+        for c in self.wn_classes:
+            lvl = 0
+            p = c
+            while p != 'ROOT':
+                p = self._parents[p]
+                lvl += 1
+            levels.append(lvl)
+        return levels
+
+    def get_leaves(self):
+        is_parent = set()
+
+        for c in self.wn_classes:
+            is_parent.add(self._parents[c])
+
+        leaves = list()
+        for c in self.wn_classes:
+            if c in is_parent:
+                leaves.append(0)
+            else:
+                leaves.append(1)
+
+        return leaves
+
     def __len__(self):
         return len(self._samples)

@@ -81,15 +122,34 @@ def __getitem__(self, idx):
         dataset = self._datasets[dataset_idx]
 
         # fix class id
-        sample = dataset[dataset_sample_idx]
-        for si in range(len(sample[1])):
-            sample[1][si][4] = float(self._dataset_class_map[dataset_idx][int(sample[1][si][4])])
-        return sample
+        sample = list(dataset[dataset_sample_idx])
+        if self._class_tree:
+            boxes = mx.nd.zeros((sample[1].shape[0], 4 + len(self.classes)))
+            boxes[:, :4] = sample[1][:, :4]
+
+            for bi in range(len(sample[1])):
+                cls = int(self._dataset_class_map[dataset_idx][int(sample[1][bi][4])])
+                if cls < 0:
+                    boxes[bi, :] = -1
+                    continue
+                clss = [cls+4]
+                while self.wn_classes[cls] in self._parents:
+                    if self._parents[self.wn_classes[cls]] == 'ROOT': break
+                    cls = self.wn_classes.index(self._parents[self.wn_classes[cls]])
+                    clss.append(cls+4)
+                clss.reverse()
+                boxes[bi, clss] = 1
+            sample[1] = boxes.asnumpy()
+        else:
+            for bi in range(len(sample[1])):
+                sample[1][bi][4] = float(self._dataset_class_map[dataset_idx][int(sample[1][bi][4])])
+
+        return sample[0], sample[1]
 
     def _load_samples(self):
         samples = []
         for dataset_idx, dataset in enumerate(self._datasets):
-            for idx, item in enumerate(dataset):
+            for idx in range(len(dataset)):
                 samples.append((dataset_idx, idx, len(samples)))
         return samples

@@ -113,47 +173,6 @@ def stats(self):
 
         return out_str, cls_boxes
 
-    # def build_coco_json(self):
-    #
-    #     os.makedirs(os.path.dirname(self._coco_path), exist_ok=True)
-    #
-    #     # handle categories
-    #     categories = list()
-    #     for ci, (cls, wn_cls) in enumerate(zip(self.classes, self.wn_classes)):
-    #         categories.append({'id': ci, 'name': cls, 'wnid': wn_cls})
-    #
-    #     # handle images and boxes
-    #     images = list()
-    #     done_imgs = set()
-    #     annotations = list()
-    #     for idx in range(len(self)):
-    #         dataset, dataset_idx, id, dataset_id = self._items[idx]
-    #         img_id = dataset._items[dataset_idx]
-    #         filename = dataset._anno_path.format(*img_id)
-    #         width, height = dataset._im_shapes[dataset_idx]
-    #
-    #         img_id = self.image_ids[idx]
-    #         if img_id not in done_imgs:
-    #             done_imgs.add(img_id)
-    #             images.append({'file_name': filename,
-    #                            'width': int(width),
-    #                            'height': int(height),
-    #                            'id': img_id})
-    #
-    #         for box in self._load_label(idx):
-    #             xywh = [int(box[0]), int(box[1]), int(box[2])-int(box[0]), int(box[3])-int(box[1])]
-    #             annotations.append({'image_id': img_id,
-    #                                 'id': len(annotations),
-    #                                 'bbox': xywh,
-    #                                 'area': int(xywh[2] * xywh[3]),
-    #                                 'category_id': int(box[4]),
-    #                                 'iscrowd': 0})
-    #
-    #     with open(self._coco_path, 'w') as f:
-    #         json.dump({'images': images, 'annotations': annotations, 'categories': categories}, f)
-    #
-    #     return self._coco_path
 
 
 if __name__ == '__main__':
 
@@ -172,9 +191,4 @@ def stats(self):
     datasets.append(ImageNetVidDetection(splits=[(2017, 'val')], allow_empty=True, every=25, window=[1, 1]))
     print('Loaded VID')
 
-    cd = CombinedDetection(datasets)
-
-    print(cd.stats()[0])
-
-    # for s in cd:
-    #     print(s)
+    cd = CombinedDetection(datasets, class_tree=True)
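With `class_tree=True`, `__getitem__` above replaces each box's single class id with a 0/1 vector over all classes, marking the class itself and every ancestor below `ROOT`. A minimal sketch of that encoding in plain Python (the ids and the two-node tree are hypothetical, and plain lists stand in for the `mx.nd` arrays; in the dataset code these class slots follow the 4 box coordinates):

```python
parents = {'n_dog': 'n_animal', 'n_animal': 'ROOT'}  # child -> parent, as in filtered_det.tree
wn_classes = ['n_animal', 'n_dog']                   # class order defines the vector slots

def encode(cls_idx):
    vec = [0] * len(wn_classes)  # one slot per class
    vec[cls_idx] = 1             # the class itself
    cls = wn_classes[cls_idx]
    while cls in parents and parents[cls] != 'ROOT':  # walk ancestors up to ROOT
        cls = parents[cls]
        vec[wn_classes.index(cls)] = 1
    return vec

print(encode(1))  # [1, 1]: 'n_dog' plus its ancestor 'n_animal'
print(encode(0))  # [1, 0]: 'n_animal' sits directly below ROOT
```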
2 changes: 1 addition & 1 deletion datasets/mscoco.py
@@ -113,7 +113,7 @@ def _parse_image_path(self, entry):
         """
         dirname, filename = entry['coco_url'].split('/')[-2:]
-        abs_path = os.path.join(self.root, dirname, filename)
+        abs_path = os.path.join(self.root, 'images', dirname, filename)
         return abs_path
 
     def __len__(self):
13 changes: 13 additions & 0 deletions datasets/trees/README.md
@@ -0,0 +1,13 @@
+### Files Information
+`.tree` files are of the format `child_id parent_id`
+
+`9k.tree` - the original tree
+
+`filtered.tree` - the final filtered tree (without ImageNet-DET classes)
+
+`filtered_det.tree` - the final filtered tree (with ImageNet-DET classes)
+
+`new_parents.tree` - the old child -> parent assignments before filtering
+
+`new_classes.txt` - replacement of class ids in the format `old_id new_id` (used for merging classes across sets that are the same but labelled with different ids in each set)
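For illustration, both formats can be read into dictionaries with a couple of lines, mirroring the parsing in `datasets/combined.py` (a sketch assuming one whitespace-separated pair per line):

```python
import os

# child_id -> parent_id, from a .tree file
with open(os.path.join('datasets', 'trees', 'filtered_det.tree'), 'r') as f:
    parents = dict(l.rstrip().split() for l in f if l.strip())

# old_id -> new_id, from new_classes.txt
with open(os.path.join('datasets', 'trees', 'new_classes.txt'), 'r') as f:
    swap_ids = dict(l.rstrip().split() for l in f if l.strip())
```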
6 changes: 6 additions & 0 deletions datasets/trees/new_classes.txt
@@ -0,0 +1,6 @@
+n04530566 n02858304
+n07695965 n00021265
+n03614007 n03085013
+n03046257 n03196217
+n03062245 n04381994
+n04039381 n04409806
128 changes: 11 additions & 117 deletions models/definitions/yolo/transforms.py
@@ -140,121 +140,6 @@ def __call__(self, src, label, idx=None):
         return img, bbox.astype(img.dtype)
 
 
-class YOLO3VideoTrainTransformOld(object):  # todo delete... new one allows both single and t label output
-    """Video YOLO training transform which includes tons of image augmentations.
-    Parameters
-    ----------
-    width : int
-        Image width.
-    height : int
-        Image height.
-    net : mxnet.gluon.HybridBlock, optional
-        The yolo network.
-        .. hint::
-            If net is ``None``, the transformation will not generate training targets.
-            Otherwise it will generate training targets to accelerate the training phase
-            since we push some workload to CPU workers instead of GPUs.
-    mean : array-like of size 3
-        Mean pixel values to be subtracted from image tensor. Default is [0.485, 0.456, 0.406].
-    std : array-like of size 3
-        Standard deviation to be divided from image. Default is [0.229, 0.224, 0.225].
-    iou_thresh : float
-        IOU overlap threshold for maximum matching, default is 0.5.
-    box_norm : array-like of size 4, default is (0.1, 0.1, 0.2, 0.2)
-        Std value to be divided from encoded values.
-    """
-
-    def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
-                 std=(0.229, 0.224, 0.225), mixup=False, **kwargs):
-        self._k = k
-        self._width = width
-        self._height = height
-        self._mean = mean
-        self._std = std
-        self._mixup = mixup
-        self._target_generator = None
-        if net is None:
-            return
-
-        # in case network has reset_ctx to gpu
-        if k > 1:
-            self._fake_x = mx.nd.zeros((1, k, 3, height, width))
-        else:
-            self._fake_x = mx.nd.zeros((1, 3, height, width))
-        net = copy.deepcopy(net)
-        net.collect_params().reset_ctx(None)
-        with autograd.train_mode():
-            _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(self._fake_x)
-
-        self._fake_x = mx.nd.zeros((1, 3, height, width))
-        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
-        self._target_generator = YOLOV3PrefetchTargetGenerator(num_class=len(net.classes), **kwargs)
-
-    def __call__(self, src, label):
-        """Apply transform to training image/label."""
-
-        img = src
-        was_three = False
-        if len(img.shape) == 3:
-            img = mx.nd.expand_dims(img, axis=0)
-            was_three = True
-
-        # random color jittering
-        img = tvideo.random_color_distort(img)
-
-        # random expansion with prob 0.5
-        if np.random.uniform(0, 1) > 0.5:
-            img, expand = tvideo.random_expand(img, fill=[m * 255 for m in self._mean])
-            bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
-        else:
-            img, bbox = img, label
-
-        # random cropping
-        k, h, w, c = img.shape
-        bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h))
-        x0, y0, w, h = crop
-        img = img[:, y0:y0 + h, x0:x0 + w, :]
-
-        # resize with random interpolation
-        k, h, w, c = img.shape
-        interp = np.random.randint(0, 5)
-        tmp = mx.nd.ones((k, self._height, self._width, c), ctx=img.context)
-        for i in range(k):
-            tmp[i] = timage.imresize(img[i], self._width, self._height, interp=interp)
-        img = tmp
-        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))
-
-        # random horizontal flip with prob 0.5
-        k, h, w, c = img.shape
-        if np.random.uniform(0, 1) > 0.5:
-            img = mx.nd.flip(img, axis=2)
-            bbox = tbbox.flip(bbox, (w, h), flip_x=True)
-
-        img = mx.nd.image.to_tensor(img)  # to tensor, also transforms from k,h,w,c to k,c,h,w
-        # normalise
-        for i in range(k):
-            img[i] = mx.nd.image.normalize(img[i], mean=self._mean, std=self._std)  # normalise
-
-        if was_three:  # remove the k dimension so backwards compat with single frame
-            img = mx.nd.squeeze(img)
-
-        if self._target_generator is None:
-            return img, bbox.astype(img.dtype)
-
-        # generate training target so cpu workers can help reduce the workload on gpu
-        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
-        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
-        if self._mixup:
-            gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
-        else:
-            gt_mixratio = None
-        objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
-            self._fake_x, self._feat_maps, self._anchors, self._offsets,
-            gt_bboxes, gt_ids, gt_mixratio)
-        return (img, objectness[0], center_targets[0], scale_targets[0], weights[0],
-                class_targets[0], gt_bboxes[0])
-
-
 class YOLO3VideoTrainTransform(object):
     """Video YOLO training transform which includes tons of image augmentations.
@@ -280,7 +165,7 @@ class YOLO3VideoTrainTransform(object):
         Std value to be divided from encoded values.
     """
     def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
-                 std=(0.229, 0.224, 0.225), mixup=False, **kwargs):
+                 std=(0.229, 0.224, 0.225), mixup=False, num_classes=-1, **kwargs):
         self._k = k
         self._width = width
         self._height = height
@@ -292,6 +177,11 @@ def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
         if net is None:
             return
 
+        if num_classes < 0:
+            self._num_classes = len(net.classes)
+        else:
+            self._num_classes = num_classes
+
         # in case network has reset_ctx to gpu
         if k > 1:
             self._fake_x = mx.nd.zeros((1, k, 3, height, width))
@@ -369,7 +259,11 @@ def __call__(self, src, label):
         for ts, bbox in enumerate(bboxs):
             # generate training target so cpu workers can help reduce the workload on gpu
             gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
-            gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
+            if bbox.shape[-1] == 6:  # one class
+                gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
+            else:
+                gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:4+self._num_classes])  # allow multiple classes in form of 1-hotish vector
+
             if self._mixup:
                 gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
             else: