diff --git a/datasets/combined.py b/datasets/combined.py
index 37e518c..0dd8d2a 100644
--- a/datasets/combined.py
+++ b/datasets/combined.py
@@ -3,11 +3,18 @@ from gluoncv.data.base import VisionDataset
 
 import os
+from nltk.corpus import wordnet as wn
+
+
+def id_to_name(wn_id):
+    return wn.synset_from_pos_and_offset('n', int(wn_id[1:])).name()
+
 
 
 class CombinedDetection(VisionDataset):
     """Combined detection Dataset."""
 
-    def __init__(self, datasets, root=os.path.join('datasets', 'combined'), index_map=None):
+    def __init__(self, datasets, root=os.path.join('datasets', 'combined'), class_tree=False):
         """
         :param datasets: list of dataset objects
         :param root: root path to store the dataset, str, default '/datasets/combined/'
+        :param class_tree: build the class list from a WordNet class tree, bool, default False
@@ -20,6 +27,7 @@ def __init__(self, datasets, root=os.path.join('datasets', 'combined'), index_ma
 
         self._datasets = datasets
         self._root = os.path.expanduser(root)
+        self._class_tree = class_tree
 
         self._samples = self._load_samples()
-        _, _, self._dataset_class_map = self._get_classes()
+        _, _, self._dataset_class_map, self._parents = self._get_classes()
@@ -31,18 +39,30 @@ def _get_classes(self):
         Take the classes from each dataset and reassign for the combined dataset
         :return:
         """
-        unique_classes = list()
-        unique_wn_classes = list()
+        classes = list()
+        classes_wn = list()
         dataset_class_maps = list()
-        for dataset_idx, dataset in enumerate(self._datasets):
-            dataset_class_map = list()
-            for cls, wn_cls in zip(dataset.classes, dataset.wn_classes):
-                if wn_cls not in unique_wn_classes:
-                    unique_wn_classes.append(wn_cls)
-                    unique_classes.append(cls)
-                dataset_class_map.append(unique_wn_classes.index(wn_cls))
-            dataset_class_maps.append(dataset_class_map)
-        return unique_classes, unique_wn_classes, dataset_class_maps
+        parents = None
+        if self._class_tree:
+            with open(os.path.join('datasets', 'trees', 'filtered_det.tree'), 'r') as f:
+                lines = f.readlines()
+            lines = [l.rstrip().split() for l in lines]
+            parents = dict()
+            for cls in lines:
+                classes_wn.append(cls[0])
+                classes.append(id_to_name(cls[0]))
+                parents[cls[0]] = cls[1]
+
+        for dataset_idx, dataset in enumerate(self._datasets):
+            dataset_class_map = list()
+            for cls, wn_cls in zip(dataset.classes, dataset.wn_classes):
+                if wn_cls not in classes_wn:
+                    classes_wn.append(wn_cls)
+                    classes.append(cls)
+
+                dataset_class_map.append(classes_wn.index(wn_cls))
+            dataset_class_maps.append(dataset_class_map)
+        return classes, classes_wn, dataset_class_maps, parents
 
     @property
     def classes(self):
@@ -137,6 +157,7 @@ def stats(self):
 
 
 if __name__ == '__main__':
+    from datasets.pascalvoc import VOCDetection
     from datasets.mscoco import COCODetection
     from datasets.imgnetdet import ImageNetDetection
 
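A note on the two new pieces above, since the tree parsing is terse: id_to_name maps an ImageNet-style WordNet ID to a readable synset name, and each line of filtered_det.tree is split into a (class, parent) pair of such IDs. A minimal sketch, assuming nltk's wordnet corpus is installed; the sample IDs are illustrative:

    from nltk.corpus import wordnet as wn

    def id_to_name(wn_id):
        # 'n02084071' is the letter 'n' plus the offset of a noun synset
        return wn.synset_from_pos_and_offset('n', int(wn_id[1:])).name()

    # one line of the tree file, assumed to be "<wnid> <parent-wnid>"
    wnid, parent_wnid = 'n02084071 n02083346'.split()
    print(id_to_name(wnid))         # dog.n.01
    print(id_to_name(parent_wnid))  # canine.n.02
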
diff --git a/models/definitions/yolo/transforms.py b/models/definitions/yolo/transforms.py
index c792eaf..2669118 100644
--- a/models/definitions/yolo/transforms.py
+++ b/models/definitions/yolo/transforms.py
@@ -10,6 +10,8 @@ from ...transforms import bbox as tbbox
 from ...transforms import video as tvideo
 
+from models.definitions.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
+
 
 class YOLO3DefaultTrainTransform(object):
     """Default YOLO training transform which includes tons of image augmentations.
 
@@ -51,7 +53,7 @@ def __init__(self, width, height, net=None, mean=(0.485, 0.456, 0.406),
             net.collect_params().reset_ctx(None)
             with autograd.train_mode():
                 _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(self._fake_x)
-        from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
+        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
         self._target_generator = YOLOV3PrefetchTargetGenerator(
             num_class=len(net.classes), **kwargs)
 
@@ -93,7 +95,7 @@ def __call__(self, src, label):
 
         # generate training target so cpu workers can help reduce the workload on gpu
         gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
-        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
+        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])  # TODO: build the one-hot class targets here
         if self._mixup:
             gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
         else:
@@ -185,7 +187,7 @@ def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
                 _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(self._fake_x)
             self._fake_x = mx.nd.zeros((1, 3, height, width))
 
-        from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
+        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
         self._target_generator = YOLOV3PrefetchTargetGenerator(num_class=len(net.classes), **kwargs)
 
     def __call__(self, src, label):
@@ -301,7 +303,7 @@ def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
                 _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(self._fake_x)
             self._fake_x = mx.nd.zeros((1, 3, height, width))
 
-        from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
+        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
         self._target_generator = YOLOV3PrefetchTargetGenerator(num_class=len(net.classes), **kwargs)
 
     def __call__(self, src, label):
@@ -498,7 +500,7 @@ def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
             net.collect_params().reset_ctx(None)
             with autograd.train_mode():
                 _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(*self._fake_x)
-        from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
+        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
         self._target_generator = YOLOV3PrefetchTargetGenerator(num_class=len(net.classes), **kwargs)
 
     def __call__(self, img, f1, f2, f3, bbox):
diff --git a/models/definitions/yolo/yolo_target.py b/models/definitions/yolo/yolo_target.py
index 0f7074d..b26fd89 100644
--- a/models/definitions/yolo/yolo_target.py
+++ b/models/definitions/yolo/yolo_target.py
@@ -122,6 +122,7 @@ def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None)
                     objectness[b, index, match, 0] = (
                         np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                     class_targets[b, index, match, :] = 0
+                    # class_targets[b, index, match, int(np_gt_ids[b, m, :])] = 1  # TODO: fix for multiple classes
                     class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
             # since some stages won't see partial anchors, so we have to slice the correct targets
             objectness = self._slice(objectness, num_anchors, num_offsets)
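Both TODO comments above point at the same follow-up: the target generator currently writes a single one-hot entry per matched anchor, and with class_tree enabled the natural extension is a multi-hot row that also marks each ancestor from the parents map returned by _get_classes. A NumPy sketch of that idea; the hierarchical part is an assumption about where the TODO is headed, not code from this patch:

    import numpy as np

    def class_target_row(cls_idx, num_class, classes_wn=None, parents=None):
        # plain one-hot, equivalent to:
        #   class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
        row = np.zeros(num_class)
        row[cls_idx] = 1
        # hypothetical tree-aware variant: propagate the label up the hierarchy
        if parents is not None:
            node = classes_wn[cls_idx]
            while parents.get(node) not in (None, node):  # stop at the root
                node = parents[node]
                if node in classes_wn:
                    row[classes_wn.index(node)] = 1
        return row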