Commit

Merge remote-tracking branch 'origin/master'

HaydenFaulkner committed Jan 21, 2020
2 parents 7ff982f + 547d62b commit 26ee424
Showing 10 changed files with 238 additions and 221 deletions.
140 changes: 77 additions & 63 deletions datasets/combined.py
@@ -2,7 +2,7 @@
 
 from gluoncv.data.base import VisionDataset
 import os
 
+import mxnet as mx
 from nltk.corpus import wordnet as wn
 
 
@@ -28,7 +28,10 @@ def __init__(self, datasets, root=os.path.join('datasets', 'combined'), class_tr
         self._root = os.path.expanduser(root)
         self._class_tree = class_tree
         self._samples = self._load_samples()
-        _, _, self._dataset_class_map = self._get_classes()
+        _, _, self._dataset_class_map, self._parents = self._get_classes()
+
+        self.class_levels = self.get_levels()
+        self.leaves = self.get_leaves()
 
     def __str__(self):
         return '\n\n' + self.__class__.__name__ + '\n' + self.stats()[0] + '\n'
@@ -45,22 +48,34 @@ def _get_classes(self):
         if self._class_tree:
             with open(os.path.join('datasets', 'trees', 'filtered_det.tree'), 'r') as f:
                 lines = f.readlines()
-                lines = [l.rstrip().split for l in lines]
+                lines = [l.rstrip().split() for l in lines]
+            parents = dict()
             for cls in lines:
                 classes_wn.append(cls[0])
                 classes.append(id_to_name(cls[0]))
+                parents[cls[0]] = cls[1]
 
-        for dataset_idx, dataset in enumerate(self._datasets):
-            dataset_class_map = list()
-            for wn_cls in dataset.wn_classes:
-                if wn_cls not in classes_wn:
-                    classes_wn.append(wn_cls)
-                    classes.append(cls)
-                dataset_class_map.append(classes_wn.index(wn_cls))
-            dataset_class_maps.append(dataset_class_map)
-        return classes, classes_wn, dataset_class_maps
+            # handle swapping of ids
+            with open(os.path.join('datasets', 'trees', 'new_classes.txt'), 'r') as f:
+                lines = f.readlines()
+                lines = [l.rstrip().split() for l in lines]
+            swap_ids = dict()
+            for ids in lines:
+                swap_ids[ids[0]] = ids[1]
+
+        for dataset_idx, dataset in enumerate(self._datasets):
+            dataset_class_map = list()
+            for cls in dataset.wn_classes:
+                if cls not in classes_wn:
+                    if self._class_tree:  # take into account where a swap needs to be done to the id
+                        assert cls in swap_ids, '%s not in swap_ids, should be added to new_classes.txt' % cls
+                        cls = swap_ids[cls]
+                    else:
+                        classes_wn.append(cls)
+                        classes.append(id_to_name(cls))
+
+                dataset_class_map.append(classes_wn.index(cls))
+            dataset_class_maps.append(dataset_class_map)
+        return classes, classes_wn, dataset_class_maps, parents
 
     @property
@@ -73,6 +88,32 @@ def wn_classes(self):
         """Category names."""
         return self._get_classes()[1]
 
+    def get_levels(self):
+        levels = list()
+        for c in self.wn_classes:
+            lvl = 0
+            p = c
+            while p != 'ROOT':
+                p = self._parents[p]
+                lvl += 1
+            levels.append(lvl)
+        return levels
+
+    def get_leaves(self):
+        is_parent = set()
+
+        for c in self.wn_classes:
+            is_parent.add(self._parents[c])
+
+        leaves = list()
+        for c in self.wn_classes:
+            if c in is_parent:
+                leaves.append(0)
+            else:
+                leaves.append(1)
+
+        return leaves
+
     def __len__(self):
         return len(self._samples)

@@ -81,15 +122,34 @@ def __getitem__(self, idx):
         dataset = self._datasets[dataset_idx]
 
         # fix class id
-        sample = dataset[dataset_sample_idx]
-        for si in range(len(sample[1])):
-            sample[1][si][4] = float(self._dataset_class_map[dataset_idx][int(sample[1][si][4])])
-        return sample
+        sample = list(dataset[dataset_sample_idx])
+        if self._class_tree:
+            boxes = mx.nd.zeros((sample[1].shape[0], 4 + len(self.classes)))
+            boxes[:, :4] = sample[1][:, :4]
+
+            for bi in range(len(sample[1])):
+                cls = int(self._dataset_class_map[dataset_idx][int(sample[1][bi][4])])
+                if cls < 0:
+                    boxes[bi, :] = -1
+                    continue
+                clss = [cls+4]
+                while self.wn_classes[cls] in self._parents:
+                    if self._parents[self.wn_classes[cls]] == 'ROOT': break
+                    cls = self.wn_classes.index(self._parents[self.wn_classes[cls]])
+                    clss.append(cls+4)
+                clss.reverse()
+                boxes[bi, clss] = 1
+            sample[1] = boxes.asnumpy()
+        else:
+            for bi in range(len(sample[1])):
+                sample[1][bi][4] = float(self._dataset_class_map[dataset_idx][int(sample[1][bi][4])])
+
+        return sample[0], sample[1]
 
     def _load_samples(self):
         samples = []
         for dataset_idx, dataset in enumerate(self._datasets):
-            for idx, item in enumerate(dataset):
+            for idx in range(len(dataset)):
                 samples.append((dataset_idx, idx, len(samples)))
         return samples

@@ -113,47 +173,6 @@ def stats(self):
 
         return out_str, cls_boxes
 
-    # def build_coco_json(self):
-    #
-    #     os.makedirs(os.path.dirname(self._coco_path), exist_ok=True)
-    #
-    #     # handle categories
-    #     categories = list()
-    #     for ci, (cls, wn_cls) in enumerate(zip(self.classes, self.wn_classes)):
-    #         categories.append({'id': ci, 'name': cls, 'wnid': wn_cls})
-    #
-    #     # handle images and boxes
-    #     images = list()
-    #     done_imgs = set()
-    #     annotations = list()
-    #     for idx in range(len(self)):
-    #         dataset, dataset_idx, id, dataset_id = self._items[idx]
-    #         img_id = dataset._items[dataset_idx]
-    #         filename = dataset._anno_path.format(*img_id)
-    #         width, height = dataset._im_shapes[dataset_idx]
-    #
-    #         img_id = self.image_ids[idx]
-    #         if img_id not in done_imgs:
-    #             done_imgs.add(img_id)
-    #             images.append({'file_name': filename,
-    #                            'width': int(width),
-    #                            'height': int(height),
-    #                            'id': img_id})
-    #
-    #         for box in self._load_label(idx):
-    #             xywh = [int(box[0]), int(box[1]), int(box[2])-int(box[0]), int(box[3])-int(box[1])]
-    #             annotations.append({'image_id': img_id,
-    #                                 'id': len(annotations),
-    #                                 'bbox': xywh,
-    #                                 'area': int(xywh[2] * xywh[3]),
-    #                                 'category_id': int(box[4]),
-    #                                 'iscrowd': 0})
-    #
-    #     with open(self._coco_path, 'w') as f:
-    #         json.dump({'images': images, 'annotations': annotations, 'categories': categories}, f)
-    #
-    #     return self._coco_path
 
 
 if __name__ == '__main__':
 
@@ -172,9 +191,4 @@ def stats(self):
     datasets.append(ImageNetVidDetection(splits=[(2017, 'val')], allow_empty=True, every=25, window=[1, 1]))
     print('Loaded VID')
 
-    cd = CombinedDetection(datasets)
-
-    print(cd.stats()[0])
-
-    # for s in cd:
-    #     print(s)
+    cd = CombinedDetection(datasets, class_tree=True)
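With `class_tree=True`, `__getitem__` above replaces each box's single class id with a 0/1 vector over all classes, marking the class itself and every ancestor below `ROOT`. A minimal sketch of that encoding in plain Python (the ids and the two-node tree are hypothetical, and plain lists stand in for the `mx.nd` arrays; in the dataset code these class slots follow the 4 box coordinates):

```python
parents = {'n_dog': 'n_animal', 'n_animal': 'ROOT'}  # child -> parent, as in filtered_det.tree
wn_classes = ['n_animal', 'n_dog']                   # class order defines the vector slots

def encode(cls_idx):
    vec = [0] * len(wn_classes)  # one slot per class
    vec[cls_idx] = 1             # the class itself
    cls = wn_classes[cls_idx]
    while cls in parents and parents[cls] != 'ROOT':  # walk ancestors up to ROOT
        cls = parents[cls]
        vec[wn_classes.index(cls)] = 1
    return vec

print(encode(1))  # [1, 1]: 'n_dog' plus its ancestor 'n_animal'
print(encode(0))  # [1, 0]: 'n_animal' sits directly below ROOT
```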
2 changes: 1 addition & 1 deletion datasets/mscoco.py
@@ -113,7 +113,7 @@ def _parse_image_path(self, entry):
         """
         dirname, filename = entry['coco_url'].split('/')[-2:]
-        abs_path = os.path.join(self.root, dirname, filename)
+        abs_path = os.path.join(self.root, 'images', dirname, filename)
         return abs_path
 
     def __len__(self):
13 changes: 13 additions & 0 deletions datasets/trees/README.md
@@ -0,0 +1,13 @@
+### Files Information
+`.tree` files are of the format `child_id parent_id`
+
+`9k.tree` - the original tree
+
+`filtered.tree` - the final filtered tree (without ImageNet-DET classes)
+
+`filtered_det.tree` - the final filtered tree (with ImageNet-DET classes)
+
+`new_parents.tree` - the old child -> parent assignments before filtering
+
+`new_classes.txt` - replacement of class ids in the format `old_id new_id` (used for merging classes across sets that are the same but labelled with different ids in each set)
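For illustration, both formats can be read into dictionaries with a couple of lines, mirroring the parsing in `datasets/combined.py` (a sketch assuming one whitespace-separated pair per line):

```python
import os

# child_id -> parent_id, from a .tree file
with open(os.path.join('datasets', 'trees', 'filtered_det.tree'), 'r') as f:
    parents = dict(l.rstrip().split() for l in f if l.strip())

# old_id -> new_id, from new_classes.txt
with open(os.path.join('datasets', 'trees', 'new_classes.txt'), 'r') as f:
    swap_ids = dict(l.rstrip().split() for l in f if l.strip())
```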
6 changes: 6 additions & 0 deletions datasets/trees/new_classes.txt
@@ -0,0 +1,6 @@
+n04530566 n02858304
+n07695965 n00021265
+n03614007 n03085013
+n03046257 n03196217
+n03062245 n04381994
+n04039381 n04409806
128 changes: 11 additions & 117 deletions models/definitions/yolo/transforms.py
@@ -140,121 +140,6 @@ def __call__(self, src, label, idx=None):
         return img, bbox.astype(img.dtype)
 
 
-class YOLO3VideoTrainTransformOld(object):  # todo delete... new one allows both single and t label output
-    """Video YOLO training transform which includes tons of image augmentations.
-    Parameters
-    ----------
-    width : int
-        Image width.
-    height : int
-        Image height.
-    net : mxnet.gluon.HybridBlock, optional
-        The yolo network.
-        .. hint::
-            If net is ``None``, the transformation will not generate training targets.
-            Otherwise it will generate training targets to accelerate the training phase
-            since we push some workload to CPU workers instead of GPUs.
-    mean : array-like of size 3
-        Mean pixel values to be subtracted from image tensor. Default is [0.485, 0.456, 0.406].
-    std : array-like of size 3
-        Standard deviation to be divided from image. Default is [0.229, 0.224, 0.225].
-    iou_thresh : float
-        IOU overlap threshold for maximum matching, default is 0.5.
-    box_norm : array-like of size 4, default is (0.1, 0.1, 0.2, 0.2)
-        Std value to be divided from encoded values.
-    """
-
-    def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
-                 std=(0.229, 0.224, 0.225), mixup=False, **kwargs):
-        self._k = k
-        self._width = width
-        self._height = height
-        self._mean = mean
-        self._std = std
-        self._mixup = mixup
-        self._target_generator = None
-        if net is None:
-            return
-
-        # in case network has reset_ctx to gpu
-        if k > 1:
-            self._fake_x = mx.nd.zeros((1, k, 3, height, width))
-        else:
-            self._fake_x = mx.nd.zeros((1, 3, height, width))
-        net = copy.deepcopy(net)
-        net.collect_params().reset_ctx(None)
-        with autograd.train_mode():
-            _, self._anchors, self._offsets, self._feat_maps, _, _, _, _ = net(self._fake_x)
-
-        self._fake_x = mx.nd.zeros((1, 3, height, width))
-        # from gluoncv.model_zoo.yolo.yolo_target import YOLOV3PrefetchTargetGenerator
-        self._target_generator = YOLOV3PrefetchTargetGenerator(num_class=len(net.classes), **kwargs)
-
-    def __call__(self, src, label):
-        """Apply transform to training image/label."""
-
-        img = src
-        was_three = False
-        if len(img.shape) == 3:
-            img = mx.nd.expand_dims(img, axis=0)
-            was_three = True
-
-        # random color jittering
-        img = tvideo.random_color_distort(img)
-
-        # random expansion with prob 0.5
-        if np.random.uniform(0, 1) > 0.5:
-            img, expand = tvideo.random_expand(img, fill=[m * 255 for m in self._mean])
-            bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
-        else:
-            img, bbox = img, label
-
-        # random cropping
-        k, h, w, c = img.shape
-        bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h))
-        x0, y0, w, h = crop
-        img = img[:, y0:y0 + h, x0:x0 + w, :]
-
-        # resize with random interpolation
-        k, h, w, c = img.shape
-        interp = np.random.randint(0, 5)
-        tmp = mx.nd.ones((k, self._height, self._width, c), ctx=img.context)
-        for i in range(k):
-            tmp[i] = timage.imresize(img[i], self._width, self._height, interp=interp)
-        img = tmp
-        bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))
-
-        # random horizontal flip with prob 0.5
-        k, h, w, c = img.shape
-        if np.random.uniform(0, 1) > 0.5:
-            img = mx.nd.flip(img, axis=2)
-            bbox = tbbox.flip(bbox, (w, h), flip_x=True)
-
-        img = mx.nd.image.to_tensor(img)  # to tensor, also transforms from k,h,w,c to k,c,h,w
-        # normalise
-        for i in range(k):
-            img[i] = mx.nd.image.normalize(img[i], mean=self._mean, std=self._std)  # normalise
-
-        if was_three:  # remove the k dimension so backwards compat with single frame
-            img = mx.nd.squeeze(img)
-
-        if self._target_generator is None:
-            return img, bbox.astype(img.dtype)
-
-        # generate training target so cpu workers can help reduce the workload on gpu
-        gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
-        gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
-        if self._mixup:
-            gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
-        else:
-            gt_mixratio = None
-        objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
-            self._fake_x, self._feat_maps, self._anchors, self._offsets,
-            gt_bboxes, gt_ids, gt_mixratio)
-        return (img, objectness[0], center_targets[0], scale_targets[0], weights[0],
-                class_targets[0], gt_bboxes[0])
-
-
 class YOLO3VideoTrainTransform(object):
     """Video YOLO training transform which includes tons of image augmentations.
@@ -280,7 +165,7 @@ class YOLO3VideoTrainTransform(object):
         Std value to be divided from encoded values.
     """
     def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
-                 std=(0.229, 0.224, 0.225), mixup=False, **kwargs):
+                 std=(0.229, 0.224, 0.225), mixup=False, num_classes=-1, **kwargs):
         self._k = k
         self._width = width
         self._height = height
@@ -292,6 +177,11 @@ def __init__(self, k, width, height, net=None, mean=(0.485, 0.456, 0.406),
         if net is None:
             return
 
+        if num_classes < 0:
+            self._num_classes = len(net.classes)
+        else:
+            self._num_classes = num_classes
+
         # in case network has reset_ctx to gpu
         if k > 1:
             self._fake_x = mx.nd.zeros((1, k, 3, height, width))
@@ -369,7 +259,11 @@ def __call__(self, src, label):
         for ts, bbox in enumerate(bboxs):
             # generate training target so cpu workers can help reduce the workload on gpu
             gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
-            gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
+            if bbox.shape[-1] == 6:  # one class
+                gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
+            else:
+                gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:4+self._num_classes])  # allow multiple classes in form of 1-hotish vector
+
             if self._mixup:
                 gt_mixratio = mx.nd.array(bbox[np.newaxis, :, -1:])
             else: