
Commit 8789289

author: Nikolaos
committed: initial implementation
1 parent 9a9af59 commit 8789289

File tree

9 files changed: +745 -1 lines changed

README.md

Lines changed: 74 additions & 1 deletion
# imbalanced_learning
This repository re-implements the ECCV 2018 paper [Deep Imbalanced Attribute Classification using Visual Attention Aggregation](https://arxiv.org/abs/1807.03903).

If you use this code, please mention this repo and cite the paper:

```
@InProceedings{Sarafianos_2018_ECCV,
  author    = {Sarafianos, Nikolaos and Xu, Xiang and Kakadiaris, Ioannis A.},
  title     = {Deep Imbalanced Attribute Classification using Visual Attention Aggregation},
  booktitle = {ECCV},
  year      = {2018}
}
```

# Development Environment

* Python 3.5

* MXNet with CUDA-9:

```
$ pip install --upgrade mxnet-cu90
```

* Add the project path to `PYTHONPATH`:

```
$ export PYTHONPATH=/project/path:$PYTHONPATH
$ cd /project/path
```
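
To verify that the CUDA build is visible to MXNet, a quick sanity check (assumes one GPU is available):

```
$ python -c "import mxnet as mx; print(mx.__version__)"
$ python -c "import mxnet as mx; print(mx.nd.ones((2, 2), ctx=mx.gpu()))"
```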
25+
26+
# Download Datasets
27+
28+
* WIDER-Attribute: The original images and the annotation files are provided [here](http://mmlab.ie.cuhk.edu.hk/projects/WIDERAttribute.html), cropped images for each human bounding box can be downloaded [here](https://github.com/zhufengx/SRN_multilabel). 28,340 cropped images in "train" and "val" for training, 29,177 cropped images in "test" for testing.
29+
30+
* PETA: The original images and the annotation files are provided [here](http://mmlab.ie.cuhk.edu.hk/projects/PETA.html). The train/val/test splits as well as the class ratio of the selected 35 attributes we used were obtained can be downloaded [here](https://github.com/asc-kit/vespa/tree/master/generated).
31+
32+
# Prepare Data
33+
34+
In both datasets all records, list and txt files are provided in `records/`
35+
36+
## PETA
37+
38+
* Place the PETA dataset under the path `/dataset/path/PETA/PETA_dataset/` and copy paste the folder while renaming it to `/dataset/path/PETA/PETA_preproc/`.
39+
40+
* Call the `resize_images` function from `preprocessing/` to resize all images to 256x256 and save them.
41+
42+
* Then using the train/val/text files call `preprocessing/` which will create the .lst files for each set and save them.
43+
44+
* From the initial MXNet download you should be able to find in the `tools/` the `im2rec.py` [file](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py). Open a terminal and type:
45+
46+
```
47+
$ cd /incubator-mxnet/tools/
48+
$ python im2rec.py /project/path/peta_att /dataset/path/PETA/ --quality=100 --pack-label=True
49+
```
50+
51+
This will create the record files to feed to the iterator.
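
As an illustration of consuming the resulting `.rec` file (not part of this repo; the path and `label_width=35`, one slot per PETA attribute, are assumptions), MXNet's `ImageRecordIter` can read it directly:

```
import mxnet as mx

# Hypothetical path; label_width=35 assumes one label slot per PETA attribute.
train_iter = mx.io.ImageRecordIter(
    path_imgrec='/project/path/peta_att.rec',
    data_shape=(3, 256, 256),  # images were resized to 256x256 above
    batch_size=24,
    label_width=35,            # multi-label vector packed via --pack-label
    shuffle=True)

batch = train_iter.next()
print(batch.data[0].shape, batch.label[0].shape)  # (24, 3, 256, 256) (24, 35)
```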
52+
53+
## WIDER-Attribute

* Place the WIDER-Attribute dataset under `/dataset/path/WIDER/`. Then copy the images and rename the folder, as before, to `Image_cropped/`. A similar approach is required here, in which the images are resized using the function in `preprocessing/`.

* Place the downloaded annotation text files under `/dataset/path/WIDER/wider_att/`.

* Call the `data_prep` function from `preprocessing/` to obtain the image and annotation files and save them to .lst files.

* Similarly to the above, run:

```
$ cd /incubator-mxnet/tools/
$ python im2rec.py /project/path/DeepVisualAttributes /dataset/path/WIDER --quality=100 --pack-label=True
```

This will create the record files in `wider_records/` to feed to the iterator.
69+
70+
## Run the Code

* For the WIDER dataset, go to the respective folder and run `main.py`.

* Remember to provide the data path as an input argument; see the example below.
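
For example (the data path is a placeholder; the flags are those defined in `main.py`):

```
$ python main.py --data_path /dataset/path/WIDER/          # train
$ python main.py --data_path /dataset/path/WIDER/ --test   # evaluate
```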

attention.py

Lines changed: 51 additions & 0 deletions

```
import numpy as np
from mxnet import gluon
from mxnet import nd

from models import get_conv2D, get_fatt, get_fsr  # get_fsr is needed by attention_cl below


def get_action_labels(path_list):
    # Extract the action name from each image path and map it to an integer id.
    action_list = []
    for p in path_list:
        action_list.append(p.split("/")[3].split("--")[1])
    d = {key: value for (value, key) in enumerate(set(action_list))}

    actions = []
    for action in action_list:
        actions.append(d[action])
    return np.array(actions)


def compute_attention(features, fconv, fatt):
    # Run both heads on the backbone features and normalize the attention
    # scores with a softmax over the spatial dimensions of each class map.
    output_conv = fconv(features)
    output_att = fatt(features)
    temp_f = nd.reshape(output_att,
                        (output_att.shape[0] * output_att.shape[1], output_att.shape[2] * output_att.shape[3]))
    spatial_softmax = nd.reshape(nd.softmax(temp_f),
                                 (output_att.shape[0], output_att.shape[1], output_att.shape[2], output_att.shape[3]))
    return output_conv, spatial_softmax


def attention_net_trainer(lr_scheduler, classes, args, stride, ctx):
    fconv_stg = get_conv2D(classes, stride, ctx)
    fatt_stg = get_fatt(classes, stride, ctx)

    trainer_conv, trainer_att = [], []
    if not args.test:
        trainer_conv = gluon.Trainer(fconv_stg.collect_params(), optimizer='sgd',
                                     optimizer_params={'lr_scheduler': lr_scheduler,
                                                       'momentum': args.mom,
                                                       'wd': args.wd})

        trainer_att = gluon.Trainer(fatt_stg.collect_params(), optimizer='sgd',
                                    optimizer_params={'lr_scheduler': lr_scheduler,
                                                      'momentum': args.mom,
                                                      'wd': args.wd})

    return fconv_stg, fatt_stg, trainer_conv, trainer_att


def attention_cl(lr_scheduler, args, ctx, kernel_size=14):
    fsr_stg = get_fsr(args.num_classes, ctx, kernel_size)
    trainer_sr = gluon.Trainer(fsr_stg.collect_params(), optimizer='sgd',
                               optimizer_params={'lr_scheduler': lr_scheduler,
                                                 'momentum': args.mom,
                                                 'wd': args.wd})
    return fsr_stg, trainer_sr
```
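
A minimal sketch (not from the repo; the shapes and the element-wise combination are assumptions) of how the two heads produced by `compute_attention` fit together; with `stride=1` the spatial softmax makes each per-class attention map sum to one:

```
import mxnet as mx
from mxnet import nd
from models import get_conv2D, get_fatt
from attention import compute_attention

ctx = mx.cpu()  # assumption: CPU is enough for a shape check
features = nd.random.uniform(shape=(2, 1024, 14, 14), ctx=ctx)  # assumed backbone feature map

fconv = get_conv2D(14, 1, ctx)  # per-class confidence maps (sigmoid)
fatt = get_fatt(14, 1, ctx)     # per-class attention maps

conf_maps, att_maps = compute_attention(features, fconv, fatt)
print(conf_maps.shape, att_maps.shape)  # both (2, 14, 14, 14)
print(att_maps[0, 0].sum())             # ~1.0: the spatial softmax normalizes each map
weighted = conf_maps * att_maps         # illustrative attention-weighted confidences
```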

evaluation.py

Lines changed: 53 additions & 0 deletions

```
import numpy as np
from sklearn.metrics import average_precision_score
from utilities import prettyfloat


def evaluate_mAP(labels, predictions, testingFlag=False):

    def AP(scores, labels):
        # Interpolated average precision; labels are +1 for positives, -1 for negatives.
        # unspecified = np.where(labels == 0)[0]
        # scores = np.delete(scores, unspecified)
        # labels = np.delete(labels, unspecified)

        num_truths = sum(labels == 1)
        sort_ids = scores.argsort()[::-1]
        fp = np.cumsum(labels[sort_ids] == -1)
        tp = np.cumsum(labels[sort_ids] == 1)
        rec = tp / float(num_truths)
        prec = np.true_divide(tp, fp + tp)

        mrec = np.concatenate((np.array([0]), rec, np.array([1])), axis=0)
        mpre = np.concatenate((np.array([0]), prec, np.array([0])), axis=0)
        for i in range(mpre.shape[0] - 2, 0, -1):
            mpre[i] = max(mpre[i], mpre[i + 1])

        i = np.where(mrec[1:] != mrec[0:-1])[0] + 1
        ap = sum((mrec[i] - mrec[i - 1]) * mpre[i])
        return ap

    labels, predictions = np.array(labels), np.array(predictions)
    APs = []
    for att in range(predictions.shape[1]):
        if testingFlag:
            APs.append(AP(predictions[:, att], labels[:, att]))
        else:
            APs.append(average_precision_score(labels[:, att], predictions[:, att]))
    mean_AP = sum(APs) / float(len(APs))
    return mean_AP, APs


def results(labels_tr, predicts_tr, labels_val, predicts_val, epoch, moving_loss_tr, moving_loss_val, elapsed_time):
    predicts_tr, labels_tr = np.array(predicts_tr), np.array(labels_tr)
    predicts_val, labels_val = np.array(predicts_val), np.array(labels_val)

    train_mAP, train_APs = evaluate_mAP(labels_tr, predicts_tr)
    val_mAP, val_APs = evaluate_mAP(labels_val, predicts_val)

    print("Epoch [%d]: Train-Loss=%f" % (epoch, moving_loss_tr))
    print("Epoch [%d]: Val-Loss=%f" % (epoch, moving_loss_val))
    print("Epoch [%d]: Train-mAP=%f" % (epoch, train_mAP))
    print("Epoch [%d]: Val-mAP=%f" % (epoch, val_mAP))
    print("Epoch [%d]: Elapsed-time=%f" % (epoch, elapsed_time))
    print(list(map(prettyfloat, train_APs)))  # list() so Python 3 prints values, not a map object
    print(list(map(prettyfloat, val_APs)))

    return train_mAP, train_APs, val_mAP, val_APs
```
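
A toy check of `evaluate_mAP` in testing mode (made-up scores; labels use the +1/-1 convention expected by the interpolated-AP branch):

```
import numpy as np
from evaluation import evaluate_mAP

# 4 samples x 2 attributes; +1 = positive, -1 = negative (made-up values).
labels = np.array([[1, -1], [1, 1], [-1, -1], [-1, 1]])
scores = np.array([[0.9, 0.2], [0.8, 0.7], [0.3, 0.1], [0.2, 0.6]])

mean_ap, per_att_ap = evaluate_mAP(labels, scores, testingFlag=True)
print(mean_ap, per_att_ap)  # both attributes are ranked perfectly here, so the APs are 1.0
```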

main.py

Lines changed: 33 additions & 0 deletions

```
import argparse
import os

import mxnet as mx

from wider_training import train
from wider_testing import test

if __name__ == '__main__':
    parser = argparse.ArgumentParser('Deep Imbalanced Classification')
    parser.add_argument('--data_path', help='data directory')
    parser.add_argument('--epochs', default=250, type=int, help='epochs')
    parser.add_argument('--lr', default=0.0001, type=float, help='learning rate')
    parser.add_argument('--wd', default=0.0005, type=float, help='weight decay')
    parser.add_argument('--mom', default=0.9, type=float, help='momentum')
    parser.add_argument('--batch_size', default=24, type=int, help='batch size')
    parser.add_argument('--num_classes', default=14, type=int, help='number of classes')
    parser.add_argument('--finetune', action='store_true', help='fine-tune the backbone architecture or not')
    parser.add_argument('--test', action='store_true', help='testing')

    args = parser.parse_args()

    # Parameter naming
    params_name = 'saved_models/base_resNet.params'

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    ctx = mx.gpu()

    if args.test:
        test(args, ctx)
    else:
        train(args, ctx)
```

models.py

Lines changed: 96 additions & 0 deletions

```
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision


def get_fsr(num_classes, ctx, kernel_size):
    # Spatial regularization sub-network: 1x1 conv stack collapsed to class logits.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=256, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=512, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=1024, kernel_size=kernel_size))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Dense(num_classes, flatten=True))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)

    return net


def get_fatt(num_classes, stride, ctx):
    # Attention sub-network: produces one spatial attention map per class.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=512, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=512, kernel_size=3, padding=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        # net.add(nn.Conv2D(channels=512, kernel_size=3, padding=1))
        # net.add(nn.BatchNorm())
        # net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=num_classes, kernel_size=1, strides=stride))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    return net


def get_conv2D(num_classes, stride, ctx):
    # Confidence sub-network: a 1x1 convolution with per-class sigmoid outputs.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=num_classes, kernel_size=1, strides=stride))
        net.add(nn.Activation('sigmoid'))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    return net


def getResNet(num_classes, ctx, NoTraining=True):
    # ResNet-101 backbone exposing three intermediate feature maps plus the final logits.
    resnet = vision.resnet101_v1(pretrained=True, ctx=ctx)

    net = vision.resnet101_v1(classes=num_classes, prefix='resnetv10_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True, in_units=resnet.output._in_units)
    net.output.collect_params().initialize(
        mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    net.features = resnet.features

    net.collect_params().reset_ctx(ctx)

    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [internals['resnetv10_stage3_activation19_output'], internals['resnetv10_stage3_activation22_output'],
               internals['resnetv10_stage4_activation2_output'], internals['resnetv10_dense1_fwd_output']]
    feat_model = gluon.SymbolBlock(outputs, inputs, params=net.collect_params())
    feat_model._prefix = 'resnetv10_'
    if NoTraining:
        feat_model.collect_params().setattr('grad_req', 'null')  # freeze the backbone
    return feat_model


def getDenseNet(num_classes, ctx):
    # DenseNet-201 backbone exposing two intermediate feature maps plus the final logits.
    densenet = vision.densenet201(pretrained=True, ctx=ctx)

    net = vision.densenet201(classes=num_classes, prefix='densenet0_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True)
    net.output.collect_params().initialize(
        mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    net.features = densenet.features

    net.collect_params().reset_ctx(ctx)

    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [internals['densenet0_conv3_fwd_output'], internals['densenet0_stage4_concat15_output'],
               internals['densenet0_dense1_fwd_output']]
    feat_model = gluon.SymbolBlock(outputs, inputs, params=net.collect_params())
    feat_model._prefix = 'densenet0_'

    return feat_model
```
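
A short usage sketch (not from the repo; the output shapes are what a 224x224 input would typically yield) of the multi-output backbone:

```
import mxnet as mx
from mxnet import nd
from models import getResNet

ctx = mx.cpu()  # assumption: CPU for the sketch; downloads pretrained weights on first use
feat_model = getResNet(14, ctx)

x = nd.random.uniform(shape=(1, 3, 224, 224), ctx=ctx)
feat_s3a, feat_s3b, feat_s4, logits = feat_model(x)
# Two stage-3 activations, one stage-4 activation, and the per-attribute logits,
# e.g. (1, 1024, 14, 14), (1, 1024, 14, 14), (1, 2048, 7, 7), and (1, 14).
```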

preprocessing.py

Lines changed: 43 additions & 0 deletions

```
import csv

import numpy as np
from PIL import Image

from utils import get_data


def resize_images():
    full_path = '/dataset/path/WIDER/'  # placeholder: set this to your data path

    image_path = full_path
    annotation_path = full_path + 'Annotations/'
    size = (256, 256)

    all_im_list_tr = np.array([line.rstrip('\n')[1:-2] for line in open(full_path + 'wider_att/wider_att_train_imglist.txt')])
    im_list_test = np.array([line.rstrip('\n')[1:-2] for line in open(full_path + 'wider_att/wider_att_test_imglist.txt')])

    # Saves images to the same folder. Make a copy of the initial folder first.
    for im in all_im_list_tr:
        img = Image.open(image_path + im[1:])
        img_res = img.resize(size, Image.ANTIALIAS)
        img_res.save(full_path[:-1] + im, 'JPEG')

    for im in im_list_test:
        img = Image.open(image_path + im[1:])
        img_res = img.resize(size, Image.ANTIALIAS)
        img_res.save(full_path[:-1] + im, 'JPEG')


def save2lists(im_list, att_list, filename):
    # Write one row per image: index, tab-separated attribute labels, then the image path.
    L = []
    for c, im in enumerate(im_list):
        tmp = list(att_list[c])
        L.append([str(c)] + list(map(str, tmp)) + [str(im)])  # list() so the concatenation works in Python 3
    with open(filename, 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(L)


def data_prep(full_path):
    im_list_tr, att_list_tr, im_list_val, att_list_val, im_list_test, att_list_test = get_data(full_path)
    save2lists(im_list_tr, att_list_tr, 'training_list.lst')
    save2lists(im_list_val, att_list_val, 'valid_list.lst')
    save2lists(im_list_test, att_list_test, 'testing_list.lst')
```
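
For reference, each row of the generated .lst files follows the im2rec list convention: an index, the tab-separated attribute labels, then the image path. The rows below are made-up examples with only four label columns shown; the real files carry one column per attribute:

```
0	1	0	1	0	images/img_0001.jpg
1	0	0	1	1	images/img_0002.jpg
```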
