
Commit 8789289

author: Nikolaos
committed: initial implementation
1 parent 9a9af59 commit 8789289

File tree

9 files changed: +745 -1 lines changed

README.md

Lines changed: 74 additions & 1 deletion
# imbalanced_learning
This repository re-implements the ECCV 2018 paper [Deep Imbalanced Attribute Classification using Visual Attention Aggregation](https://arxiv.org/abs/1807.03903).

If you use this code, please mention this repo and cite the paper:

```
@InProceedings{Sarafianos_2018_ECCV,
  author    = {Sarafianos, Nikolaos and Xu, Xiang and Kakadiaris, Ioannis A.},
  title     = {Deep Imbalanced Attribute Classification using Visual Attention Aggregation},
  booktitle = {ECCV},
  year      = {2018}
}
```

# Development Environment

* Python 3.5

* MXNet with CUDA-9:

```
$ pip install --upgrade mxnet-cu90
```

* Add the project path to `PYTHONPATH`:

```
$ export PYTHONPATH=/project/path:$PYTHONPATH
$ cd /project/path
```
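
To verify that the CUDA build is visible to MXNet, a quick sanity check (assumes one GPU is available):

```
$ python -c "import mxnet as mx; print(mx.__version__)"
$ python -c "import mxnet as mx; print(mx.nd.ones((2, 2), ctx=mx.gpu()))"
```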
25+
26+
# Download Datasets
27+
28+
* WIDER-Attribute: The original images and the annotation files are provided [here](http://mmlab.ie.cuhk.edu.hk/projects/WIDERAttribute.html), cropped images for each human bounding box can be downloaded [here](https://github.com/zhufengx/SRN_multilabel). 28,340 cropped images in "train" and "val" for training, 29,177 cropped images in "test" for testing.
29+
30+
* PETA: The original images and the annotation files are provided [here](http://mmlab.ie.cuhk.edu.hk/projects/PETA.html). The train/val/test splits as well as the class ratio of the selected 35 attributes we used were obtained can be downloaded [here](https://github.com/asc-kit/vespa/tree/master/generated).
31+
32+
# Prepare Data
33+
34+
In both datasets all records, list and txt files are provided in `records/`
35+
36+
## PETA
37+
38+
* Place the PETA dataset under the path `/dataset/path/PETA/PETA_dataset/` and copy paste the folder while renaming it to `/dataset/path/PETA/PETA_preproc/`.
39+
40+
* Call the `resize_images` function from `preprocessing/` to resize all images to 256x256 and save them.
41+
42+
* Then using the train/val/text files call `preprocessing/` which will create the .lst files for each set and save them.
43+
44+
* From the initial MXNet download you should be able to find in the `tools/` the `im2rec.py` [file](https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py). Open a terminal and type:
45+
46+
```
47+
$ cd /incubator-mxnet/tools/
48+
$ python im2rec.py /project/path/peta_att /dataset/path/PETA/ --quality=100 --pack-label=True
49+
```
50+
51+
This will create the record files to feed to the iterator.
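
As an illustration of consuming the resulting `.rec` file (not part of this repo; the path and `label_width=35`, one slot per PETA attribute, are assumptions), MXNet's `ImageRecordIter` can read it directly:

```
import mxnet as mx

# Hypothetical path; label_width=35 assumes one label slot per PETA attribute.
train_iter = mx.io.ImageRecordIter(
    path_imgrec='/project/path/peta_att.rec',
    data_shape=(3, 256, 256),  # images were resized to 256x256 above
    batch_size=24,
    label_width=35,            # multi-label vector packed via --pack-label
    shuffle=True)

batch = train_iter.next()
print(batch.data[0].shape, batch.label[0].shape)  # (24, 3, 256, 256) (24, 35)
```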
52+
53+
## WIDER-Attribute

* Place the WIDER-Attribute dataset under `/dataset/path/WIDER/`. Then copy the images and rename the folder, as before, to `Image_cropped/`. A similar approach is required here, in which the images are resized using the function in `preprocessing/`.

* Place the downloaded annotation text files under `/dataset/path/WIDER/wider_att/`.

* Call the `data_prep` function from `preprocessing/` to obtain the image and annotation files and save them to .lst files.

* Similarly to the above, run:

```
$ cd /incubator-mxnet/tools/
$ python im2rec.py /project/path/DeepVisualAttributes /dataset/path/WIDER --quality=100 --pack-label=True
```

This will create the record files in `wider_records/` to feed to the iterator.
69+
70+
## Run the Code

* For the WIDER dataset, go to the respective folder and run `main.py`.

* Remember to provide the data path as an input argument; see the example below.
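
For example (the data path is a placeholder; the flags are those defined in `main.py`):

```
$ python main.py --data_path /dataset/path/WIDER/          # train
$ python main.py --data_path /dataset/path/WIDER/ --test   # evaluate
```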

attention.py

Lines changed: 51 additions & 0 deletions

```
import numpy as np
from mxnet import gluon
from mxnet import nd

from models import get_conv2D, get_fatt, get_fsr  # get_fsr is needed by attention_cl below


def get_action_labels(path_list):
    # Extract the action name from each image path and map it to an integer id.
    action_list = []
    for p in path_list:
        action_list.append(p.split("/")[3].split("--")[1])
    d = {key: value for (value, key) in enumerate(set(action_list))}

    actions = []
    for action in action_list:
        actions.append(d[action])
    return np.array(actions)


def compute_attention(features, fconv, fatt):
    # Run both heads on the backbone features and normalize the attention
    # scores with a softmax over the spatial dimensions of each class map.
    output_conv = fconv(features)
    output_att = fatt(features)
    temp_f = nd.reshape(output_att,
                        (output_att.shape[0] * output_att.shape[1], output_att.shape[2] * output_att.shape[3]))
    spatial_softmax = nd.reshape(nd.softmax(temp_f),
                                 (output_att.shape[0], output_att.shape[1], output_att.shape[2], output_att.shape[3]))
    return output_conv, spatial_softmax


def attention_net_trainer(lr_scheduler, classes, args, stride, ctx):
    fconv_stg = get_conv2D(classes, stride, ctx)
    fatt_stg = get_fatt(classes, stride, ctx)

    trainer_conv, trainer_att = [], []
    if not args.test:
        trainer_conv = gluon.Trainer(fconv_stg.collect_params(), optimizer='sgd',
                                     optimizer_params={'lr_scheduler': lr_scheduler,
                                                       'momentum': args.mom,
                                                       'wd': args.wd})

        trainer_att = gluon.Trainer(fatt_stg.collect_params(), optimizer='sgd',
                                    optimizer_params={'lr_scheduler': lr_scheduler,
                                                      'momentum': args.mom,
                                                      'wd': args.wd})

    return fconv_stg, fatt_stg, trainer_conv, trainer_att


def attention_cl(lr_scheduler, args, ctx, kernel_size=14):
    fsr_stg = get_fsr(args.num_classes, ctx, kernel_size)
    trainer_sr = gluon.Trainer(fsr_stg.collect_params(), optimizer='sgd',
                               optimizer_params={'lr_scheduler': lr_scheduler,
                                                 'momentum': args.mom,
                                                 'wd': args.wd})
    return fsr_stg, trainer_sr
```
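
A minimal sketch (not from the repo; the shapes and the element-wise combination are assumptions) of how the two heads produced by `compute_attention` fit together; with `stride=1` the spatial softmax makes each per-class attention map sum to one:

```
import mxnet as mx
from mxnet import nd
from models import get_conv2D, get_fatt
from attention import compute_attention

ctx = mx.cpu()  # assumption: CPU is enough for a shape check
features = nd.random.uniform(shape=(2, 1024, 14, 14), ctx=ctx)  # assumed backbone feature map

fconv = get_conv2D(14, 1, ctx)  # per-class confidence maps (sigmoid)
fatt = get_fatt(14, 1, ctx)     # per-class attention maps

conf_maps, att_maps = compute_attention(features, fconv, fatt)
print(conf_maps.shape, att_maps.shape)  # both (2, 14, 14, 14)
print(att_maps[0, 0].sum())             # ~1.0: the spatial softmax normalizes each map
weighted = conf_maps * att_maps         # illustrative attention-weighted confidences
```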

evaluation.py

Lines changed: 53 additions & 0 deletions

```
import numpy as np
from sklearn.metrics import average_precision_score
from utilities import prettyfloat


def evaluate_mAP(labels, predictions, testingFlag=False):

    def AP(scores, labels):
        # Interpolated average precision; labels are +1 for positives, -1 for negatives.
        # unspecified = np.where(labels == 0)[0]
        # scores = np.delete(scores, unspecified)
        # labels = np.delete(labels, unspecified)

        num_truths = sum(labels == 1)
        sort_ids = scores.argsort()[::-1]
        fp = np.cumsum(labels[sort_ids] == -1)
        tp = np.cumsum(labels[sort_ids] == 1)
        rec = tp / float(num_truths)
        prec = np.true_divide(tp, fp + tp)

        mrec = np.concatenate((np.array([0]), rec, np.array([1])), axis=0)
        mpre = np.concatenate((np.array([0]), prec, np.array([0])), axis=0)
        for i in range(mpre.shape[0] - 2, 0, -1):
            mpre[i] = max(mpre[i], mpre[i + 1])

        i = np.where(mrec[1:] != mrec[0:-1])[0] + 1
        ap = sum((mrec[i] - mrec[i - 1]) * mpre[i])
        return ap

    labels, predictions = np.array(labels), np.array(predictions)
    APs = []
    for att in range(predictions.shape[1]):
        if testingFlag:
            APs.append(AP(predictions[:, att], labels[:, att]))
        else:
            APs.append(average_precision_score(labels[:, att], predictions[:, att]))
    mean_AP = sum(APs) / float(len(APs))
    return mean_AP, APs


def results(labels_tr, predicts_tr, labels_val, predicts_val, epoch, moving_loss_tr, moving_loss_val, elapsed_time):
    predicts_tr, labels_tr = np.array(predicts_tr), np.array(labels_tr)
    predicts_val, labels_val = np.array(predicts_val), np.array(labels_val)

    train_mAP, train_APs = evaluate_mAP(labels_tr, predicts_tr)
    val_mAP, val_APs = evaluate_mAP(labels_val, predicts_val)

    print("Epoch [%d]: Train-Loss=%f" % (epoch, moving_loss_tr))
    print("Epoch [%d]: Val-Loss=%f" % (epoch, moving_loss_val))
    print("Epoch [%d]: Train-mAP=%f" % (epoch, train_mAP))
    print("Epoch [%d]: Val-mAP=%f" % (epoch, val_mAP))
    print("Epoch [%d]: Elapsed-time=%f" % (epoch, elapsed_time))
    print(list(map(prettyfloat, train_APs)))  # list() so Python 3 prints values, not a map object
    print(list(map(prettyfloat, val_APs)))

    return train_mAP, train_APs, val_mAP, val_APs
```
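
A toy check of `evaluate_mAP` in testing mode (made-up scores; labels use the +1/-1 convention expected by the interpolated-AP branch):

```
import numpy as np
from evaluation import evaluate_mAP

# 4 samples x 2 attributes; +1 = positive, -1 = negative (made-up values).
labels = np.array([[1, -1], [1, 1], [-1, -1], [-1, 1]])
scores = np.array([[0.9, 0.2], [0.8, 0.7], [0.3, 0.1], [0.2, 0.6]])

mean_ap, per_att_ap = evaluate_mAP(labels, scores, testingFlag=True)
print(mean_ap, per_att_ap)  # both attributes are ranked perfectly here, so the APs are 1.0
```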

main.py

Lines changed: 33 additions & 0 deletions

```
import argparse
import os

import mxnet as mx

from wider_training import train
from wider_testing import test

if __name__ == '__main__':
    parser = argparse.ArgumentParser('Deep Imbalanced Classification')
    parser.add_argument('--data_path', help='data directory')
    parser.add_argument('--epochs', default=250, type=int, help='epochs')
    parser.add_argument('--lr', default=0.0001, type=float, help='learning rate')
    parser.add_argument('--wd', default=0.0005, type=float, help='weight decay')
    parser.add_argument('--mom', default=0.9, type=float, help='momentum')
    parser.add_argument('--batch_size', default=24, type=int, help='batch size')
    parser.add_argument('--num_classes', default=14, type=int, help='number of classes')
    parser.add_argument('--finetune', action='store_true', help='fine-tune the backbone architecture or not')
    parser.add_argument('--test', action='store_true', help='testing')

    args = parser.parse_args()

    # Parameter naming
    params_name = 'saved_models/base_resNet.params'

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    ctx = mx.gpu()

    if args.test:
        test(args, ctx)
    else:
        train(args, ctx)
```

models.py

Lines changed: 96 additions & 0 deletions

```
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision


def get_fsr(num_classes, ctx, kernel_size):
    # Spatial regularization sub-network: 1x1 conv stack collapsed to class logits.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=256, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=512, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=1024, kernel_size=kernel_size))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Dense(num_classes, flatten=True))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)

    return net


def get_fatt(num_classes, stride, ctx):
    # Attention sub-network: produces one spatial attention map per class.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=512, kernel_size=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=512, kernel_size=3, padding=1))
        net.add(nn.BatchNorm())
        net.add(nn.Activation('relu'))
        # net.add(nn.Conv2D(channels=512, kernel_size=3, padding=1))
        # net.add(nn.BatchNorm())
        # net.add(nn.Activation('relu'))
        net.add(nn.Conv2D(channels=num_classes, kernel_size=1, strides=stride))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    return net


def get_conv2D(num_classes, stride, ctx):
    # Confidence sub-network: a 1x1 convolution with per-class sigmoid outputs.
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Conv2D(channels=num_classes, kernel_size=1, strides=stride))
        net.add(nn.Activation('sigmoid'))
    net.collect_params().initialize(mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    return net


def getResNet(num_classes, ctx, NoTraining=True):
    # ResNet-101 backbone exposing three intermediate feature maps plus the final logits.
    resnet = vision.resnet101_v1(pretrained=True, ctx=ctx)

    net = vision.resnet101_v1(classes=num_classes, prefix='resnetv10_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True, in_units=resnet.output._in_units)
    net.output.collect_params().initialize(
        mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    net.features = resnet.features

    net.collect_params().reset_ctx(ctx)

    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [internals['resnetv10_stage3_activation19_output'], internals['resnetv10_stage3_activation22_output'],
               internals['resnetv10_stage4_activation2_output'], internals['resnetv10_dense1_fwd_output']]
    feat_model = gluon.SymbolBlock(outputs, inputs, params=net.collect_params())
    feat_model._prefix = 'resnetv10_'
    if NoTraining:
        feat_model.collect_params().setattr('grad_req', 'null')  # freeze the backbone
    return feat_model


def getDenseNet(num_classes, ctx):
    # DenseNet-201 backbone exposing two intermediate feature maps plus the final logits.
    densenet = vision.densenet201(pretrained=True, ctx=ctx)

    net = vision.densenet201(classes=num_classes, prefix='densenet0_')
    with net.name_scope():
        net.output = nn.Dense(num_classes, flatten=True)
    net.output.collect_params().initialize(
        mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), ctx=ctx)
    net.features = densenet.features

    net.collect_params().reset_ctx(ctx)

    inputs = mx.sym.var('data')
    out = net(inputs)
    internals = out.get_internals()
    outputs = [internals['densenet0_conv3_fwd_output'], internals['densenet0_stage4_concat15_output'],
               internals['densenet0_dense1_fwd_output']]
    feat_model = gluon.SymbolBlock(outputs, inputs, params=net.collect_params())
    feat_model._prefix = 'densenet0_'

    return feat_model
```
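
A short usage sketch (not from the repo; the output shapes are what a 224x224 input would typically yield) of the multi-output backbone:

```
import mxnet as mx
from mxnet import nd
from models import getResNet

ctx = mx.cpu()  # assumption: CPU for the sketch; downloads pretrained weights on first use
feat_model = getResNet(14, ctx)

x = nd.random.uniform(shape=(1, 3, 224, 224), ctx=ctx)
feat_s3a, feat_s3b, feat_s4, logits = feat_model(x)
# Two stage-3 activations, one stage-4 activation, and the per-attribute logits,
# e.g. (1, 1024, 14, 14), (1, 1024, 14, 14), (1, 2048, 7, 7), and (1, 14).
```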

preprocessing.py

Lines changed: 43 additions & 0 deletions

```
import csv

import numpy as np
from PIL import Image

from utils import get_data


def resize_images():
    full_path = '/dataset/path/WIDER/'  # placeholder: set this to your data path

    image_path = full_path
    annotation_path = full_path + 'Annotations/'
    size = (256, 256)

    all_im_list_tr = np.array([line.rstrip('\n')[1:-2] for line in open(full_path + 'wider_att/wider_att_train_imglist.txt')])
    im_list_test = np.array([line.rstrip('\n')[1:-2] for line in open(full_path + 'wider_att/wider_att_test_imglist.txt')])

    # Saves images to the same folder. Make a copy of the initial folder first.
    for im in all_im_list_tr:
        img = Image.open(image_path + im[1:])
        img_res = img.resize(size, Image.ANTIALIAS)
        img_res.save(full_path[:-1] + im, 'JPEG')

    for im in im_list_test:
        img = Image.open(image_path + im[1:])
        img_res = img.resize(size, Image.ANTIALIAS)
        img_res.save(full_path[:-1] + im, 'JPEG')


def save2lists(im_list, att_list, filename):
    # Write one row per image: index, tab-separated attribute labels, then the image path.
    L = []
    for c, im in enumerate(im_list):
        tmp = list(att_list[c])
        L.append([str(c)] + list(map(str, tmp)) + [str(im)])  # list() so the concatenation works in Python 3
    with open(filename, 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(L)


def data_prep(full_path):
    im_list_tr, att_list_tr, im_list_val, att_list_val, im_list_test, att_list_test = get_data(full_path)
    save2lists(im_list_tr, att_list_tr, 'training_list.lst')
    save2lists(im_list_val, att_list_val, 'valid_list.lst')
    save2lists(im_list_test, att_list_test, 'testing_list.lst')
```
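
For reference, each row of the generated .lst files follows the im2rec list convention: an index, the tab-separated attribute labels, then the image path. The rows below are made-up examples with only four label columns shown; the real files carry one column per attribute:

```
0	1	0	1	0	images/img_0001.jpg
1	0	0	1	1	images/img_0002.jpg
```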
