Commit
Min Jin Chong committed on Aug 20, 2017 · 1 parent 2b56005 · commit e3248b3
Showing 6 changed files with 1,172 additions and 0 deletions.
@@ -0,0 +1,97 @@
from __future__ import print_function
from multiprocessing import Pool
from PIL import Image
import numpy as np
import animeface
import sys
import os


# im is from PIL.Image.open, face_pos is the detected position object, m is the margin
def faceCrop(im, face_pos, m):
    """
    m is the relative margin added around the detected face box
    """
    x, y, w, h = face_pos.x, face_pos.y, face_pos.width, face_pos.height
    sizeX, sizeY = im.size
    new_x, new_y = max(0, x - m * w), max(0, y - m * h)
    new_w = w + 2 * m * w if sizeX > (new_x + w + 2 * m * w) else sizeX - new_x
    new_h = h + 2 * m * h if sizeY > (new_y + h + 2 * m * h) else sizeY - new_y
    new_x, new_y, new_w, new_h = int(new_x), int(new_y), int(new_w), int(new_h)
    return im.crop((new_x, new_y, new_x + new_w, new_y + new_h))
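
# Illustrative example (hypothetical numbers): for a detection at x=100, y=80,
# w=40, h=40 and margin m=0.5, the crop starts at (100 - 0.5*40, 80 - 0.5*40) = (80, 60)
# and spans 40 + 2*0.5*40 = 80 pixels per side, clipped at the image border if needed.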


def min_resize_crop(im, min_side):
    """Resize so the shorter side equals min_side, then crop the top-left min_side x min_side square."""
    sizeX, sizeY = im.size
    if sizeX > sizeY:
        im = im.resize((int(min_side * sizeX / sizeY), min_side), Image.ANTIALIAS)
    else:
        im = im.resize((min_side, int(sizeY * min_side / sizeX)), Image.ANTIALIAS)
    return im.crop((0, 0, min_side, min_side))
    # return im


def load_detect(img_path):
    """Read an image file and return the cropped face image at size 96x96.
    Input: a string giving the image path.
    Output: the detected face image at size 96x96, or None if no face is found.
    Note that there may be multiple faces in one image; the output corresponds
    to the face with the highest detection likelihood.
    """
    im = Image.open(img_path)
    faces = animeface.detect(im)
    if len(faces) == 0:
        return None
    prob_list = [face.likelihood for face in faces]
    idx = np.argmax(np.array(prob_list))
    face_pos = faces[idx].face.pos
    im = faceCrop(im, face_pos, 0.5)
    return min_resize_crop(im, 96)
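
# A minimal usage sketch (the path is hypothetical; assumes the python-animeface
# and Pillow packages imported above are installed):
#
#   face = load_detect('danbooru/some_class/12345.jpg')
#   if face is not None:
#       print(face.size)  # (96, 96)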


def process_img(img_path):
    """
    Crop the face from one image; the face images are stored under {${pwd} + faces},
    one sub-directory per class.
    """
    tmp = img_path.split('/')
    cls_name, img_name = tmp[-2], tmp[-1]
    new_dir_path = os.path.join('faces', cls_name)
    try:
        os.makedirs(new_dir_path)
    except OSError as err:
        print("OS error: {0}".format(err))

    new_img_path = os.path.join(new_dir_path, img_name)
    if os.path.exists(new_img_path):
        return
    im = load_detect(img_path)
    # no face was detected in this image
    if im is None:
        return
    im.save(new_img_path, 'JPEG')


def try_process_img(img_path):
    try:
        process_img(img_path)
    except Exception:
        e = sys.exc_info()[0]
        print('Err: %s \n' % e)


# multiprocessing version
def multi_construct_face_dataset(base_dir):
    cls_dirs = [f for f in os.listdir(base_dir)]
    imgs = []
    for i in range(len(cls_dirs)):
        sub_dir = os.path.join(base_dir, cls_dirs[i])
        imgs_tmp = [os.path.join(sub_dir, f) for f in os.listdir(sub_dir) if f.endswith(('.jpg', '.png'))]
        imgs = imgs + imgs_tmp
    print('There are %d classes, %d images in total. \n' % (len(cls_dirs), len(imgs)))
    pool = Pool(12)  # 12 worker processes
    pool.map(try_process_img, imgs)


if __name__ == '__main__':
    base_dir = '/home/jielei/gallery-dl/danbooru'
    multi_construct_face_dataset(base_dir)
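
# Directory layout assumed by the functions above (inferred from the code,
# shown only as an illustration):
#   <base_dir>/<class_name>/<image>.jpg|.png   input images, one folder per class
#   ./faces/<class_name>/<image>               cropped 96x96 faces, saved as JPEG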
@@ -0,0 +1,274 @@
from __future__ import print_function
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import time
import random
import argparse
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable, grad

### load project files
import models
import srresnet
from models import weights_init

parser = argparse.ArgumentParser()
parser.add_argument('--dataRoot', default='./danbooru-faces', help='path to dataset')
parser.add_argument('--workers', type=int, default=12, help='number of data loading workers')
parser.add_argument('--batchSize', type=int, default=128, help='input batch size')
parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
parser.add_argument('--nz', type=int, default=128, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--niter', type=int, default=100, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda', type=int, default=1, help='enables cuda')
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--netG', default='', help="path to netG (to continue training)")
parser.add_argument('--netD', default='', help="path to netD (to continue training)")
parser.add_argument('--outDir', required=True, help='folder to output images and model checkpoints')
parser.add_argument('--model', required=True, help='DCGAN | RESNET | IGAN | DRAGAN | WGAN')
parser.add_argument('--d_labelSmooth', type=float, default=0.1, help='for D, use soft label "1-labelSmooth" for real samples')
parser.add_argument('--n_extra_layers_d', type=int, default=0, help='number of extra conv layers in D')
parser.add_argument('--n_extra_layers_g', type=int, default=1, help='number of extra conv layers in G')
parser.add_argument('--pix_shuf', type=int, default=1, help='Use pixel shuffle instead of deconvolution')
parser.add_argument('--lambda_', type=int, default=10, help='Weight of gradient penalty (DRAGAN)')
parser.add_argument('--binary', action='store_true', help='z from bernoulli distribution, with prob=0.5')


# Alternative: hard-code the arguments as a list and pass it to parse_args (see below)
# arg_list = [
#     '--dataRoot', '/home/jielei/data/danbooru-faces',
#     '--workers', '12',
#     '--batchSize', '128',
#     '--imageSize', '64',
#     '--nz', '100',
#     '--ngf', '64',
#     '--ndf', '64',
#     '--niter', '80',
#     '--lr', '0.0002',
#     '--beta1', '0.5',
#     '--cuda',
#     '--ngpu', '1',
#     '--netG', '',
#     '--netD', '',
#     '--outDir', './results',
#     '--model', '1',
#     '--d_labelSmooth', '0.1',  # 0.25 from the improved-GAN paper
#     '--n_extra_layers_d', '0',
#     '--n_extra_layers_g', '1',  # in the sense that the generator should be more powerful
# ]
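
# A hypothetical command-line invocation (a sketch only; the script filename and
# dataset path are assumptions, not part of this commit):
#
#   python main.py --dataRoot ./danbooru-faces --workers 12 --batchSize 128 \
#                  --imageSize 64 --model DRAGAN --outDir dragan_faces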

opt = parser.parse_args()
# opt = parser.parse_args(arg_list)
print(opt)

# DRAGAN uses LayerNorm in the discriminator; the other models use BatchNorm
if opt.model == 'DRAGAN':
    norm = 'LayerNorm'
else:
    norm = 'BatchNorm'

# Make directories
opt.outDir = './results/' + opt.outDir
opt.modelsDir = opt.outDir + '/models'
opt.imDir = opt.outDir + '/images'

# Recursively create the image and model directories
try:
    os.makedirs(opt.imDir)
except OSError:
    pass
try:
    os.makedirs(opt.modelsDir)
except OSError:
    pass

opt.manualSeed = random.randint(1, 10000)  # fix seed, a scalar
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

def calc_gradient_penalty_DRAGAN(netD, X):
    # different alpha size for latent / image inputs
    if X.dim() == 2:
        # use size(0) instead of batchSize to handle a smaller final batch
        alpha = torch.rand(X.size(0), 1)
    else:
        alpha = torch.rand(X.size(0), 1, 1, 1)
    alpha = alpha.expand(X.size()).cuda()

    rand = torch.rand(X.size()).cuda()
    x_hat = Variable(alpha * X.data + (1 - alpha) * (X.data + 0.5 * X.data.std() * rand), requires_grad=True).cuda()
    pred_hat = netD(x_hat)
    gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()).cuda(),
                     create_graph=True, retain_graph=True, only_inputs=True)[0]
    gradient_penalty = opt.lambda_ * ((gradients.norm(2, dim=1) - 1) ** 2).mean()

    return gradient_penalty
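
# For reference, the penalty computed above follows the DRAGAN formulation:
#   x_hat = alpha * x + (1 - alpha) * (x + 0.5 * std(x) * u),   alpha, u ~ Uniform[0, 1]
#   penalty = lambda_ * mean((||grad_{x_hat} D(x_hat)|| - 1)^2)
# where the gradient norm is taken along dim 1 and lambda_ comes from the --lambda_ flag.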


def lowerbound(input):
    # element-wise lower bound: clamp values below epsilon (1e-9) up to epsilon,
    # to keep D outputs away from exactly 0 before BCE (its call sites below are commented out)
    return input.clamp(min=epsilon.data[0])


nc = 3
ngpu = opt.ngpu
nz = opt.nz
ngf = opt.ngf
ndf = opt.ndf
n_extra_d = opt.n_extra_layers_d
n_extra_g = opt.n_extra_layers_g

dataset = dset.ImageFolder(
    root=opt.dataRoot,
    transform=transforms.Compose([
        transforms.Scale(opt.imageSize),
        # transforms.CenterCrop(opt.imageSize),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # bring images to (-1,1)
    ])
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
                                         shuffle=True, num_workers=opt.workers)

# load models
if opt.model == 'DCGAN' or opt.model == 'DRAGAN':
    netG = models._netG_1(ngpu, nz, nc, ngf, n_extra_g, opt.pix_shuf)
    netD = models._netD_1(ngpu, nz, nc, ndf, n_extra_d, norm)
elif opt.model == 'IGAN':
    netG = models._netG_2(ngpu, nz, nc, ngf)
    netD = models._netD_2(ngpu, nz, nc, ndf)
elif opt.model == 'RESNET':
    netG = srresnet.NetG()
    netD = srresnet.NetD(norm)

netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()
criterion_MSE = nn.MSELoss()

input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
if opt.binary:
    bernoulli_prob = torch.FloatTensor(opt.batchSize, nz, 1, 1).fill_(0.5)
    fixed_noise = torch.bernoulli(bernoulli_prob)
else:
    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0
epsilon = torch.FloatTensor(opt.batchSize).fill_(1e-9)

if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    criterion_MSE.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise, epsilon = noise.cuda(), fixed_noise.cuda(), epsilon.cuda()

input = Variable(input)
label = Variable(label)
noise = Variable(noise)
fixed_noise = Variable(fixed_noise)
epsilon = Variable(epsilon)

# setup optimizers
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        start_iter = time.time()
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        batchSize = real_cpu.size(0)
        input.data.resize_(real_cpu.size()).copy_(real_cpu)
        label.data.resize_(batchSize).fill_(real_label - opt.d_labelSmooth)  # use smooth label for discriminator

        output = netD(input)
        # Prevent numerical instability
        # output = lowerbound(output)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()

        # train with fake
        noise.data.resize_(batchSize, nz, 1, 1)
        if opt.binary:
            bernoulli_prob.resize_(noise.data.size())
            noise.data.copy_(2 * (torch.bernoulli(bernoulli_prob) - 0.5))
        else:
            noise.data.normal_(0, 1)
        fake, z_prediction = netG(noise)
        label.data.fill_(fake_label)
        output = netD(fake.detach())  # ".detach()" avoids backprop through G
        # output = lowerbound(output)
        errD_fake = criterion(output, label)
        errD_fake.backward()  # gradients for the fake and real batches are accumulated
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake

        # Gradient penalty for DRAGAN
        if opt.model == 'DRAGAN':
            gradient_loss = calc_gradient_penalty_DRAGAN(netD, input)
            gradient_loss.backward()
            errD += gradient_loss

        optimizerD.step()  # .step() can be called once the gradients are computed

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for the generator cost
        output = netD(fake)
        # output = lowerbound(output)
        errG = criterion(output, label)
        errG.backward(retain_variables=True)  # True because backward passes through this graph a second time
        if opt.model == 'IGAN':  # with z predictor
            errG_z = criterion_MSE(z_prediction, noise)
            errG_z.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        end_iter = time.time()
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
              % (epoch, opt.niter, i, len(dataloader),
                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2, end_iter - start_iter))
        if i % 400 == 0:
            # the first 64 samples from the mini-batch are saved
            vutils.save_image(real_cpu[0:64, :, :, :],
                              '%s/real_samples_%03d_%04d.png' % (opt.imDir, epoch, i), nrow=8)
            fake, _ = netG(noise)
            vutils.save_image(fake.data[0:64, :, :, :],
                              '%s/fake_samples_epoch_%03d_%04d.png' % (opt.imDir, epoch, i), nrow=8)
    if epoch % 1 == 0:
        # do checkpointing
        torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.modelsDir, epoch))
        torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.modelsDir, epoch))
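
# To resume training from a checkpoint saved above, the --netG / --netD flags can be
# pointed at the written files (a hypothetical example; the script filename and run
# name are illustrative):
#   python main.py --dataRoot ./danbooru-faces --model DRAGAN --outDir dragan_faces \
#                  --netG ./results/dragan_faces/models/netG_epoch_10.pth \
#                  --netD ./results/dragan_faces/models/netD_epoch_10.pth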