as

mchong6 · Aug 20, 2017 · e3248b3 · e3248b3
1 parent 2b56005
commit e3248b3
Show file tree

Hide file tree

Showing 6 changed files with 1,172 additions and 0 deletions.
diff --git a/build_face_dataset.py b/build_face_dataset.py
@@ -0,0 +1,97 @@
+from __future__ import print_function
+from multiprocessing import Pool
+from PIL import Image
+import numpy as np
+import animeface
+import sys
+import os
+
+
+# im from PIL.Image.open, face_pos position object, margin
+def faceCrop(im,face_pos,m):
+    """Crop the face region out of ``im`` with a relative margin around it.
+
+    im       -- image exposing ``size`` and ``crop`` (e.g. from PIL.Image.open)
+    face_pos -- object with ``x``, ``y``, ``width`` and ``height`` attributes
+    m        -- margin added on each side, as a fraction of the face size
+
+    The crop box is clamped to the image borders and truncated to integers.
+    """
+    img_w, img_h = im.size
+    pad_x = m * face_pos.width
+    pad_y = m * face_pos.height
+    # Top-left corner, never outside the image.
+    left = max(0, face_pos.x - pad_x)
+    top = max(0, face_pos.y - pad_y)
+    # Start from the full box (face plus margin on both sides) ...
+    box_w = face_pos.width + 2 * pad_x
+    box_h = face_pos.height + 2 * pad_y
+    # ... and cut it back when it would touch or cross the right/bottom edge.
+    if not img_w > (left + face_pos.width + 2 * pad_x):
+        box_w = img_w - left
+    if not img_h > (top + face_pos.height + 2 * pad_y):
+        box_h = img_h - top
+    left, top, box_w, box_h = int(left), int(top), int(box_w), int(box_h)
+    return im.crop((left, top, left + box_w, top + box_h))
+
+def min_resize_crop(im, min_side):
+    """Scale ``im`` so its shorter side equals ``min_side``, then crop a square.
+
+    im       -- PIL image
+    min_side -- target side length in pixels
+
+    Returns the ``min_side`` x ``min_side`` region anchored at the top-left
+    corner of the resized image.
+    """
+    sizeX,sizeY = im.size
+    # Floor division keeps the target size integral: with plain "/" Python 3
+    # produces a float, which Image.resize does not accept. On Python 2 the
+    # result is unchanged.
+    if sizeX > sizeY:
+        new_size = (min_side*sizeX//sizeY, min_side)
+    else:
+        new_size = (min_side, sizeY*min_side//sizeX)
+    im = im.resize(new_size, Image.ANTIALIAS)
+    return im.crop((0,0,min_side,min_side))
+
+
+def load_detect(img_path):
+    """Open an image file and return its most likely face as a 96x96 crop.
+
+    Input: A string with the image path
+    Output: The detected face image in the size 96x96, or the integer 0
+    when no face is detected
+
+    Note that there might be multiple faces in one image;
+    the output corresponds to the face with the highest likelihood
+    """
+    im = Image.open(img_path)
+    faces = animeface.detect(im)
+    if len(faces) == 0:
+        return 0
+    likelihoods = np.array([face.likelihood for face in faces])
+    best_face = faces[np.argmax(likelihoods)]
+    cropped = faceCrop(im, best_face.face.pos, 0.5)
+    return min_resize_crop(cropped, 96)
+
+def process_img(img_path):
+    """Detect the face in ``img_path`` and save it under ``faces/<class>/``.
+
+    The class name is the name of the directory containing the image, and
+    the output keeps the original file name. The face images are stored
+    relative to the current working directory.
+
+    Returns 0 when the output already exists or no face was found,
+    otherwise None after the face image has been written.
+    """
+    src_dir, img_name = os.path.split(img_path)
+    cls_name = os.path.basename(src_dir)
+    new_dir_path = os.path.join('faces',cls_name)
+    try:
+        os.makedirs(new_dir_path)
+    except OSError as err:
+        # The directory normally exists already (another image of the same
+        # class, or another worker, created it); only report real failures.
+        if not os.path.isdir(new_dir_path):
+            print("OS error: {0}".format(err))
+
+    new_img_path = os.path.join(new_dir_path, img_name)
+    if os.path.exists(new_img_path):
+        return 0
+    im = load_detect(img_path)
+    # no faces in this image: load_detect signals that with the integer 0
+    if isinstance(im, int):
+        return 0
+    # JPEG cannot store alpha or palette images (e.g. from .png sources)
+    if im.mode not in ('L', 'RGB', 'CMYK'):
+        im = im.convert('RGB')
+    im.save(new_img_path, 'JPEG')
+
+def try_process_img(img_path):
+    """Run process_img on ``img_path``, reporting failures instead of raising.
+
+    Used as the worker function of the pool so that one bad image does not
+    abort the whole run.
+    """
+    try:
+        process_img(img_path)
+    except Exception as e:
+        # "except Exception" lets KeyboardInterrupt/SystemExit propagate;
+        # the path and message make the failure traceable.
+        print('Err: %s (%s): %s \n' % (type(e), img_path, e))
+
+# multiprocessing version
+def multi_construct_face_dataset(base_dir):
+    """Build the face dataset from every class directory below ``base_dir``.
+
+    Each sub-directory of ``base_dir`` is treated as one class; its
+    ``.jpg``/``.png`` files are handed to try_process_img in a pool of
+    12 worker processes.
+    """
+    # Only directories are classes; stray files in base_dir would make
+    # os.listdir(sub_dir) fail.
+    cls_dirs = [d for d in os.listdir(base_dir)
+                if os.path.isdir(os.path.join(base_dir, d))]
+    imgs = []
+    for cls_dir in cls_dirs:
+        sub_dir = os.path.join(base_dir, cls_dir)
+        imgs.extend(os.path.join(sub_dir, f) for f in os.listdir(sub_dir)
+                    if f.endswith(('.jpg', '.png')))
+    print('There are %d classes, %d images in total. \n' % (len(cls_dirs), len(imgs)))
+    pool = Pool(12) # 12 workers
+    try:
+        pool.map(try_process_img, imgs)
+    finally:
+        # Release the worker processes even when map() raises.
+        pool.close()
+        pool.join()
+
+
+# Guarded entry point: with multiprocessing the module can be re-imported by
+# worker processes, which must not start the whole job again.
+if __name__ == '__main__':
+    # Optional first command-line argument overrides the default dataset path.
+    base_dir = sys.argv[1] if len(sys.argv) > 1 else '/home/jielei/gallery-dl/danbooru'
+    multi_construct_face_dataset(base_dir)
diff --git a/main.py b/main.py
@@ -0,0 +1,274 @@
+from __future__ import print_function
+import os
+os.environ["CUDA_VISIBLE_DEVICES"]="2"
+import time
+import random
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.backends.cudnn as cudnn
+import torch.optim as optim
+import torch.utils.data
+import torchvision.datasets as dset
+import torchvision.transforms as transforms
+import torchvision.utils as vutils
+from torch.autograd import Variable, grad
+
+### load project files
+import models
+import srresnet
+from models import weights_init
+
+# Command-line options of the training script.
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataRoot', default='./danbooru-faces', help='path to dataset')
+parser.add_argument('--workers', type=int, default=12, help='number of data loading workers')
+parser.add_argument('--batchSize', type=int, default=128, help='input batch size')
+parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
+parser.add_argument('--nz', type=int, default=128, help='size of the latent z vector')
+parser.add_argument('--ngf', type=int, default=64)
+parser.add_argument('--ndf', type=int, default=64)
+parser.add_argument('--niter', type=int, default=100, help='number of epochs to train for')
+parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
+parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
+parser.add_argument('--cuda'  , type=int, default=1, help='enables cuda')
+parser.add_argument('--ngpu'  , type=int, default=1, help='number of GPUs to use')
+parser.add_argument('--netG', default='', help="path to netG (to continue training)")
+parser.add_argument('--netD', default='', help="path to netD (to continue training)")
+parser.add_argument('--outDir', required=True, help='folder to output images and model checkpoints')
+# Only the models this script actually builds are accepted, so a typo fails
+# here instead of as a NameError after the dataset has been set up.
+parser.add_argument('--model', required=True, choices=['DCGAN', 'RESNET', 'IGAN', 'DRAGAN'],
+                    help='DCGAN | RESNET | IGAN | DRAGAN')
+parser.add_argument('--d_labelSmooth', type=float, default=0.1, help='for D, use soft label "1-labelSmooth" for real samples')
+parser.add_argument('--n_extra_layers_d', type=int, default=0, help='number of extra conv layers in D')
+parser.add_argument('--n_extra_layers_g', type=int, default=1, help='number of extra conv layers in G')
+parser.add_argument('--pix_shuf'  , type=int, default=1, help='Use pixel shuffle instead of deconvolution')
+# float so that fractional penalty weights can be given; integers still parse
+parser.add_argument('--lambda_'  , type=float, default=10, help='Weight of gradient penalty (DRAGAN)')
+parser.add_argument('--binary', action='store_true', help='z from bernoulli distribution, with prob=0.5')
+
+# simply prefer this way
+# arg_list = [
+#     '--dataRoot', '/home/jielei/data/danbooru-faces',
+#     '--workers', '12',
+#     '--batchSize', '128',
+#     '--imageSize', '64',
+#     '--nz', '100',
+#     '--ngf', '64',
+#     '--ndf', '64',
+#     '--niter', '80',
+#     '--lr', '0.0002',
+#     '--beta1', '0.5',
+#     '--cuda', 
+#     '--ngpu', '1',
+#     '--netG', '',
+#     '--netD', '',
+#     '--outDir', './results',
+#     '--model', '1',
+#     '--d_labelSmooth', '0.1', # 0.25 from improved-GAN paper
+#     '--n_extra_layers_d', '0',
+#     '--n_extra_layers_g', '1', # in the sense that generator should be more powerful
+# ]
+
+opt = parser.parse_args()
+# opt = parser.parse_args(arg_list)
+print(opt)
+# Name of the normalisation layer: layer norm for DRAGAN, batch norm otherwise.
+norm = 'LayerNorm' if opt.model == 'DRAGAN' else 'BatchNorm'
+
+# Make directories: everything for this run lives below ./results/<outDir>
+opt.outDir = './results/' + opt.outDir
+opt.modelsDir = opt.outDir + '/models'
+opt.imDir = opt.outDir + '/images'
+
+# Recursively create image and model directory
+for out_path in (opt.imDir, opt.modelsDir):
+    try:
+        os.makedirs(out_path)
+    except OSError:
+        # An already existing directory is fine; any other failure
+        # (permissions, a file in the way) must not be silently ignored.
+        if not os.path.isdir(out_path):
+            raise
+
+# A fresh random seed is drawn for every run; it is printed so that the run
+# can be identified afterwards (print(opt) above happens before it is set).
+opt.manualSeed = random.randint(1,10000) # a scalar
+print("Random Seed: ", opt.manualSeed)
+random.seed(opt.manualSeed)
+torch.manual_seed(opt.manualSeed)
+if opt.cuda:
+    # seed the GPU generators as well
+    torch.cuda.manual_seed_all(opt.manualSeed)
+
+# Let cuDNN pick its algorithms by benchmarking.
+cudnn.benchmark = True
+
+if torch.cuda.is_available() and not opt.cuda:
+    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
+
+def calc_gradient_penalty_DRAGAN(netD, X):
+    """Return the DRAGAN gradient penalty of ``netD`` around the batch ``X``.
+
+    netD -- discriminator, called on the perturbed batch
+    X    -- Variable holding a batch, either 2-D (latent) or 4-D (images)
+
+    The batch is perturbed with uniform noise scaled by half its standard
+    deviation and mixed with the original by a per-sample random factor.
+    The penalty is ``opt.lambda_`` times the mean squared deviation of the
+    per-sample gradient norm from 1.
+    """
+    # use size(0) instead of batchsize to prevent smaller batchsize during last batch
+    batch = X.size(0)
+    #different alpha size for latent/image
+    if X.dim() == 2:
+        alpha = torch.rand(batch, 1)
+    else:
+        alpha = torch.rand(batch, 1, 1, 1)
+    # type_as puts the random tensors on the same device as X, so the
+    # function also works when training on the CPU.
+    alpha = alpha.expand(X.size()).type_as(X.data)
+
+    rand = torch.rand(X.size()).type_as(X.data)
+    x_hat = Variable(alpha * X.data + (1 - alpha) * (X.data + 0.5 * X.data.std() * rand), requires_grad=True)
+    pred_hat = netD(x_hat)
+    gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()).type_as(pred_hat.data),
+                    create_graph=True, retain_graph=True, only_inputs=True)[0]
+    # Flatten so the norm is taken over the whole gradient of each sample;
+    # norm(dim=1) on the 4-D tensor would only reduce the channel axis.
+    gradients = gradients.contiguous().view(batch, -1)
+    gradient_penalty = opt.lambda_ * ((gradients.norm(2, dim=1) - 1) ** 2).mean()
+
+    return gradient_penalty
+
+def lowerbound(input, eps=1e-9):
+    """Return ``input`` with every element raised to at least ``eps``.
+
+    Meant to keep discriminator outputs away from exactly 0 before they
+    are passed to the loss. The clamp is elementwise and works for any
+    batch size; values already above ``eps`` are returned unchanged.
+    """
+    return torch.clamp(input, min=eps)
+
+
+# Shorthands for the network hyper-parameters (nc: number of image channels).
+nc = 3
+ngpu, nz = opt.ngpu, opt.nz
+ngf, ndf = opt.ngf, opt.ndf
+n_extra_d, n_extra_g = opt.n_extra_layers_d, opt.n_extra_layers_g
+
+# Preprocessing applied to every image loaded from the dataset folder.
+preprocess = transforms.Compose([
+    transforms.Scale(opt.imageSize),
+    # transforms.CenterCrop(opt.imageSize),
+    transforms.ToTensor(),
+    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)), # bring images to (-1,1)
+])
+dataset = dset.ImageFolder(root=opt.dataRoot, transform=preprocess)
+dataloader = torch.utils.data.DataLoader(dataset,
+                                         shuffle=True,
+                                         batch_size=opt.batchSize,
+                                         num_workers=opt.workers)
+
+# load models: build the generator/discriminator pair selected by --model
+if opt.model == 'DCGAN' or opt.model == 'DRAGAN':
+    netG = models._netG_1(ngpu, nz, nc, ngf, n_extra_g, opt.pix_shuf)
+    netD = models._netD_1(ngpu, nz, nc, ndf, n_extra_d, norm)
+elif opt.model == 'IGAN':
+    netG = models._netG_2(ngpu, nz, nc, ngf)
+    netD = models._netD_2(ngpu, nz, nc, ndf)
+elif opt.model == 'RESNET':
+    netG = srresnet.NetG()
+    netD = srresnet.NetD(norm)
+else:
+    # Without this branch an unknown model only surfaces as a NameError below.
+    raise ValueError("Unsupported --model '%s' (expected DCGAN, RESNET, IGAN or DRAGAN)" % opt.model)
+
+# Initialise the weights, then optionally restore a checkpoint to continue training.
+netG.apply(weights_init)
+if opt.netG != '':
+    netG.load_state_dict(torch.load(opt.netG))
+print(netG)
+
+netD.apply(weights_init)
+if opt.netD != '':
+    netD.load_state_dict(torch.load(opt.netD))
+print(netD)
+
+# Losses: binary cross-entropy for D/G, MSE for the z predictor.
+criterion = nn.BCELoss()
+criterion_MSE = nn.MSELoss()
+
+# Pre-allocated buffers that are resized and refilled for every batch.
+input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
+noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
+if opt.binary:
+    bernoulli_prob = torch.FloatTensor(opt.batchSize, nz, 1, 1).fill_(0.5)
+    # Map {0,1} to {-1,+1}, the same way the training noise is drawn below,
+    # so the fixed noise comes from the distribution the generator is trained on.
+    fixed_noise = 2*(torch.bernoulli(bernoulli_prob)-0.5)
+else:
+    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
+label = torch.FloatTensor(opt.batchSize)
+real_label = 1
+fake_label = 0
+epsilon = torch.FloatTensor(opt.batchSize).fill_(1e-9)
+
+if opt.cuda:
+    netD.cuda()
+    netG.cuda()
+    criterion.cuda()
+    criterion_MSE.cuda()
+    input, label = input.cuda(), label.cuda()
+    noise, fixed_noise, epsilon = noise.cuda(), fixed_noise.cuda(), epsilon.cuda()
+
+input = Variable(input)
+label = Variable(label)
+noise = Variable(noise)
+fixed_noise = Variable(fixed_noise)
+epsilon = Variable(epsilon)
+
+# setup optimizer
+optimizerD = optim.Adam(netD.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
+optimizerG = optim.Adam(netG.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
+
+# Main training loop: one discriminator update and one generator update per batch.
+for epoch in range(opt.niter):
+    for i, data in enumerate(dataloader, 0):
+        start_iter = time.time()
+        ############################
+        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
+        ###########################
+        # train with real
+        netD.zero_grad()
+        real_cpu, _ = data
+        # the last batch of an epoch can be smaller than opt.batchSize
+        batchSize = real_cpu.size(0)
+        input.data.resize_(real_cpu.size()).copy_(real_cpu)
+        label.data.resize_(batchSize).fill_(real_label - opt.d_labelSmooth) # use smooth label for discriminator
+
+        output = netD(input)
+        # Prevent numerical instability
+        #output = lowerbound(output)
+        errD_real = criterion(output, label)
+        errD_real.backward()
+        D_x = output.data.mean()
+        # train with fake
+        noise.data.resize_(batchSize, nz, 1, 1)
+        if opt.binary:
+            # binary latent code with values in {-1, +1}
+            bernoulli_prob.resize_(noise.data.size())
+            noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))
+        else:
+            noise.data.normal_(0, 1)
+        fake,z_prediction = netG(noise)
+        label.data.fill_(fake_label)
+        output = netD(fake.detach()) # add ".detach()" to avoid backprop through G
+        #output = lowerbound(output)
+        errD_fake = criterion(output, label)
+        errD_fake.backward() # gradients for fake/real will be accumulated
+        D_G_z1 = output.data.mean()
+        errD = errD_real + errD_fake
+
+        # Gradient penalty for DRAGAN
+        if opt.model == 'DRAGAN':
+            gradient_loss = calc_gradient_penalty_DRAGAN(netD, input)
+            gradient_loss.backward()
+            errD += gradient_loss
+
+        optimizerD.step() # .step() can be called once the gradients are computed
+
+        ############################
+        # (2) Update G network: maximize log(D(G(z)))
+        ###########################
+        netG.zero_grad()
+        label.data.fill_(real_label) # fake labels are real for generator cost
+        output = netD(fake)
+        #output = lowerbound(output)
+        errG = criterion(output, label)
+        # retain_graph (the current name of the deprecated retain_variables)
+        # keeps the graph alive for the second backward pass below.
+        errG.backward(retain_graph=True)
+        if opt.model == 'IGAN': # with z predictor
+            errG_z = criterion_MSE(z_prediction, noise)
+            errG_z.backward()
+        D_G_z2 = output.data.mean()
+        optimizerG.step()
+
+        end_iter = time.time()
+        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
+              % (epoch, opt.niter, i, len(dataloader),
+                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2, end_iter-start_iter))
+        if i % 400 == 0:
+            # the first 64 samples from the mini-batch are saved.
+            vutils.save_image(real_cpu[0:64,:,:,:],
+                    '%s/real_samples_%03d_%04d.png' % (opt.imDir, epoch, i), nrow=8)
+            # Sample from the fixed noise so that the saved grids show the
+            # same latent points at every checkpoint.
+            fake,_ = netG(fixed_noise)
+            vutils.save_image(fake.data[0:64,:,:,:],
+                    '%s/fake_samples_epoch_%03d_%04d.png' % (opt.imDir, epoch, i), nrow=8)
+    if epoch % 1 == 0:
+        # do checkpointing (every epoch)
+        torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.modelsDir, epoch))
+        torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.modelsDir, epoch))