Generative Adversarial Networks - Goodfellow et al.
Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks - Radford et al.

- This work is absolutely not an effort to reproduce the exact results of the cited papers, nor do I confine my implementation to the suggestions of the original authors.
- I have tried to implement my own limited understanding of the original papers in the hope of gaining better insight into their work.

Use this code with no warranty and please respect the accompanying license.
'''

from tools_general import tf, np
from tools_networks import deconv, conv, dense, clipped_crossentropy, dropout
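# Note: tools_general and tools_networks are this repository's local helper
# modules; conv/deconv/dense are assumed to be thin wrappers around the
# corresponding TensorFlow ops with optional batchnorm and activation baked in.
# (The collapsed hunk above presumably holds the remaining imports used below,
# e.g. os, datetime, vis_square, expr_dir and data_dir.)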

- def concat_labels(X, labels):
-     if X.get_shape().ndims == 4:
-         X_shape = tf.shape(X)
-         labels_reshaped = tf.reshape(labels, [-1, 1, 1, 10])
-         a = tf.ones([X_shape[0], X_shape[1], X_shape[2], 10])
-         X = tf.concat([X, labels_reshaped * a], axis=3)
-     return X
-
- def create_gan_G(z, labels, is_training, Cout=1, trainable=True, reuse=False, networktype='ganG'):
-     '''input: batchsize * 100 and labels to make the generator conditional
+ from tensorflow.examples.tutorials.mnist import input_data
+
+ def create_gan_G(z, is_training, Cout=1, trainable=True, reuse=False, networktype='ganG'):
+     '''input: batchsize * 100
    output: batchsize * 28 * 28 * 1'''
    with tf.variable_scope(networktype, reuse=reuse):
-         z = tf.concat(axis=-1, values=[z, labels])
-         Gz = dense(z, is_training, Cout=4 * 4 * 256, act='reLu', norm='batchnorm', name='dense1')
-         Gz = tf.reshape(Gz, shape=[-1, 4, 4, 256])  # 4
-         Gz = deconv(Gz, is_training, kernel_w=5, stride=2, Cout=256, trainable=trainable, act='reLu', norm='batchnorm', name='deconv1')  # 11
-         Gz = deconv(Gz, is_training, kernel_w=5, stride=2, Cout=128, trainable=trainable, act='reLu', norm='batchnorm', name='deconv2')  # 25
-         Gz = deconv(Gz, is_training, kernel_w=4, stride=1, Cout=Cout, act=None, norm=None, name='deconv3')  # 28
-         Gz = tf.nn.sigmoid(Gz)
-         return Gz
+         Gout_op = dense(z, is_training, Cout=4 * 4 * 256, trainable=trainable, act='reLu', norm='batchnorm', name='dense1')
+         Gout_op = tf.reshape(Gout_op, shape=[-1, 4, 4, 256])  # 4
+         Gout_op = deconv(Gout_op, is_training, kernel_w=5, stride=2, Cout=256, trainable=trainable, act='reLu', norm='batchnorm', name='deconv1')  # 11
+         Gout_op = deconv(Gout_op, is_training, kernel_w=5, stride=2, Cout=128, trainable=trainable, act='reLu', norm='batchnorm', name='deconv2')  # 25
+         Gout_op = deconv(Gout_op, is_training, kernel_w=4, stride=1, Cout=Cout, trainable=trainable, act=None, norm=None, name='deconv3')  # 28
+         Gout_op = tf.nn.sigmoid(Gout_op)
+         return Gout_op
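+         # Note on the generator above: z (batchsize x 100) is projected to
+         # 4*4*256 units, reshaped to a 4x4 feature map, and upsampled by
+         # fractionally-strided convolutions. The trailing size comments
+         # (# 4, # 11, # 25, # 28) are consistent with 'VALID' padding in the
+         # deconv helper, i.e. out = (in - 1) * stride + kernel_w:
+         #   (4 - 1) * 2 + 5 = 11,  (11 - 1) * 2 + 5 = 25,  (25 - 1) * 1 + 4 = 28.
+         # The final sigmoid maps outputs into [0, 1], matching MNIST pixel range.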

- def create_gan_D(xz, labels, is_training, trainable=True, reuse=False, networktype='ganD'):
+ def create_gan_D(xz, is_training, trainable=True, reuse=False, networktype='ganD'):
    with tf.variable_scope(networktype, reuse=reuse):
-         xz = concat_labels(xz, labels)
        Dxz = conv(xz, is_training, kernel_w=5, stride=2, Cout=128, trainable=trainable, act='lrelu', norm=None, name='conv1')  # 12
        Dxz = conv(Dxz, is_training, kernel_w=5, stride=2, Cout=256, trainable=trainable, act='lrelu', norm='batchnorm', name='conv2')  # 4
        Dxz = conv(Dxz, is_training, kernel_w=2, stride=2, Cout=256, trainable=trainable, act='lrelu', norm='batchnorm', name='conv3')  # 2
-         Dxz = conv(Dxz, is_training, kernel_w=2, stride=2, Cout=1, trainable=trainable, act='lrelu', norm='batchnorm', name='conv4')  # 2
+         Dxz = conv(Dxz, is_training, kernel_w=2, stride=2, Cout=1, trainable=trainable, act=None, norm='batchnorm', name='conv4')  # 1
        Dxz = tf.nn.sigmoid(Dxz)
        return Dxz
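        # Note on the discriminator above: with 'VALID' padding (the convention
        # the generator's size comments already imply), spatial size shrinks as
        # floor((in - kernel_w) / stride) + 1: 28 -> 12 -> 4 -> 2, and the final
        # 2x2/stride-2 conv likely reduces that to a single unit per image (the
        # original '# 2' comment on conv4 appears stale). Since conv4 has
        # act=None and is followed by tf.nn.sigmoid, the returned tensor holds
        # probabilities in (0, 1), not raw logits, despite the fakeLogits /
        # realLogits names used at the call sites below.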

- def create_dcgan_trainer(base_lr=1e-4, networktype='dcgan'):
+ def create_dcgan_trainer(base_lr=1e-4, networktype='dcgan', latentDim=100):
    '''Train a Generative Adversarial Network'''
-     # with tf.name_scope('train_%s' % networktype):
    is_training = tf.placeholder(tf.bool, [], 'is_training')

-     inZ = tf.placeholder(tf.float32, [None, 100])  # tf.random_uniform(shape=[batch_size, 100], minval=-1., maxval=1., dtype=tf.float32)
-     inL = tf.placeholder(tf.float32, [None, 10])  # we want to condition the generated output on some parameters of the input
-     inX = tf.placeholder(tf.float32, [None, 28, 28, 1])
+     Zph = tf.placeholder(tf.float32, [None, latentDim])  # tf.random_uniform(shape=[batch_size, 100], minval=-1., maxval=1., dtype=tf.float32)
+     Xph = tf.placeholder(tf.float32, [None, 28, 28, 1])

-     Gz = create_gan_G(inZ, inL, is_training, Cout=1, trainable=True, reuse=False, networktype=networktype + '_G')
+     Gout_op = create_gan_G(Zph, is_training, Cout=1, trainable=True, reuse=False, networktype=networktype + '_G')

-     DGz = create_gan_D(Gz, inL, is_training, trainable=True, reuse=False, networktype=networktype + '_D')
-     Dx = create_gan_D(inX, inL, is_training, trainable=True, reuse=True, networktype=networktype + '_D')
+     fakeLogits = create_gan_D(Gout_op, is_training, trainable=True, reuse=False, networktype=networktype + '_D')
+     realLogits = create_gan_D(Xph, is_training, trainable=True, reuse=True, networktype=networktype + '_D')

-     ganG_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_G')
-     print(len(ganG_var_list), [var.name for var in ganG_var_list])
+     G_varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_G')
+     print(len(G_varlist), [var.name for var in G_varlist])

-     ganD_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_D')
-     print(len(ganD_var_list), [var.name for var in ganD_var_list])
+     D_varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_D')
+     print(len(D_varlist), [var.name for var in D_varlist])

-     Gscore = clipped_crossentropy(DGz, tf.ones_like(DGz))
-     Dscore = clipped_crossentropy(DGz, tf.zeros_like(DGz)) + clipped_crossentropy(Dx, tf.ones_like(Dx))
+     Gloss = clipped_crossentropy(fakeLogits, tf.ones_like(fakeLogits))
+     Dloss = clipped_crossentropy(fakeLogits, tf.zeros_like(fakeLogits)) + clipped_crossentropy(realLogits, tf.ones_like(realLogits))
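+     # These targets give the standard non-saturating GAN objective
+     # (Goodfellow et al.): D is pushed toward D(x) = 1 on real data and
+     # D(G(z)) = 0 on samples, i.e. Dloss ~ -[log D(x) + log(1 - D(G(z)))],
+     # while G is trained against flipped labels, Gloss ~ -log D(G(z)).
+     # clipped_crossentropy is assumed to clip its inputs away from 0 and 1
+     # before taking logs, since the D outputs are already sigmoid probabilities.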

-     Gtrain = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Gscore, var_list=ganG_var_list)
-     Dtrain = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Dscore, var_list=ganD_var_list)
+     Gtrain_op = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Gloss, var_list=G_varlist)
+     Dtrain_op = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Dloss, var_list=D_varlist)
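+     # beta1 = 0.5 (rather than Adam's default 0.9) follows the DCGAN paper's
+     # recommendation (Radford et al.). Restricting each optimizer to its own
+     # var_list means a G step cannot move D's weights and vice versa.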

-     return Gtrain, Dtrain, Gscore, Dscore, is_training, inZ, inX, inL, Gz
+     return Gtrain_op, Dtrain_op, Gloss, Dloss, is_training, Zph, Xph, Gout_op

if __name__ == '__main__':
    networktype = 'DCGAN_MNIST'

    batch_size = 128
-     base_lr = 0.0002  # 1e-4
-     epochs = 30
+     base_lr = 2e-4
+     epochs = 1000
+     latentDim = 100

    work_dir = expr_dir + '%s/%s/' % (networktype, datetime.strftime(datetime.today(), '%Y%m%d'))
    if not os.path.exists(work_dir): os.makedirs(work_dir)

-     data, max_iter, test_iter, test_int, disp_int = get_train_params(data_dir + '/' + networktype, batch_size, epochs=epochs, test_in_each_epoch=1, networktype=networktype)
+     data = input_data.read_data_sets(data_dir + '/' + networktype, reshape=False)
+     disp_int = 2 * int(data.train.num_examples / batch_size)  # every two epochs
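+     # disp_int is measured in discriminator iterations: num_examples / batch_size
+     # steps make up one epoch, and with k = 1 below that is one `it` per
+     # minibatch, so snapshots and loss printouts fire roughly every second epoch.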

    tf.reset_default_graph()
    sess = tf.InteractiveSession()

-     Gtrain, Dtrain, Gscore, Dscore, is_training, inZ, inX, inL, Gz = create_dcgan_trainer(base_lr, networktype=networktype)
+     Gtrain_op, Dtrain_op, Gloss, Dloss, is_training, Zph, Xph, Gout_op = create_dcgan_trainer(base_lr, networktype=networktype, latentDim=latentDim)
    tf.global_variables_initializer().run()

    var_list = [var for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if (networktype.lower() in var.name.lower()) and ('adam' not in var.name.lower())]
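    # Keep only this network's variables and drop the optimizer's slot
    # variables (which carry 'Adam' in their names), so checkpoints hold only
    # model weights and stay small.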
-     saver = tf.train.Saver(var_list=var_list, max_to_keep=1000)
+     saver = tf.train.Saver(var_list=var_list, max_to_keep=1000)
    # saver.restore(sess, expr_dir + 'ganMNIST/20170707/214_model.ckpt')
-
-     Z_test = np.random.uniform(size=[batch_size, 100], low=-1., high=1.).astype(np.float32)
-     labels_test = OneHot(np.random.randint(10, size=[batch_size]), n=10)
-
+
    k = 1
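    # k is the number of discriminator updates per generator update
    # (the k of Algorithm 1 in Goodfellow et al.); here a single D step
    # is taken before each G step.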
-
-     for it in range(1, max_iter):
-         Z = np.random.uniform(size=[batch_size, 100], low=-1., high=1.).astype(np.float32)
-         X, labels = data.train.next_batch(batch_size)
-
+     it = 0
+     disp_losses = False
+
+     while data.train.epochs_completed < epochs:
+         dtemploss = 0
+
        for itD in range(k):
-             cur_Dscore, _ = sess.run([Dscore, Dtrain], feed_dict={inX: X, inZ: Z, inL: labels, is_training: True})
+             it += 1
+             Z = np.random.uniform(size=[batch_size, latentDim], low=-1., high=1.).astype(np.float32)
+             X, _ = data.train.next_batch(batch_size)
+
+             cur_Dloss, _ = sess.run([Dloss, Dtrain_op], feed_dict={Xph: X, Zph: Z, is_training: True})
+             dtemploss += cur_Dloss

-         cur_Gscore, _ = sess.run([Gscore, Gtrain], feed_dict={inZ: Z, inL: labels, is_training: True})
+         if it % disp_int == 0: disp_losses = True
+
+         cur_Dloss = dtemploss / k
+
+         Z = np.random.uniform(size=[batch_size, latentDim], low=-1., high=1.).astype(np.float32)
+         cur_Gloss, _ = sess.run([Gloss, Gtrain_op], feed_dict={Zph: Z, is_training: True})
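+         # The reported D loss is the average over the k discriminator steps,
+         # and a fresh noise batch is drawn for the G step rather than reusing
+         # the one from the last D update; since Gtrain_op only touches
+         # G_varlist, this step leaves the discriminator's weights unchanged.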

-         if it % disp_int == 0:
-             Gz_sample = sess.run(Gz, feed_dict={inZ: Z_test, inL: labels_test, is_training: False})
-             vis_square(Gz_sample[:121], [11, 11], save_path=work_dir + 'Iter_%d.jpg' % it)
-             saver.save(sess, work_dir + "%.3d_model.ckpt" % it)
-             if ('cur_Dscore' in vars()) and ('cur_Gscore' in vars()):
-                 print("Iteration #%4d, Train Gscore = %f, Dscore=%f" % (it, cur_Gscore, cur_Dscore))
+         if disp_losses:
+             Gsample = sess.run(Gout_op, feed_dict={Zph: Z, is_training: False})
+             vis_square(Gsample[:121], [11, 11], save_path=work_dir + 'Epoch%.3d.jpg' % data.train.epochs_completed)
+             saver.save(sess, work_dir + "%.3d_model.ckpt" % data.train.epochs_completed)
+             print("Epoch #%.3d, Train Gloss = %f, Dloss=%f" % (data.train.epochs_completed, cur_Gloss, cur_Dloss))
+             disp_losses = False