add new examples

lazyprogrammer · lazyprogrammer · commit f35c47cd0f87 · 2017-04-17T03:44:11.000-04:00
diff --git a/ann_class2/dropout_tensorflow.py b/ann_class2/dropout_tensorflow.py
@@ -13,7 +13,7 @@ class HiddenLayer(object):
     def __init__(self, M1, M2):
         self.M1 = M1
         self.M2 = M2
-        W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
+        W = np.random.randn(M1, M2) / np.sqrt(M1)
         b = np.zeros(M2)
         self.W = tf.Variable(W.astype(np.float32))
         self.b = tf.Variable(b.astype(np.float32))
@@ -28,13 +28,16 @@ def __init__(self, hidden_layer_sizes, p_keep):
         self.hidden_layer_sizes = hidden_layer_sizes
         self.dropout_rates = p_keep
 
-    def fit(self, X, Y, lr=10e-7, mu=0.99, decay=0.999, epochs=300, batch_sz=100):
+    def fit(self, X, Y, lr=1e-3, mu=0.99, decay=0.999, epochs=300, batch_sz=100, split=True, print_every=20):
         # make a validation set
         X, Y = shuffle(X, Y)
         X = X.astype(np.float32)
         Y = Y.astype(np.int64)
-        Xvalid, Yvalid = X[-1000:], Y[-1000:]
-        X, Y = X[:-1000], Y[:-1000]
+        if split:
+            Xvalid, Yvalid = X[-1000:], Y[-1000:]
+            X, Y = X[:-1000], Y[:-1000]
+        else:
+            Xvalid, Yvalid = X, Y
 
         # initialize hidden layers
         N, D = X.shape
@@ -45,7 +48,7 @@ def fit(self, X, Y, lr=10e-7, mu=0.99, decay=0.999, epochs=300, batch_sz=100):
             h = HiddenLayer(M1, M2)
             self.hidden_layers.append(h)
             M1 = M2
-        W = np.random.randn(M1, K) / np.sqrt(M1 + K)
+        W = np.random.randn(M1, K) / np.sqrt(M1)
         b = np.zeros(K)
         self.W = tf.Variable(W.astype(np.float32))
         self.b = tf.Variable(b.astype(np.float32))
@@ -66,7 +69,8 @@ def fit(self, X, Y, lr=10e-7, mu=0.99, decay=0.999, epochs=300, batch_sz=100):
                 labels=labels
             )
         )
-        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
+        # train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
+        train_op = tf.train.MomentumOptimizer(lr, momentum=mu).minimize(cost)
         prediction = self.predict(inputs)
 
         n_batches = N / batch_sz
@@ -75,14 +79,15 @@ def fit(self, X, Y, lr=10e-7, mu=0.99, decay=0.999, epochs=300, batch_sz=100):
         with tf.Session() as session:
             session.run(init)
             for i in xrange(epochs):
+                print "epoch:", i, "n_batches:", n_batches
                 X, Y = shuffle(X, Y)
                 for j in xrange(n_batches):
                     Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                     Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]
 
                     session.run(train_op, feed_dict={inputs: Xbatch, labels: Ybatch})
 
-                    if j % 20 == 0:
+                    if j % print_every == 0:
                         c = session.run(cost, feed_dict={inputs: Xvalid, labels: Yvalid})
                         p = session.run(prediction, feed_dict={inputs: Xvalid})
                         costs.append(c)
diff --git a/ann_class2/dropout_theano.py b/ann_class2/dropout_theano.py
@@ -20,7 +20,7 @@ def __init__(self, M1, M2, an_id):
         self.id = an_id
         self.M1 = M1
         self.M2 = M2
-        W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
+        W = np.random.randn(M1, M2) / np.sqrt(M1)
         b = np.zeros(M2)
         self.W = theano.shared(W, 'W_%s' % self.id)
         self.b = theano.shared(b, 'b_%s' % self.id)
@@ -35,7 +35,7 @@ def __init__(self, hidden_layer_sizes, p_keep):
         self.hidden_layer_sizes = hidden_layer_sizes
         self.dropout_rates = p_keep
 
-    def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, epochs=300, batch_sz=100, show_fig=False):
+    def fit(self, X, Y, learning_rate=1e-6, mu=0.99, decay=0.999, epochs=300, batch_sz=100, show_fig=False):
         # make a validation set
         X, Y = shuffle(X, Y)
         X = X.astype(np.float32)
@@ -56,7 +56,7 @@ def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, epochs=300, batch
             self.hidden_layers.append(h)
             M1 = M2
             count += 1
-        W = np.random.randn(M1, K) / np.sqrt(M1 + K)
+        W = np.random.randn(M1, K) / np.sqrt(M1)
         b = np.zeros(K)
         self.W = theano.shared(W, 'W_logreg')
         self.b = theano.shared(b, 'b_logreg')
@@ -83,9 +83,9 @@ def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, epochs=300, batch
         updates = [
             (c, decay*c + (1-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
         ] + [
-            (p, p + mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + 10e-10)) for p, c, dp in zip(self.params, cache, dparams)
+            (p, p + mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + 1e-10)) for p, c, dp in zip(self.params, cache, dparams)
         ] + [
-            (dp, mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + 10e-10)) for p, c, dp in zip(self.params, cache, dparams)
+            (dp, mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + 1e-10)) for p, c, dp in zip(self.params, cache, dparams)
         ]
 
         # momentum only
diff --git a/ann_class2/grid_search.py b/ann_class2/grid_search.py
@@ -0,0 +1,82 @@
+# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
+# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
+# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
+import theano.tensor as T
+from theano_ann import ANN
+from util import get_spiral, get_clouds
+from sklearn.utils import shuffle
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def grid_search():
+  """Exhaustively evaluate every hyperparameter combination on the spiral data.
+
+  Trains one ANN per (hidden layers, learning rate, L2 penalty) triple on a
+  shuffled 70% split, scores it on the held-out 30%, and prints each result
+  followed by the best-scoring settings.
+
+  Takes no arguments and returns nothing; all results go to stdout.
+  """
+  # shuffled 70/30 train/validation split
+  X, Y = get_spiral()
+  # X, Y = get_clouds()
+  X, Y = shuffle(X, Y)
+  n_train = int(0.7*len(X))
+  Xtrain, Xtest = X[:n_train], X[n_train:]
+  Ytrain, Ytest = Y[:n_train], Y[n_train:]
+
+  # candidate values for each hyperparameter
+  layer_options = [[300], [100,100], [50,50,50]]
+  lr_options = [1e-4, 1e-3, 1e-2]
+  l2_options = [0., 0.1, 1.0]
+
+  # full cartesian sweep; ties keep the earliest combination (strict >)
+  best_score = 0
+  best_hls = best_lr = best_l2 = None
+  for hls in layer_options:
+    for lr in lr_options:
+      for l2 in l2_options:
+        # momentum and epoch count are held fixed across the sweep
+        model = ANN(hls)
+        model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2, mu=0.99, epochs=3000, show_fig=False)
+        val_acc = model.score(Xtest, Ytest)
+        train_acc = model.score(Xtrain, Ytrain)
+        summary = "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
+        print(summary % (val_acc, train_acc, hls, lr, l2))
+        if val_acc > best_score:
+          best_score = val_acc
+          best_hls, best_lr, best_l2 = hls, lr, l2
+  # final report
+  print("Best validation_accuracy:", best_score)
+  print("Best settings:")
+  print("hidden_layer_sizes:", best_hls)
+  print("learning_rate:", best_lr)
+  print("l2:", best_l2)
+
+
+# def one():
+#   X, Y = get_spiral()
+#   X, Y = shuffle(X, Y)
+#   Ntrain = int(0.7*len(X))
+#   Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
+#   # plt.scatter(Xtrain[:,0], Xtrain[:,1], c=Ytrain)
+#   # plt.show()
+#   Xtest, Ytest = X[Ntrain:], Y[Ntrain:]
+
+#   model = ANN([100,100])
+#   model.fit(Xtrain, Ytrain, activation=T.nnet.relu, learning_rate=1e-3, mu=0.99, reg=0., epochs=3000, show_fig=True)
+#   validation_accuracy = model.score(Xtest, Ytest)
+#   print("validation_accuracy:", validation_accuracy)
+#   print("train accuracy:", model.score(Xtrain, Ytrain))
+
+
+if __name__ == '__main__':  # only run the sweep when executed as a script
+  grid_search()
+  # one()  # single-configuration run; its definition is commented out above
diff --git a/ann_class2/random_search.py b/ann_class2/random_search.py
@@ -0,0 +1,76 @@
+# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
+# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
+# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
+# import theano.tensor as T
+from theano_ann import ANN
+from util import get_spiral, get_clouds
+from sklearn.utils import shuffle
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def random_search():
+  """Tune an ANN on the spiral data with a random local search.
+
+  Each trial fits nHidden layers of M units on a shuffled 70% split, scores
+  the held-out 30%, then perturbs the best settings so far for the next trial.
+  Learning rate and L2 penalty are searched and reported as base-10 exponents.
+  Results are printed; nothing is returned.
+  """
+  # get the data and split into train/test
+  X, Y = get_spiral()
+  # X, Y = get_clouds()
+  X, Y = shuffle(X, Y)
+  Ntrain = int(0.7*len(X))
+  Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
+  Xtest, Ytest = X[Ntrain:], Y[Ntrain:]
+
+  # starting hyperparameters
+  M = 20
+  nHidden = 2
+  log_lr = -4
+  log_l2 = -2 # searched as an exponent so 10**log_l2 is always positive
+  max_tries = 30
+
+  # The starting point doubles as the initial "best" so the perturbation step
+  # below is always defined, even if the first trial scores 0.
+  best_validation_rate = 0
+  best_M, best_nHidden, best_lr, best_l2 = M, nHidden, log_lr, log_l2
+  for _ in range(max_tries):
+    model = ANN([M]*nHidden)
+    # float base: NumPy rejects int ** negative int once exponents are NumPy ints
+    model.fit(
+      Xtrain, Ytrain, learning_rate=10.0**log_lr, reg=10.0**log_l2,
+      mu=0.99, epochs=3000, show_fig=False
+    )
+    validation_accuracy = model.score(Xtest, Ytest)
+    train_accuracy = model.score(Xtrain, Ytrain)
+    print(
+      "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s" %
+        (validation_accuracy, train_accuracy, [M]*nHidden, log_lr, log_l2)
+    )
+    if validation_accuracy > best_validation_rate:
+      best_validation_rate = validation_accuracy
+      best_M, best_nHidden, best_lr, best_l2 = M, nHidden, log_lr, log_l2
+
+    # select new hyperparams: one random step (-1, 0, +1; M in units of 10) from the best
+    nHidden = max(1, best_nHidden + np.random.randint(-1, 2))
+    M = max(10, best_M + np.random.randint(-1, 2)*10)
+    log_lr = best_lr + np.random.randint(-1, 2)
+    log_l2 = best_l2 + np.random.randint(-1, 2)
+  print("Best validation_accuracy:", best_validation_rate)
+  print("Best settings:")
+  print("best_M:", best_M)
+  print("best_nHidden:", best_nHidden)
+  print("learning_rate:", best_lr)
+  print("l2:", best_l2)
+
+
+if __name__ == '__main__':  # only run the search when executed as a script
+  random_search()
diff --git a/ann_class2/util.py b/ann_class2/util.py