
Commit cc5ff21

py3
1 parent d3ced3d commit cc5ff21

13 files changed, +145 −92 lines changed

ann_class/backprop.py

+9 −5
@@ -4,6 +4,10 @@
 # the notes for this class can be found at:
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-python
 # https://www.udemy.com/data-science-deep-learning-in-python
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np
 import matplotlib.pyplot as plt
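
The two __future__ imports give Python 2 the Python 3 semantics the rest of this commit relies on, and builtins.range (from the future package) provides a lazy, Python-3-style range. A minimal sketch of what changes under Python 2:

    from __future__ import print_function, division
    from builtins import range

    print(7 / 2)      # 3.5 -- true division now matches Python 3
    print(7 // 2)     # 3   -- floor division stays an int
    r = range(10**6)  # lazy range object, not a fully materialized list
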
@@ -23,7 +27,7 @@ def forward(X, W1, b1, W2, b2):
 def classification_rate(Y, P):
     n_correct = 0
     n_total = 0
-    for i in xrange(len(Y)):
+    for i in range(len(Y)):
         n_total += 1
         if Y[i] == P[i]:
             n_correct += 1
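
The loop is kept for readability; since Y and P are NumPy arrays here, the same rate could be computed in one vectorized line (a sketch with the same signature):

    def classification_rate(Y, P):
        # fraction of predictions matching the targets
        return np.mean(Y == P)
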
@@ -113,7 +117,7 @@ def main():
     N = len(Y)
     # turn Y into an indicator matrix for training
     T = np.zeros((N, K))
-    for i in xrange(N):
+    for i in range(N):
         T[i, Y[i]] = 1

     # let's see what it looks like
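
The indicator (one-hot) matrix can also be built without the loop; a sketch using NumPy fancy indexing, assuming Y holds integer labels in 0..K-1:

    T = np.zeros((N, K))
    T[np.arange(N), Y] = 1  # row i gets a 1 in column Y[i], all rows at once
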
@@ -126,15 +130,15 @@ def main():
     W2 = np.random.randn(M, K)
     b2 = np.random.randn(K)

-    learning_rate = 1e-6
+    learning_rate = 1e-3
     costs = []
-    for epoch in xrange(100000):
+    for epoch in range(1000):
         output, hidden = forward(X, W1, b1, W2, b2)
         if epoch % 100 == 0:
             c = cost(T, output)
             P = np.argmax(output, axis=1)
             r = classification_rate(Y, P)
-            print "cost:", c, "classification_rate:", r
+            print("cost:", c, "classification_rate:", r)
             costs.append(c)

         # this is gradient ASCENT, not DESCENT
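
The ASCENT comment refers to the weight updates just past this hunk: the cost here is the log-likelihood, so the code steps with the gradient rather than against it. Sketched with names mirroring the script (the update lines themselves are outside the diff):

    # ascent on the log-likelihood J:
    # W2 += learning_rate * dJ/dW2
    # ...is identical to descent on the negative log-likelihood -J:
    # W2 -= learning_rate * d(-J)/dW2
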

ann_class2/dropout_theano.py

+4 −0
@@ -5,6 +5,10 @@
 # For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
 # https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np
 import theano

ann_class2/mlp.py

+7 −5
@@ -2,22 +2,24 @@
 # For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
 # https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np

 def forward(X, W1, b1, W2, b2):
+    # sigmoid
     # Z = 1 / (1 + np.exp(-( X.dot(W1) + b1 )))

-    # rectifier
+    # relu
     Z = X.dot(W1) + b1
     Z[Z < 0] = 0
-    # print "Z:", Z

     A = Z.dot(W2) + b2
     expA = np.exp(A)
     Y = expA / expA.sum(axis=1, keepdims=True)
-    # print "Y:", Y, "are any 0?", np.any(Y == 0), "are any nan?", np.any(np.isnan(Y))
-    # exit()
     return Y, Z

 def derivative_w2(Z, T, Y):
@@ -32,4 +34,4 @@ def derivative_w1(X, Z, T, Y, W2):

 def derivative_b1(Z, T, Y, W2):
     # return (( Y-T ).dot(W2.T) * ( Z*(1 - Z) )).sum(axis=0) # for sigmoid
-    return (( Y-T ).dot(W2.T) * (Z > 0)).sum(axis=0) # for relu
+    return (( Y-T ).dot(W2.T) * (Z > 0)).sum(axis=0) # for relu
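
In the relu branch, the boolean mask (Z > 0) is the derivative of the activation: forward() already clamped Z at zero, so relu'(z) is 1 exactly where the unit stayed active. A small self-contained sketch:

    import numpy as np

    Z = np.array([[-1.0, 0.5],
                  [ 2.0, -3.0]])
    Z[Z < 0] = 0                # relu, as in forward()
    mask = (Z > 0)              # relu derivative: 1 where the unit was active
    print(mask.astype(float))   # [[0. 1.]
                                #  [1. 0.]]
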

ann_class2/momentum.py

+20 −17
@@ -6,6 +6,10 @@
 # NOTE: MUST restrict initial values of W by dividing by #
 # NOTE: sigmoid vs. rectifier for hiddens
 # We get 15% error rate with sigmoid, 3% error rate with ReLU
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np
 from sklearn.utils import shuffle
@@ -37,7 +41,7 @@ def main():

     N, D = Xtrain.shape
     batch_sz = 500
-    n_batches = N / batch_sz
+    n_batches = N // batch_sz

     M = 300
     K = 10
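
The switch to // is not cosmetic: under Python 3 semantics (which the division import also enables on Python 2), N / batch_sz is a float, and range() rejects floats. A quick illustration:

    N, batch_sz = 41000, 500
    # range(N / batch_sz)  -> TypeError: 'float' object cannot be interpreted as an integer
    n_batches = N // batch_sz   # 82, an int, safe to pass to range()
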
@@ -50,8 +54,8 @@ def main():
     # cost = -16
     LL_batch = []
     CR_batch = []
-    for i in xrange(max_iter):
-        for j in xrange(n_batches):
+    for i in range(max_iter):
+        for j in range(n_batches):
             Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
             Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]
             pYbatch, Z = forward(Xbatch, W1, b1, W2, b2)
@@ -66,17 +70,16 @@ def main():
             if j % print_period == 0:
                 # calculate just for LL
                 pY, _ = forward(Xtest, W1, b1, W2, b2)
-                # print "pY:", pY
                 ll = cost(pY, Ytest_ind)
                 LL_batch.append(ll)
-                print "Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll)
+                print("Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll))

                 err = error_rate(pY, Ytest)
                 CR_batch.append(err)
-                print "Error rate:", err
+                print("Error rate:", err)

     pY, _ = forward(Xtest, W1, b1, W2, b2)
-    print "Final error rate:", error_rate(pY, Ytest)
+    print("Final error rate:", error_rate(pY, Ytest))

     # 2. batch with momentum
     W1 = np.random.randn(D, M) / np.sqrt(D)
@@ -90,8 +93,8 @@ def main():
     db2 = 0
     dW1 = 0
     db1 = 0
-    for i in xrange(max_iter):
-        for j in xrange(n_batches):
+    for i in range(max_iter):
+        for j in range(n_batches):
             Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
             Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]
             pYbatch, Z = forward(Xbatch, W1, b1, W2, b2)
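
The momentum updates themselves sit outside this hunk; the classical rule, sketched with illustrative names matching the velocity variables initialized above (dW2, db2, dW1, db1) and gW2 standing in for the current gradient:

    dW2 = mu*dW2 - lr*gW2   # velocity: decaying sum of past gradients
    W2 += dW2               # step in the velocity direction
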
@@ -112,13 +115,13 @@ def main():
                 # print "pY:", pY
                 ll = cost(pY, Ytest_ind)
                 LL_momentum.append(ll)
-                print "Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll)
+                print("Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll))

                 err = error_rate(pY, Ytest)
                 CR_momentum.append(err)
-                print "Error rate:", err
+                print("Error rate:", err)
     pY, _ = forward(Xtest, W1, b1, W2, b2)
-    print "Final error rate:", error_rate(pY, Ytest)
+    print("Final error rate:", error_rate(pY, Ytest))


     # 3. batch with Nesterov momentum
@@ -138,8 +141,8 @@ def main():
     vb2 = 0
     vW1 = 0
     vb1 = 0
-    for i in xrange(max_iter):
-        for j in xrange(n_batches):
+    for i in range(max_iter):
+        for j in range(n_batches):
             # because we want g(t) = grad(f(W(t-1) - lr*mu*dW(t-1)))
             # dW(t) = mu*dW(t-1) + g(t)
             # W(t) = W(t-1) - mu*dW(t)
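
The comment block is the standard rewriting of Nesterov momentum so the gradient can be evaluated at the current (already shifted) weights instead of at an explicit lookahead point. One common form of that reformulation, sketched with illustrative names (grad_f, mu, and lr are stand-ins; the exact update used later in this file lies outside the hunk):

    gW = grad_f(W)          # g(t): gradient at the current weights
    vW = mu*vW - lr*gW      # velocity update
    W += mu*vW - lr*gW      # combined Nesterov step
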
@@ -177,13 +180,13 @@ def main():
                 # print "pY:", pY
                 ll = cost(pY, Ytest_ind)
                 LL_nest.append(ll)
-                print "Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll)
+                print("Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll))

                 err = error_rate(pY, Ytest)
                 CR_nest.append(err)
-                print "Error rate:", err
+                print("Error rate:", err)
     pY, _ = forward(Xtest, W1, b1, W2, b2)
-    print "Final error rate:", error_rate(pY, Ytest)
+    print("Final error rate:", error_rate(pY, Ytest))

ann_class2/rmsprop.py

+16 −12
@@ -2,6 +2,10 @@
 # For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
 # https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np
 from sklearn.utils import shuffle
@@ -28,7 +32,7 @@ def main():

     N, D = Xtrain.shape
     batch_sz = 500
-    n_batches = N / batch_sz
+    n_batches = N // batch_sz

     M = 300
     K = 10
@@ -41,8 +45,8 @@ def main():
     # cost = -16
     LL_batch = []
     CR_batch = []
-    for i in xrange(max_iter):
-        for j in xrange(n_batches):
+    for i in range(max_iter):
+        for j in range(n_batches):
             Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
             Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]
             pYbatch, Z = forward(Xbatch, W1, b1, W2, b2)
@@ -60,14 +64,14 @@ def main():
                 # print "pY:", pY
                 ll = cost(pY, Ytest_ind)
                 LL_batch.append(ll)
-                print "Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll)
+                print("Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll))

                 err = error_rate(pY, Ytest)
                 CR_batch.append(err)
-                print "Error rate:", err
+                print("Error rate:", err)

     pY, _ = forward(Xtest, W1, b1, W2, b2)
-    print "Final error rate:", error_rate(pY, Ytest)
+    print("Final error rate:", error_rate(pY, Ytest))


     # 2. RMSprop
@@ -83,9 +87,9 @@ def main():
     cache_W1 = 1
     cache_b1 = 1
     decay_rate = 0.999
-    eps = 0.0000000001
-    for i in xrange(max_iter):
-        for j in xrange(n_batches):
+    eps = 1e-10
+    for i in range(max_iter):
+        for j in range(n_batches):
             Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
             Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]
             pYbatch, Z = forward(Xbatch, W1, b1, W2, b2)
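
decay_rate and eps feed the RMSprop cache update a few lines past this hunk. The standard rule, sketched with gW2 as an illustrative gradient name (the caches were initialized to 1 above, which keeps the first effective step sizes near the raw learning rate):

    cache_W2 = decay_rate*cache_W2 + (1 - decay_rate)*gW2*gW2
    W2 -= lr0*gW2 / (np.sqrt(cache_W2) + eps)   # eps = 1e-10 guards the division
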
@@ -114,14 +118,14 @@ def main():
                 # print "pY:", pY
                 ll = cost(pY, Ytest_ind)
                 LL_rms.append(ll)
-                print "Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll)
+                print("Cost at iteration i=%d, j=%d: %.6f" % (i, j, ll))

                 err = error_rate(pY, Ytest)
                 CR_rms.append(err)
-                print "Error rate:", err
+                print("Error rate:", err)

     pY, _ = forward(Xtest, W1, b1, W2, b2)
-    print "Final error rate:", error_rate(pY, Ytest)
+    print("Final error rate:", error_rate(pY, Ytest))

     plt.plot(LL_batch, label='const')
     plt.plot(LL_rms, label='rms')

ann_class2/sgd.py

+25 −21
@@ -13,6 +13,10 @@
 # For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
 # https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future

 import numpy as np
 import pandas as pd
@@ -32,7 +36,7 @@ def main():
     std = X.std(axis=0)
     X = (X - mu) / std

-    print "Performing logistic regression..."
+    print("Performing logistic regression...")
     Xtrain = X[:-1000,]
     Ytrain = Y[:-1000]
     Xtest = X[-1000:,]
@@ -49,7 +53,7 @@ def main():
     lr = 0.0001
     reg = 0.01
     t0 = datetime.now()
-    for i in xrange(200):
+    for i in range(200):
         p_y = forward(Xtrain, W, b)

         W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
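
gradW and gradb are defined earlier in the file (outside this diff); for a softmax output with one-hot targets they are presumably the usual log-likelihood gradients, making the += update gradient ascent with L2 shrinkage. A sketch of those assumed definitions:

    def gradW(T, Y, X):
        return X.T.dot(T - Y)        # ascent direction on the log-likelihood

    def gradb(T, Y):
        return (T - Y).sum(axis=0)
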
@@ -61,11 +65,11 @@ def main():
         LL.append(ll)
         if i % 10 == 0:
             err = error_rate(p_y_test, Ytest)
-            print "Cost at iteration %d: %.6f" % (i, ll)
-            print "Error rate:", err
+            print("Cost at iteration %d: %.6f" % (i, ll))
+            print("Error rate:", err)
     p_y = forward(Xtest, W, b)
-    print "Final error rate:", error_rate(p_y, Ytest)
-    print "Elapsted time for full GD:", datetime.now() - t0
+    print("Final error rate:", error_rate(p_y, Ytest))
+    print("Elapsted time for full GD:", datetime.now() - t0)


     # 2. stochastic
@@ -76,9 +80,9 @@ def main():
     reg = 0.01

     t0 = datetime.now()
-    for i in xrange(1): # takes very long since we're computing cost for 41k samples
+    for i in range(1): # takes very long since we're computing cost for 41k samples
         tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
-        for n in xrange(min(N, 500)): # shortcut so it won't take so long...
+        for n in range(min(N, 500)): # shortcut so it won't take so long...
             x = tmpX[n,:].reshape(1,D)
             y = tmpY[n,:].reshape(1,10)
             p_y = forward(x, W, b)
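
The reshape(1, D) calls keep a single sample 2-D so that the matrix code in forward and the gradient functions still sees a batch axis; illustrative shapes:

    x = tmpX[n,:]               # shape (D,)   -- 1-D, batch axis lost
    x = tmpX[n,:].reshape(1,D)  # shape (1, D) -- a batch of one sample
    p_y = forward(x, W, b)      # shape (1, 10), consistent with batched calls
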
@@ -90,13 +94,13 @@ def main():
             ll = cost(p_y_test, Ytest_ind)
             LL_stochastic.append(ll)

-            if n % (N/2) == 0:
+            if n % (N//2) == 0:
                 err = error_rate(p_y_test, Ytest)
-                print "Cost at iteration %d: %.6f" % (i, ll)
-                print "Error rate:", err
+                print("Cost at iteration %d: %.6f" % (i, ll))
+                print("Error rate:", err)
     p_y = forward(Xtest, W, b)
-    print "Final error rate:", error_rate(p_y, Ytest)
-    print "Elapsted time for SGD:", datetime.now() - t0
+    print("Final error rate:", error_rate(p_y, Ytest))
+    print("Elapsted time for SGD:", datetime.now() - t0)


     # 3. batch
@@ -106,12 +110,12 @@ def main():
     lr = 0.0001
     reg = 0.01
     batch_sz = 500
-    n_batches = N / batch_sz
+    n_batches = N // batch_sz

     t0 = datetime.now()
-    for i in xrange(50):
+    for i in range(50):
         tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
-        for j in xrange(n_batches):
+        for j in range(n_batches):
             x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
             y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
             p_y = forward(x, W, b)
@@ -122,13 +126,13 @@ def main():
             p_y_test = forward(Xtest, W, b)
             ll = cost(p_y_test, Ytest_ind)
             LL_batch.append(ll)
-            if j % (n_batches/2) == 0:
+            if j % (n_batches//2) == 0:
                 err = error_rate(p_y_test, Ytest)
-                print "Cost at iteration %d: %.6f" % (i, ll)
-                print "Error rate:", err
+                print("Cost at iteration %d: %.6f" % (i, ll))
+                print("Error rate:", err)
     p_y = forward(Xtest, W, b)
-    print "Final error rate:", error_rate(p_y, Ytest)
-    print "Elapsted time for batch GD:", datetime.now() - t0
+    print("Final error rate:", error_rate(p_y, Ytest))
+    print("Elapsted time for batch GD:", datetime.now() - t0)
