
Commit cd73081: dropout update
Parent: 713c3dc

2 files changed: +40 -18 lines

ann_class2/dropout_tensorflow.py (34 additions, 12 deletions)
@@ -1,3 +1,8 @@
+from __future__ import print_function, division
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
 # For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
 # https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
 # https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
@@ -13,7 +18,7 @@ class HiddenLayer(object):
     def __init__(self, M1, M2):
         self.M1 = M1
         self.M2 = M2
-        W = np.random.randn(M1, M2) / np.sqrt(2.0 / M1)
+        W = np.random.randn(M1, M2) * np.sqrt(2.0 / M1)
         b = np.zeros(M2)
         self.W = tf.Variable(W.astype(np.float32))
         self.b = tf.Variable(b.astype(np.float32))
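The substance of this hunk is the initialization fix: dividing by np.sqrt(2.0 / M1) actually multiplies the weights by sqrt(M1 / 2), which inflates them whenever M1 > 2, while He initialization for ReLU layers calls for a standard deviation of sqrt(2 / M1). The same correction is applied to the output-layer weights in a later hunk. A quick numpy check of the resulting scales (illustrative only, not part of the commit):

import numpy as np

M1, M2 = 500, 300  # example fan-in and fan-out
W_old = np.random.randn(M1, M2) / np.sqrt(2.0 / M1)  # buggy: std ~ sqrt(M1/2) ~ 15.8
W_new = np.random.randn(M1, M2) * np.sqrt(2.0 / M1)  # He init: std ~ 0.063
print(W_old.std(), W_new.std())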
@@ -28,7 +33,7 @@ def __init__(self, hidden_layer_sizes, p_keep):
         self.hidden_layer_sizes = hidden_layer_sizes
         self.dropout_rates = p_keep

-    def fit(self, X, Y, lr=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=100, split=True, print_every=20):
+    def fit(self, X, Y, lr=1e-4, mu=0.9, decay=0.9, epochs=15, batch_sz=100, split=True, print_every=20):
         # make a validation set
         X, Y = shuffle(X, Y)
         X = X.astype(np.float32)
@@ -48,7 +53,7 @@ def fit(self, X, Y, lr=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=100, split=Tr
             h = HiddenLayer(M1, M2)
             self.hidden_layers.append(h)
             M1 = M2
-        W = np.random.randn(M1, K) / np.sqrt(M1)
+        W = np.random.randn(M1, K) * np.sqrt(2.0 / M1)
         b = np.zeros(K)
         self.W = tf.Variable(W.astype(np.float32))
         self.b = tf.Variable(b.astype(np.float32))
@@ -71,44 +76,61 @@ def fit(self, X, Y, lr=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=100, split=Tr
         )
         train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
         # train_op = tf.train.MomentumOptimizer(lr, momentum=mu).minimize(cost)
+        # train_op = tf.train.AdamOptimizer(lr).minimize(cost)
         prediction = self.predict(inputs)

-        n_batches = N / batch_sz
+        # validation cost will be calculated separately since nothing will be dropped
+        test_logits = self.forward_test(inputs)
+        test_cost = tf.reduce_mean(
+            tf.nn.sparse_softmax_cross_entropy_with_logits(
+                logits=test_logits,
+                labels=labels
+            )
+        )
+
+        n_batches = N // batch_sz
         costs = []
         init = tf.global_variables_initializer()
         with tf.Session() as session:
             session.run(init)
-            for i in xrange(epochs):
-                print "epoch:", i, "n_batches:", n_batches
+            for i in range(epochs):
+                print("epoch:", i, "n_batches:", n_batches)
                 X, Y = shuffle(X, Y)
-                for j in xrange(n_batches):
+                for j in range(n_batches):
                     Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                     Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                     session.run(train_op, feed_dict={inputs: Xbatch, labels: Ybatch})

                     if j % print_every == 0:
-                        c = session.run(cost, feed_dict={inputs: Xvalid, labels: Yvalid})
+                        c = session.run(test_cost, feed_dict={inputs: Xvalid, labels: Yvalid})
                         p = session.run(prediction, feed_dict={inputs: Xvalid})
                         costs.append(c)
                         e = error_rate(Yvalid, p)
-                        print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e
+                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

         plt.plot(costs)
         plt.show()

     def forward(self, X):
-        # no need to define different functions for train and predict
-        # tf.nn.dropout takes care of the differences for us
+        # tf.nn.dropout scales inputs by 1/p_keep
+        # therefore, during test time, we don't have to scale anything
         Z = X
         Z = tf.nn.dropout(Z, self.dropout_rates[0])
         for h, p in zip(self.hidden_layers, self.dropout_rates[1:]):
             Z = h.forward(Z)
             Z = tf.nn.dropout(Z, p)
         return tf.matmul(Z, self.W) + self.b

+    def forward_test(self, X):
+        Z = X
+        # no dropout at test time: tf.nn.dropout already rescaled by 1/p_keep during training
+        for h in self.hidden_layers:
+            Z = h.forward(Z)
+        return tf.matmul(Z, self.W) + self.b
+
     def predict(self, X):
-        pY = self.forward(X)
+        pY = self.forward_test(X)
         return tf.argmax(pY, 1)
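The heart of this hunk is the split between forward (training, with dropout) and forward_test (evaluation, without), plus a separate test_cost graph so validation cost is measured with nothing dropped. This works because tf.nn.dropout implements inverted dropout: at train time each unit is kept with probability p_keep and the survivors are scaled up by 1/p_keep, so activations already have the same expectation the untouched test-time network produces. A minimal numpy sketch of that equivalence (illustrative, not from the commit):

import numpy as np

p_keep = 0.8
Z = np.random.randn(1000000) + 3.0  # some activations

# train time (inverted dropout): drop units, rescale survivors by 1/p_keep
mask = np.random.rand(Z.shape[0]) < p_keep
Z_train = Z * mask / p_keep

# test time: the activations are used as-is, with no mask and no rescaling
print(Z.mean(), Z_train.mean())  # expectations match up to sampling noise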

ann_class2/dropout_theano.py (6 additions, 6 deletions)
@@ -20,7 +20,7 @@ def __init__(self, M1, M2, an_id):
         self.id = an_id
         self.M1 = M1
         self.M2 = M2
-        W = np.random.randn(M1, M2) / np.sqrt(2.0 / M1)
+        W = np.random.randn(M1, M2) * np.sqrt(2.0 / M1)
         b = np.zeros(M2)
         self.W = theano.shared(W, 'W_%s' % self.id)
         self.b = theano.shared(b, 'b_%s' % self.id)
@@ -56,7 +56,7 @@ def fit(self, X, Y, learning_rate=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=10
             self.hidden_layers.append(h)
             M1 = M2
             count += 1
-        W = np.random.randn(M1, K) / np.sqrt(M1)
+        W = np.random.randn(M1, K) * np.sqrt(2.0 / M1)
         b = np.zeros(K)
         self.W = theano.shared(W, 'W_logreg')
         self.b = theano.shared(b, 'b_logreg')
@@ -111,11 +111,11 @@ def fit(self, X, Y, learning_rate=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=10
         prediction = self.predict(thX)
         cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost_predict, prediction])

-        n_batches = N / batch_sz
+        n_batches = N // batch_sz
         costs = []
-        for i in xrange(epochs):
+        for i in range(epochs):
             X, Y = shuffle(X, Y)
-            for j in xrange(n_batches):
+            for j in range(n_batches):
                 Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                 Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]
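These are the same Python 2 to 3 fixes as in the TensorFlow file: xrange no longer exists, and true division would make n_batches a float, which range() rejects. Floor division keeps it an integer; a quick illustration (not part of the commit):

N, batch_sz = 1050, 100
print(N / batch_sz)   # 10.5 -- true division returns a float in Python 3
print(N // batch_sz)  # 10   -- floor division returns an int, safe for range()
# range(N / batch_sz) raises TypeError: 'float' object cannot be interpreted as an integer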

@@ -125,7 +125,7 @@ def fit(self, X, Y, learning_rate=1e-4, mu=0.9, decay=0.9, epochs=8, batch_sz=10
                 c, p = cost_predict_op(Xvalid, Yvalid)
                 costs.append(c)
                 e = error_rate(Yvalid, p)
-                print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e
+                print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

         if show_fig:
             plt.plot(costs)
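For context on how these classes are driven: the constructor takes hidden_layer_sizes and p_keep, where p_keep[0] is the keep probability for the input layer and the remaining entries pair up with the hidden layers, as forward() shows. A hedged usage sketch; the class name ANN and the data variables are assumptions, since neither appears in this diff:

# hypothetical usage, assuming the class is named ANN and Xtrain/Ytrain are loaded elsewhere
model = ANN(hidden_layer_sizes=[500, 300], p_keep=[0.8, 0.5, 0.5])
# keep 80% of input units and 50% of each hidden layer's units during training
model.fit(Xtrain, Ytrain, epochs=15)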
