# New concepts and differences from Theano:
# - stride is the interval at which to apply the convolution
# - unlike the previous course, we use a constant-size input to the network,
#   since not doing that caused us to start swapping
# - the output after convpool is a different size: (8, 8) here, (5, 5) in Theano
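#
# why (8, 8): with 'SAME' padding the convolutions keep the 32x32 spatial size,
# and each 2x2 max-pool halves it, so 32 -> 16 after the first convpool and
# 16 -> 8 after the second (the Theano version presumably used 'valid'
# convolutions, which shrink 32 -> 28 -> 14 -> 10 -> 5, hence (5, 5) there)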

# https://udemy.com/deep-learning-convolutional-neural-networks-theano-tensorflow

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from datetime import datetime
from scipy.signal import convolve2d
from scipy.io import loadmat
from sklearn.utils import shuffle


def y2indicator(y):
    N = len(y)
    ind = np.zeros((N, 10))
    for i in xrange(N):
        ind[i, y[i]] = 1
    return ind
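# note: the loop above could equivalently be vectorized as
#   ind[np.arange(N), y] = 1
# since y holds integer class labels 0..9 by the time this is called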


def error_rate(p, t):
    return np.mean(p != t)


def convpool(X, W, b):
    # just assume pool size is (2,2) because we need to augment it with 1s
    conv_out = tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')
    conv_out = tf.nn.bias_add(conv_out, b)
    pool_out = tf.nn.max_pool(conv_out, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return pool_out
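# e.g. in this script: a (batch_sz, 32, 32, 3) input with a (5, 5, 3, 20) filter gives a
# (batch_sz, 32, 32, 20) conv output ('SAME' padding preserves height and width),
# and the 2x2, stride-2 max-pool then reduces it to (batch_sz, 16, 16, 20)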


def init_filter(shape, poolsz):
    w = np.random.randn(*shape) / np.sqrt(np.prod(shape[:-1]) + shape[-1]*np.prod(shape[:-2]) / np.prod(poolsz))
    return w.astype(np.float32)
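# the scale is roughly 1/sqrt(fan_in + fan_out): fan_in is filter_h * filter_w * in_channels
# (shape[:-1]) and fan_out is out_channels * filter_h * filter_w divided by the pooling
# area, since each output value only feeds one unit per pool region in the next layer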


def rearrange(X):
    # input is (32, 32, 3, N)
    # output is (N, 32, 32, 3)
    N = X.shape[-1]
    out = np.zeros((N, 32, 32, 3), dtype=np.float32)
    for i in xrange(N):
        for j in xrange(3):
            out[i, :, :, j] = X[:, :, j, i]
    return out / 255
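# the same rearrangement can be written without Python loops as
#   (X.transpose(3, 0, 1, 2) / 255.0).astype(np.float32)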


def main():
    train = loadmat('../large_files/train_32x32.mat') # N = 73257
    test = loadmat('../large_files/test_32x32.mat') # N = 26032
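    # these .mat files are the cropped-digit SVHN (Street View House Numbers) dataset,
    # available from http://ufldl.stanford.edu/housenumbers/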

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    print len(Ytrain)
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test
    Ytest_ind = y2indicator(Ytest)

    # gradient descent params
    max_iter = 20
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N / batch_sz
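    # integer division (Python 2): n_batches = 73257 // 500 = 146,
    # so only the first 146 * 500 = 73000 training samples get used per epoch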

    # limit samples since input will always have to be same size
    # you could also just do N = N / batch_sz * batch_sz
    Xtrain = Xtrain[:73000,]
    Ytrain = Ytrain[:73000]
    Xtest = Xtest[:26000,]
    Ytest = Ytest[:26000]
    Ytest_ind = Ytest_ind[:26000,]
    # print "Xtest.shape:", Xtest.shape
    # print "Ytest.shape:", Ytest.shape

    # initial weights
    M = 500
    K = 10
    poolsz = (2, 2)

    W1_shape = (5, 5, 3, 20) # (filter_height, filter_width, num_color_channels, num_feature_maps)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32) # one bias per output feature map

    W2_shape = (5, 5, 20, 50) # (filter_height, filter_width, old_num_feature_maps, num_feature_maps)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[-1]*8*8, M) / np.sqrt(W2_shape[-1]*8*8 + M)
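    # 8*8 because the two convpool layers shrink 32x32 down to 8x8 (see note at top),
    # so the flattened input to the dense layer has 50*8*8 = 3200 features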
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)


    # define variables and expressions
    # using None as the first shape element takes up too much RAM unfortunately
    X = tf.placeholder(tf.float32, shape=(batch_sz, 32, 32, 3), name='X')
    T = tf.placeholder(tf.float32, shape=(batch_sz, K), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
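    # Z2 has shape (batch_sz, 8, 8, 50); flatten everything but the batch
    # dimension so Z2r is (batch_sz, 3200) and can be fed into the dense layer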
    Z3 = tf.nn.relu( tf.matmul(Z2r, W3) + b3 )
    Yish = tf.matmul(Z3, W4) + b4

    # softmax is applied inside the cross-entropy op, so Yish holds the raw logits
    cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=Yish, labels=T))

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99, momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []
    init = tf.initialize_all_variables()
    with tf.Session() as session:
        session.run(init)

        for i in xrange(max_iter):
            for j in xrange(n_batches):
                Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
                Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]

                if len(Xbatch) == batch_sz:
                    session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # due to RAM limitations we need to have a fixed size input
                        # so as a result, we have this ugly total cost and prediction computation
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        for k in xrange(len(Xtest) / batch_sz):
                            Xtestbatch = Xtest[k*batch_sz:(k*batch_sz + batch_sz),]
                            Ytestbatch = Ytest_ind[k*batch_sz:(k*batch_sz + batch_sz),]
                            test_cost += session.run(cost, feed_dict={X: Xtestbatch, T: Ytestbatch})
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={X: Xtestbatch})
                        err = error_rate(prediction, Ytest)
                        print "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err)
                        LL.append(test_cost)
    print "Elapsed time:", (datetime.now() - t0)
    plt.plot(LL)
    plt.show()


if __name__ == '__main__':
    main()