
Commit 471f7bc

updates
1 parent ee38bac commit 471f7bc

3 files changed: +80, -15 lines

unsupervised_class2/autoencoder.py

Lines changed: 13 additions & 13 deletions
@@ -24,6 +24,7 @@ def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
         self.params = [self.W, self.bh, self.bo]
         self.forward_params = [self.W, self.bh]

+        # TODO: technically these should be reset before doing backprop
         self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
         self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
         self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
@@ -33,11 +34,6 @@ def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
         X_in = T.matrix('X_%s' % self.id)
         X_hat = self.forward_output(X_in)

-        forward_op = theano.function(
-            inputs=[X_in],
-            outputs=X_hat,
-        )
-
         # attach it to the object so it can be used later
         # must be sigmoidal because the output is also a sigmoid
         H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
@@ -71,7 +67,7 @@ def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
             for j in xrange(n_batches):
                 batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                 train_op(batch)
-                the_cost = cost_op(X)
+                the_cost = cost_op(X) # technically we could also get the cost for Xtest here
                 print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
                 costs.append(the_cost)
         if show_fig:
@@ -89,6 +85,16 @@ def forward_output(self, X):
         Y = T.nnet.sigmoid(Z.dot(self.W.T) + self.bo)
         return Y

+    @staticmethod
+    def createFromArrays(W, bh, bo, an_id):
+        ae = AutoEncoder(W.shape[1], an_id)
+        ae.W = theano.shared(W, 'W_%s' % ae.id)
+        ae.bh = theano.shared(bh, 'bh_%s' % ae.id)
+        ae.bo = theano.shared(bo, 'bo_%s' % ae.id)
+        ae.params = [ae.W, ae.bh, ae.bo]
+        ae.forward_params = [ae.W, ae.bh]
+        return ae
+

 class DNN(object):
     def __init__(self, hidden_layer_sizes, UnsupervisedModel=AutoEncoder):
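The new createFromArrays factory rebuilds a layer from plain numpy arrays; the save/load round trip added in unsupervised.py depends on it. A minimal usage sketch, assuming ae0 is an already-fitted AutoEncoder (the variable names here are illustrative, not part of the commit):

W = ae0.W.eval()    # (D, M) weight matrix pulled out of the theano shared
bh = ae0.bh.eval()  # hidden bias, shape (M,)
bo = ae0.bo.eval()  # output bias, shape (D,)

ae1 = AutoEncoder.createFromArrays(W, bh, bo, ae0.id)
# ae1 wraps the same values in fresh shared variables, so its
# forward_output agrees with ae0's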
@@ -135,14 +141,9 @@ def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, re
         targets = T.ivector('Targets')
         pY = self.forward(X_in)

-        forward_op = theano.function(
-            inputs=[X_in],
-            outputs=pY,
-        )
-
         # squared_magnitude = [(p*p).sum() for p in self.params]
         # reg_cost = T.sum(squared_magnitude)
-        cost = -T.mean( T.log(pY)[T.arange(pY.shape[0]), targets] ) #+ reg*reg_cost
+        cost = -T.mean( T.log(pY[T.arange(pY.shape[0]), targets]) ) #+ reg*reg_cost
         prediction = self.predict(X_in)
         cost_predict_op = theano.function(
             inputs=[X_in, targets],
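The cost change is an equivalence-preserving cleanup: T.log(pY)[T.arange(N), targets] takes the log of the whole N x K output matrix and then picks one entry per row, while T.log(pY[T.arange(N), targets]) picks first and only logs N values. Both yield the same cross-entropy, which a small standalone numpy sketch (illustrative names) confirms:

import numpy as np

N, K = 4, 3
pY = np.random.rand(N, K)
pY /= pY.sum(axis=1, keepdims=True)  # rows sum to 1, like a softmax output
targets = np.array([0, 2, 1, 2])

log_then_index = -np.mean(np.log(pY)[np.arange(N), targets])  # logs all N*K entries
index_then_log = -np.mean(np.log(pY[np.arange(N), targets]))  # logs only N entries
assert np.allclose(log_then_index, index_then_log)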
@@ -171,7 +172,6 @@ def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, re
                 Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]
                 train_op(Xbatch, Ybatch)
                 the_cost, the_prediction = cost_predict_op(Xtest, Ytest)
-                # print "prediction:", the_prediction, "test:", Ytest
                 error = error_rate(the_prediction, Ytest)
                 print "j / n_batches:", j, "/", n_batches, "cost:", the_cost, "error:", error
                 costs.append(the_cost)

unsupervised_class2/rbm.py

Lines changed: 13 additions & 2 deletions
@@ -15,7 +15,7 @@ def __init__(self, M, an_id):
         self.id = an_id
         self.rng = RandomStreams()

-    def fit(self, X, learning_rate=0.1, epochs=10, batch_sz=100, show_fig=False):
+    def fit(self, X, learning_rate=0.1, epochs=1, batch_sz=100, show_fig=False):
         N, D = X.shape
         n_batches = N / batch_sz

@@ -27,6 +27,7 @@ def fit(self, X, learning_rate=0.1, epochs=10, batch_sz=100, show_fig=False):
         self.forward_params = [self.W, self.c]

         # we won't use this to fit the RBM but we will use these for backpropagation later
+        # TODO: technically they should be reset before doing backprop
         self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
         self.dc = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
         self.db = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
@@ -75,7 +76,7 @@ def fit(self, X, learning_rate=0.1, epochs=10, batch_sz=100, show_fig=False):
             for j in xrange(n_batches):
                 batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                 train_op(batch)
-                the_cost = cost_op(X)
+                the_cost = cost_op(X) # technically we could also get the cost for Xtest here
                 print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
                 costs.append(the_cost)
         if show_fig:
@@ -103,6 +104,16 @@ def forward_output(self, X):
         Y = T.nnet.sigmoid(Z.dot(self.W.T) + self.b)
         return Y

+    @staticmethod
+    def createFromArrays(W, c, b, an_id):
+        rbm = RBM(W.shape[1], an_id)
+        rbm.W = theano.shared(W, 'W_%s' % rbm.id)
+        rbm.c = theano.shared(c, 'c_%s' % rbm.id)
+        rbm.b = theano.shared(b, 'b_%s' % rbm.id)
+        rbm.params = [rbm.W, rbm.c, rbm.b]
+        rbm.forward_params = [rbm.W, rbm.c]
+        return rbm
+

 def main():
     Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()

unsupervised_class2/unsupervised.py

Lines changed: 54 additions & 0 deletions
@@ -22,6 +22,8 @@ def __init__(self, hidden_layer_sizes, UnsupervisedModel=AutoEncoder):
             count += 1

     def fit(self, X, pretrain_epochs=1):
+        self.D = X.shape[1] # save for later
+
         current_input = X
         for ae in self.hidden_layers:
             ae.fit(current_input, epochs=pretrain_epochs)
@@ -39,6 +41,58 @@ def forward(self, X):
             current_input = Z
         return current_input

+    def fit_to_input(self, k, learning_rate=1.0, epochs=100000):
+        # This is not very flexible: ideally you would be able to
+        # activate any node in any hidden layer, not just the last one.
+        # Exercise for students: modify this function to be able
+        # to activate neurons in the middle layers.
+        X0 = init_weights((1, self.D))
+        X = theano.shared(X0, 'X_shared')
+        Y = self.forward(X)
+        t = np.zeros(self.hidden_layers[-1].M)
+        t[k] = 1
+
+        cost = -(t*T.log(Y[0]) + (1 - t)*(T.log(1 - Y[0]))).sum()
+        updates = [(X, X - learning_rate*T.grad(cost, X))]
+        train = theano.function(
+            inputs=[],
+            outputs=cost,
+            updates=updates,
+        )
+
+        costs = []
+        for i in xrange(epochs):
+            if i % 1000 == 0:
+                print "epoch:", i
+            the_cost = train()
+            costs.append(the_cost)
+        plt.plot(costs)
+        plt.show()
+
+        return X.eval()
+
+    def save(self, filename):
+        # loop order matters: iterate layers first, then each layer's params
+        arrays = [p.eval() for layer in self.hidden_layers for p in layer.params]
+        np.savez(filename, *arrays)
+
+    @staticmethod
+    def load(filename, UnsupervisedModel=AutoEncoder):
+        dbn = DBN([], UnsupervisedModel)
+        npz = np.load(filename)
+        dbn.hidden_layers = []
+        count = 0
+        for i in xrange(0, len(npz.files), 3):
+            # np.savez names unnamed arrays arr_0, arr_1, ...
+            W = npz['arr_%d' % i]
+            bh = npz['arr_%d' % (i+1)]
+            bo = npz['arr_%d' % (i+2)]
+
+            ae = UnsupervisedModel.createFromArrays(W, bh, bo, count)
+            dbn.hidden_layers.append(ae)
+            count += 1
+        return dbn
+

 def main():
     Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()
     dbn = DBN([1000, 750, 500], UnsupervisedModel=AutoEncoder)
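Putting the new DBN methods together, here is a hedged usage sketch (not part of the commit). It assumes MNIST-style 784-dimensional inputs and that getKaggleMNIST and plt are in scope, as they are in this module; note that load() does not set self.D, so it must be assigned before calling fit_to_input:

Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()

dbn = DBN([1000, 750, 500], UnsupervisedModel=AutoEncoder)
dbn.fit(Xtrain, pretrain_epochs=2)
dbn.save('dbn.npz')  # each layer contributes W, bh, bo as arr_0, arr_1, ...

dbn2 = DBN.load('dbn.npz', UnsupervisedModel=AutoEncoder)
dbn2.D = Xtrain.shape[1]  # load() does not set D, but fit_to_input needs it

# gradient-descend on the input itself to find an image that
# maximally activates node k=0 of the last hidden layer
image = dbn2.fit_to_input(0, epochs=10000)
plt.imshow(image.reshape(28, 28), cmap='gray')
plt.show()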
