
Commit d064f64

add lstm wiki + minor fixes
1 parent a0da868 commit d064f64

8 files changed (+227 -48)

ann_class/batch_donut.py (-1)
@@ -81,7 +81,6 @@ def test_donut():
     LL = [] # keep track of likelihoods
     learning_rate = 0.0001
     regularization = 0.1
-    last_error_rate = None


     # batch version

hmm_class/edgar_allan_poe.txt (+25 -25)
@@ -1,11 +1,11 @@
 LO! Death hath rear'd himself a throne
 In a strange city, all alone,
-Far down within the dim west
+Far down within the dim west
 Where the good, and the bad, and the worst, and the best,
 Have gone to their eternal rest.

 There shrines, and palaces, and towers
-Are not like any thing of ours
+Are not like any thing of ours
 Oh no! O no! ours never loom
 To heaven with that ungodly gloom!
 Time-eaten towers that tremble not!
@@ -17,23 +17,23 @@ The melancholy waters lie.
 No holy rays from heaven come down
 On the long night-time of that town,
 But light from out the lurid sea
-Streams up the turrets silently
-Up thrones up long-forgotten bowers
-Of scultur'd ivy and stone flowers
-Up domes up spires up kingly halls
-Up fanes up Babylon-like walls
+Streams up the turrets silently
+Up thrones up long-forgotten bowers
+Of scultur'd ivy and stone flowers
+Up domes up spires up kingly halls
+Up fanes up Babylon-like walls
 Up many a melancholy shrine
 Whose entablatures intertwine
-The mask the viol and the vine.
+The mask the viol and the vine.

-There open temples open graves
-Are on a level with the waves
+There open temples open graves
+Are on a level with the waves
 But not the riches there that lie
 In each idol's diamond eye,
 Not the gaily-jewell'd dead
 Tempt the waters from their bed:
 For no ripples curl, alas!
-Along that wilderness of glass
+Along that wilderness of glass
 No swellings hint that winds may be
 Upon a far-off happier sea:
 So blend the turrets and shadows there
@@ -42,13 +42,13 @@ While from the high towers of the town
 Death looks gigantically down.

 But lo! a stir is in the air!
-The wave there is a ripple there!
+The wave there is a ripple there!
 As if the towers had thrown aside,
-In slightly sinking, the dull tide
+In slightly sinking, the dull tide
 As if the turret-tops had given
 A vacuum in the filmy heaven.
-The waves have now a redder glow
-The very hours are breathing low
+The waves have now a redder glow
+The very hours are breathing low
 And when, amid no earthly moans,
 Down, down, that town shall settle hence,
 All Hades, from a thousand thrones,
@@ -209,7 +209,7 @@ And love- a simple duty.

 When from your gems of thought I turn
 To those pure orbs, your heart to learn,
-I scarce know which to prize most high
+I scarce know which to prize most high
 The bright i-dea, or the bright dear-eye.

 Of all who hail thy presence as the morning-
@@ -290,23 +290,23 @@ Darkly my Present and my Past,
 Let my Future radiant shine
 With sweet hopes of thee and thine!

-The bells! ah, the bells!
+The bells! ah, the bells!
 The little silver bells!
 How fairy-like a melody there floats
-From their throats.
-From their merry little throats
+From their throats.
+From their merry little throats
 From the silver, tinkling throats
-Of the bells, bells, bells
+Of the bells, bells, bells
 Of the bells!

-The bells! ah, the bells!
+The bells! ah, the bells!
 The heavy iron bells!
 How horrible a monody there floats
-From their throats
-From their deep-toned throats
+From their throats
+From their deep-toned throats
 From their melancholy throats!
 How I shudder at the notes
-Of the bells, bells, bells
+Of the bells, bells, bells
 Of the bells!

 How often we forget all time, when lone
@@ -554,7 +554,7 @@ The Elfin from the green grass, and from me
 The summer dream beneath the tamarind tree?

 I'll tell you a plan for gaining wealth,
-Better than banking, trade or leases
+Better than banking, trade or leases
 Take a bank note and fold it up,
 And then you will find your money in creases!
 This wonderful plan, without danger or loss,

linear_regression_class/lr_1d.py (+1 -1)
@@ -45,4 +45,4 @@
 d1 = Y - Yhat
 d2 = Y - Y.mean()
 r2 = 1 - d1.dot(d1) / d2.dot(d2)
-print "the r-squared is:", r2
+print("the r-squared is:", r2)

linear_regression_class/lr_poly.py (+1 -1)
@@ -49,4 +49,4 @@
 d1 = Y - Yhat
 d2 = Y - Y.mean()
 r2 = 1 - d1.dot(d1) / d2.dot(d2)
-print "the r-squared is:", r2
+print("the r-squared is:", r2)

nlp_class/lsa.py (+10 -6)
@@ -71,10 +71,14 @@ def tokens_to_vector(tokens):
     X[:,i] = tokens_to_vector(tokens)
     i += 1

-svd = TruncatedSVD()
-Z = svd.fit_transform(X)
-plt.scatter(Z[:,0], Z[:,1])
-for i in xrange(D):
-    plt.annotate(s=index_word_map[i], xy=(Z[i,0], Z[i,1]))
-plt.show()
+def main():
+    svd = TruncatedSVD()
+    Z = svd.fit_transform(X)
+    plt.scatter(Z[:,0], Z[:,1])
+    for i in xrange(D):
+        plt.annotate(s=index_word_map[i], xy=(Z[i,0], Z[i,1]))
+    plt.show()
+
+if __name__ == '__main__':
+    main()
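
This refactor moves the module-level plotting into main() behind an __main__ guard, so importing lsa no longer pops up a plot. The file remains Python 2 (xrange, annotate's old s= keyword); a minimal Python 3 sketch of the same main(), assuming X, D, and index_word_map are built earlier in lsa.py as in the context lines above:

import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD

def main():
    # X, D, index_word_map come from the earlier part of lsa.py
    svd = TruncatedSVD()                  # n_components defaults to 2
    Z = svd.fit_transform(X)              # project term-document matrix to 2-D
    plt.scatter(Z[:, 0], Z[:, 1])
    for i in range(D):                    # range replaces Python 2's xrange
        # pass the label positionally; newer matplotlib renamed s= to text=
        plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))
    plt.show()

if __name__ == '__main__':
    main()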

rnn_class/gru_wiki.py (+17 -12)
@@ -1,9 +1,11 @@
+import sys
 import theano
 import theano.tensor as T
 import numpy as np
 import matplotlib.pyplot as plt
 import json

+from datetime import datetime
 from sklearn.utils import shuffle
 from util import init_weight, get_wikipedia_data

@@ -63,7 +65,7 @@ def __init__(self, D, hidden_layer_sizes, V):
         self.D = D
         self.V = V

-    def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU):
+    def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU, normalize=True):
         D = self.D
         V = self.V
         N = len(X)
@@ -110,7 +112,8 @@ def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activat
         gWe = T.grad(cost, self.We)
         dWe_update = mu*dWe - learning_rate*gWe
         We_update = self.We + dWe_update
-        We_update /= We_update.sum(axis=1).dimshuffle(0, 'x')
+        if normalize:
+            We_update /= We_update.sum(axis=1).dimshuffle(0, 'x')

         updates = [
             (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
@@ -128,6 +131,7 @@ def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activat

         costs = []
         for i in xrange(epochs):
+            t0 = datetime.now()
             X = shuffle(X)
             n_correct = 0
             n_total = 0
@@ -159,33 +163,34 @@ def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activat
                     if pj == xj:
                         n_correct += 1
                 if j % 200 == 0:
-                    print "j:", j, "correct rate so far:", (float(n_correct)/n_total)
-            print "i:", i, "cost:", cost, "correct rate:", (float(n_correct)/n_total)
+                    sys.stdout.write("j/N: %d/%d correct rate so far: %f\r" % (j, N, float(n_correct)/n_total))
+                    sys.stdout.flush()
+            print "i:", i, "cost:", cost, "correct rate:", (float(n_correct)/n_total), "time for epoch:", (datetime.now() - t0)
             costs.append(cost)

         if show_fig:
             plt.plot(costs)
             plt.show()


-def train_wikipedia():
+def train_wikipedia(we_file='word_embeddings.npy', w2i_file='wikipedia_word2idx.json'):
     # there are 32 files
     sentences, word2idx = get_wikipedia_data(n_files=32, n_vocab=2000)
     print "finished retrieving data"
     print "vocab size:", len(word2idx), "number of sentences:", len(sentences)
-    rnn = RNN(20, [20], len(word2idx))
-    rnn.fit(sentences, learning_rate=10e-5, epochs=10, show_fig=True, activation=T.nnet.relu)
+    rnn = RNN(30, [30], len(word2idx))
+    rnn.fit(sentences, learning_rate=10e-6, epochs=10, show_fig=True, activation=T.nnet.relu)

-    np.save('word_embeddings.npy', rnn.We.get_value())
-    with open('wikipedia_word2idx.json', 'w') as f:
+    np.save(we_file, rnn.We.get_value())
+    with open(w2i_file, 'w') as f:
         json.dump(word2idx, f)

 def generate_wikipedia():
     pass

-def find_analogies(w1, w2, w3):
-    We = np.load('word_embeddings.npy')
-    with open('wikipedia_word2idx.json') as f:
+def find_analogies(w1, w2, w3, we_file='word_embeddings.npy', w2i_file='wikipedia_word2idx.json'):
+    We = np.load(we_file)
+    with open(w2i_file) as f:
         word2idx = json.load(f)

     king = We[word2idx[w1]]
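
Two of the changes above are worth spelling out. Every parameter, including the embedding matrix We, is trained with the momentum update visible in the dWe lines:

\[ \Delta W \leftarrow \mu \, \Delta W - \eta \, \nabla_W J, \qquad W \leftarrow W + \Delta W \]

where mu = 0.99 is the momentum coefficient and eta the learning rate. The new normalize flag gates the extra step that rescales each row of We by its sum after the update; lstm_wiki.py below passes normalize=False to skip it.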

rnn_class/lstm_wiki.py (+112, new file)
@@ -0,0 +1,112 @@
+import json
+import numpy as np
+import theano
+import theano.tensor as T
+
+from util import init_weight, get_wikipedia_data
+from gru_wiki import RNN, find_analogies
+
+class LSTM:
+    def __init__(self, Mi, Mo, activation):
+        self.Mi = Mi
+        self.Mo = Mo
+        self.f = activation
+
+        # numpy init
+        Wxi = init_weight(Mi, Mo)
+        Whi = init_weight(Mo, Mo)
+        Wci = init_weight(Mo, Mo)
+        bi = np.zeros(Mo)
+        Wxf = init_weight(Mi, Mo)
+        Whf = init_weight(Mo, Mo)
+        Wcf = init_weight(Mo, Mo)
+        bf = np.zeros(Mo)
+        Wxc = init_weight(Mi, Mo)
+        Whc = init_weight(Mo, Mo)
+        bc = np.zeros(Mo)
+        Wxo = init_weight(Mi, Mo)
+        Who = init_weight(Mo, Mo)
+        Wco = init_weight(Mo, Mo)
+        bo = np.zeros(Mo)
+        c0 = np.zeros(Mo)
+        h0 = np.zeros(Mo)
+
+        # theano vars
+        self.Wxi = theano.shared(Wxi)
+        self.Whi = theano.shared(Whi)
+        self.Wci = theano.shared(Wci)
+        self.bi = theano.shared(bi)
+        self.Wxf = theano.shared(Wxf)
+        self.Whf = theano.shared(Whf)
+        self.Wcf = theano.shared(Wcf)
+        self.bf = theano.shared(bf)
+        self.Wxc = theano.shared(Wxc)
+        self.Whc = theano.shared(Whc)
+        self.bc = theano.shared(bc)
+        self.Wxo = theano.shared(Wxo)
+        self.Who = theano.shared(Who)
+        self.Wco = theano.shared(Wco)
+        self.bo = theano.shared(bo)
+        self.c0 = theano.shared(c0)
+        self.h0 = theano.shared(h0)
+        self.params = [
+            self.Wxi,
+            self.Whi,
+            self.Wci,
+            self.bi,
+            self.Wxf,
+            self.Whf,
+            self.Wcf,
+            self.bf,
+            self.Wxc,
+            self.Whc,
+            self.bc,
+            self.Wxo,
+            self.Who,
+            self.Wco,
+            self.bo,
+            self.c0,
+            self.h0,
+        ]
+
+    def recurrence(self, x_t, h_t1, c_t1):
+        i_t = T.nnet.sigmoid(x_t.dot(self.Wxi) + h_t1.dot(self.Whi) + c_t1.dot(self.Wci) + self.bi)
+        f_t = T.nnet.sigmoid(x_t.dot(self.Wxf) + h_t1.dot(self.Whf) + c_t1.dot(self.Wcf) + self.bf)
+        c_t = f_t * c_t1 + i_t * T.tanh(x_t.dot(self.Wxc) + h_t1.dot(self.Whc) + self.bc)
+        o_t = T.nnet.sigmoid(x_t.dot(self.Wxo) + h_t1.dot(self.Who) + c_t.dot(self.Wco) + self.bo)
+        h_t = o_t * T.tanh(c_t)
+        return h_t, c_t
+
+    def output(self, x):
+        # input X should be a matrix (2-D)
+        # rows index time
+        [h, c], _ = theano.scan(
+            fn=self.recurrence,
+            sequences=x,
+            outputs_info=[self.h0, self.c0],
+            n_steps=x.shape[0],
+        )
+        return h
+
+
+def train_wikipedia(we_file='lstm_word_embeddings.npy', w2i_file='lstm_wikipedia_word2idx.json'):
+    # there are 32 files
+    sentences, word2idx = get_wikipedia_data(n_files=100, n_vocab=2000)
+    print "finished retrieving data"
+    print "vocab size:", len(word2idx), "number of sentences:", len(sentences)
+    rnn = RNN(50, [50], len(word2idx))
+    # todo: next try increasing LR
+    rnn.fit(sentences, learning_rate=10e-6, epochs=10, show_fig=True, activation=T.nnet.relu, RecurrentUnit=LSTM, normalize=False)
+
+    np.save(we_file, rnn.We.get_value())
+    with open(w2i_file, 'w') as f:
+        json.dump(word2idx, f)
+
+
+if __name__ == '__main__':
+    train_wikipedia()
+    find_analogies('king', 'man', 'woman', 'lstm_word_embeddings.npy', 'lstm_wikipedia_word2idx.json')
+    find_analogies('france', 'paris', 'london', 'lstm_word_embeddings.npy', 'lstm_wikipedia_word2idx.json')
+    find_analogies('france', 'paris', 'rome', 'lstm_word_embeddings.npy', 'lstm_wikipedia_word2idx.json')
+    find_analogies('paris', 'france', 'italy', 'lstm_word_embeddings.npy', 'lstm_wikipedia_word2idx.json')
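
The recurrence in this new file is a peephole LSTM (Gers & Schmidhuber): the input and forget gates also see the previous cell state through Wci and Wcf, and the output gate sees the freshly updated cell state through Wco. Written out, matching the code line for line:

\[
\begin{aligned}
i_t &= \sigma(x_t W_{xi} + h_{t-1} W_{hi} + c_{t-1} W_{ci} + b_i) \\
f_t &= \sigma(x_t W_{xf} + h_{t-1} W_{hf} + c_{t-1} W_{cf} + b_f) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(x_t W_{xc} + h_{t-1} W_{hc} + b_c) \\
o_t &= \sigma(x_t W_{xo} + h_{t-1} W_{ho} + c_t W_{co} + b_o) \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
\]

theano.scan threads (h_t, c_t) through time, and output() returns only the hidden sequence h, which the RNN class imported from gru_wiki.py consumes through its RecurrentUnit parameter.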
