
Commit ddd5e6f

some wip files
1 parent dfdaeb1 commit ddd5e6f

File tree: 8 files changed, +1168 −0 lines changed

rnn_class/gru_wiki.py

Lines changed: 223 additions & 0 deletions
@@ -0,0 +1,223 @@
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
import json

from sklearn.utils import shuffle
from util import init_weight, get_wikipedia_data


class GRU:
    def __init__(self, Mi, Mo, activation):
        self.Mi = Mi
        self.Mo = Mo
        self.f = activation

        # numpy init
        Wxr = init_weight(Mi, Mo)
        Whr = init_weight(Mo, Mo)
        br = np.zeros(Mo)
        Wxz = init_weight(Mi, Mo)
        Whz = init_weight(Mo, Mo)
        bz = np.zeros(Mo)
        Wxh = init_weight(Mi, Mo)
        Whh = init_weight(Mo, Mo)
        bh = np.zeros(Mo)
        h0 = np.zeros(Mo)

        # theano vars
        self.Wxr = theano.shared(Wxr)
        self.Whr = theano.shared(Whr)
        self.br = theano.shared(br)
        self.Wxz = theano.shared(Wxz)
        self.Whz = theano.shared(Whz)
        self.bz = theano.shared(bz)
        self.Wxh = theano.shared(Wxh)
        self.Whh = theano.shared(Whh)
        self.bh = theano.shared(bh)
        self.h0 = theano.shared(h0)
        self.params = [self.Wxr, self.Whr, self.br, self.Wxz, self.Whz, self.bz, self.Wxh, self.Whh, self.bh, self.h0]

    def recurrence(self, x_t, h_t1):
        r = T.nnet.sigmoid(x_t.dot(self.Wxr) + h_t1.dot(self.Whr) + self.br)  # reset gate
        z = T.nnet.sigmoid(x_t.dot(self.Wxz) + h_t1.dot(self.Whz) + self.bz)  # update gate
        hhat = self.f(x_t.dot(self.Wxh) + (r * h_t1).dot(self.Whh) + self.bh)  # candidate state
        h = (1 - z) * h_t1 + z * hhat  # interpolate between old state and candidate
        return h

    def output(self, x):
        # input x should be a matrix (2-D); rows index time
        h, _ = theano.scan(
            fn=self.recurrence,
            sequences=x,
            outputs_info=[self.h0],
            n_steps=x.shape[0],
        )
        return h
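The gated update in recurrence is the usual GRU step: r is the reset gate, z is the update gate, and the new state interpolates between the previous state and the candidate hhat. A minimal NumPy sketch of the same update outside Theano, with hypothetical sizes Mi = 3 and Mo = 4 and tanh standing in for the activation:

import numpy as np

Mi, Mo = 3, 4  # hypothetical sizes, for illustration only
rng = np.random.RandomState(0)
Wxr, Whr, br = rng.randn(Mi, Mo), rng.randn(Mo, Mo), np.zeros(Mo)
Wxz, Whz, bz = rng.randn(Mi, Mo), rng.randn(Mo, Mo), np.zeros(Mo)
Wxh, Whh, bh = rng.randn(Mi, Mo), rng.randn(Mo, Mo), np.zeros(Mo)

def sigmoid(a):
    return 1 / (1 + np.exp(-a))

def gru_step(x_t, h_t1):
    # mirrors GRU.recurrence above
    r = sigmoid(x_t.dot(Wxr) + h_t1.dot(Whr) + br)           # reset gate
    z = sigmoid(x_t.dot(Wxz) + h_t1.dot(Whz) + bz)           # update gate
    hhat = np.tanh(x_t.dot(Wxh) + (r * h_t1).dot(Whh) + bh)  # candidate
    return (1 - z) * h_t1 + z * hhat

h = np.zeros(Mo)
for x_t in rng.randn(5, Mi):  # a 5-step input sequence
    h = gru_step(x_t, h)
print(h.shape)  # (4,)

Because each component of z lies in (0, 1), every component of the new h lies between the old state and the candidate, which is what lets the unit carry information across many time steps.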
class RNN:
    def __init__(self, D, hidden_layer_sizes, V):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.D = D
        self.V = V

    def fit(self, X, learning_rate=10e-5, mu=0.99, epochs=10, show_fig=True, activation=T.nnet.relu, RecurrentUnit=GRU):
        D = self.D
        V = self.V
        N = len(X)

        We = init_weight(V, D)
        self.hidden_layers = []
        Mi = D
        for Mo in self.hidden_layer_sizes:
            ru = RecurrentUnit(Mi, Mo, activation)
            self.hidden_layers.append(ru)
            Mi = Mo

        Wo = init_weight(Mi, V)
        bo = np.zeros(V)

        self.We = theano.shared(We)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.Wo, self.bo]
        for ru in self.hidden_layers:
            self.params += ru.params

        thX = T.ivector('X')
        thY = T.ivector('Y')

        Z = self.We[thX]  # word indices -> rows of the embedding matrix
        for ru in self.hidden_layers:
            Z = ru.output(Z)
        py_x = T.nnet.softmax(Z.dot(self.Wo) + self.bo)

        prediction = T.argmax(py_x, axis=1)
        # let's return py_x too so we can draw a sample instead
        self.predict_op = theano.function(
            inputs=[thX],
            outputs=[py_x, prediction],
            allow_input_downcast=True,
        )

        cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
        grads = T.grad(cost, self.params)
        dparams = [theano.shared(p.get_value()*0) for p in self.params]  # momentum accumulators

        # the embedding matrix gets its own update: after the momentum step,
        # each row is renormalized to sum to 1
        dWe = theano.shared(self.We.get_value()*0)
        gWe = T.grad(cost, self.We)
        dWe_update = mu*dWe - learning_rate*gWe
        We_update = self.We + dWe_update
        We_update /= We_update.sum(axis=1).dimshuffle(0, 'x')

        updates = [
            (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
        ] + [
            (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
        ] + [
            (self.We, We_update), (dWe, dWe_update)
        ]

        self.train_op = theano.function(
            inputs=[thX, thY],
            outputs=[cost, prediction],
            updates=updates,
        )

        costs = []
        for i in xrange(epochs):
            X = shuffle(X)
            n_correct = 0
            n_total = 0
            cost = 0
            for j in xrange(N):
                # 0 is the START token and 1 is the END token;
                # about 1% of the time (or when the sentence is too short to
                # truncate) train on the full sentence plus the END token
                if np.random.random() < 0.01 or len(X[j]) <= 1:
                    input_sequence = [0] + X[j]
                    output_sequence = X[j] + [1]
                else:
                    input_sequence = [0] + X[j][:-1]
                    output_sequence = X[j]
                n_total += len(output_sequence)

                try:
                    c, p = self.train_op(input_sequence, output_sequence)
                except Exception as e:
                    # debugging aid: show the shapes that made train_op fail
                    PYX, pred = self.predict_op(input_sequence)
                    print "input_sequence len:", len(input_sequence)
                    print "PYX.shape:", PYX.shape
                    print "pred.shape:", pred.shape
                    raise e
                cost += c
                for pj, xj in zip(p, output_sequence):
                    if pj == xj:
                        n_correct += 1
                if j % 200 == 0:
                    print "j:", j, "correct rate so far:", (float(n_correct)/n_total)
            print "i:", i, "cost:", cost, "correct rate:", (float(n_correct)/n_total)
            costs.append(cost)

        if show_fig:
            plt.plot(costs)
            plt.show()


def train_wikipedia():
    # there are 32 files
    sentences, word2idx = get_wikipedia_data(n_files=32, n_vocab=2000)
    print "finished retrieving data"
    print "vocab size:", len(word2idx), "number of sentences:", len(sentences)
    rnn = RNN(20, [20], len(word2idx))
    rnn.fit(sentences, learning_rate=10e-5, epochs=10, show_fig=True, activation=T.nnet.relu)

    np.save('word_embeddings.npy', rnn.We.get_value())
    with open('wikipedia_word2idx.json', 'w') as f:
        json.dump(word2idx, f)


def generate_wikipedia():
    pass


def find_analogies(w1, w2, w3):
    We = np.load('word_embeddings.npy')
    with open('wikipedia_word2idx.json') as f:
        word2idx = json.load(f)

    king = We[word2idx[w1]]
    man = We[word2idx[w2]]
    woman = We[word2idx[w3]]
    v0 = king - man + woman

    def dist1(a, b):
        return np.linalg.norm(a - b)

    def dist2(a, b):
        return 1 - a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))

    for dist, name in [(dist1, 'Euclidean'), (dist2, 'cosine')]:
        min_dist = float('inf')
        best_word = ''
        for word, idx in word2idx.iteritems():
            if word not in (w1, w2, w3):
                v1 = We[idx]
                d = dist(v0, v1)
                if d < min_dist:
                    min_dist = d
                    best_word = word
        print "closest match by", name, "distance:", best_word
        print w1, "-", w2, "=", best_word, "-", w3


if __name__ == '__main__':
    train_wikipedia()
    find_analogies('king', 'man', 'woman')
    find_analogies('france', 'paris', 'london')
    find_analogies('france', 'paris', 'rome')
    find_analogies('paris', 'france', 'italy')
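find_analogies scans the vocabulary in a Python loop. For a quick sanity check of saved embeddings, the cosine branch can be vectorized into one matrix product; a sketch, assuming the word_embeddings.npy and wikipedia_word2idx.json artifacts written by train_wikipedia() above (nearest_by_cosine is a hypothetical helper, not part of the file):

import json
import numpy as np

We = np.load('word_embeddings.npy')
with open('wikipedia_word2idx.json') as f:
    word2idx = json.load(f)
idx2word = {i: w for w, i in word2idx.items()}

def nearest_by_cosine(w1, w2, w3):
    v0 = We[word2idx[w1]] - We[word2idx[w2]] + We[word2idx[w3]]
    # cosine distance from v0 to every embedding row at once
    norms = np.linalg.norm(We, axis=1) * np.linalg.norm(v0)
    dists = 1 - We.dot(v0) / norms
    for i in np.argsort(dists):  # closest first
        if idx2word[i] not in (w1, w2, w3):
            return idx2word[i]

print(nearest_by_cosine('king', 'man', 'woman'))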

rnn_class/mlp_parity.py

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

from util import init_weight, all_parity_pairs
from sklearn.utils import shuffle


class HiddenLayer:
    def __init__(self, M1, M2, an_id):
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        W = init_weight(M1, M2)
        b = np.zeros(M2)
        self.W = theano.shared(W, 'W_%s' % self.id)
        self.b = theano.shared(b, 'b_%s' % self.id)
        self.params = [self.W, self.b]

    def forward(self, X):
        return T.nnet.relu(X.dot(self.W) + self.b)


class ANN:
    def __init__(self, hidden_layer_sizes):
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, learning_rate=10e-3, mu=0.99, reg=10e-12, eps=10e-10, epochs=400, batch_sz=20, print_period=1, show_fig=False):
        # note: eps is accepted for API consistency but not used below
        Y = Y.astype(np.int32)

        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W = init_weight(M1, K)
        b = np.zeros(K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        dparams = [theano.shared(np.zeros(p.get_value().shape)) for p in self.params]  # momentum accumulators

        thX = T.matrix('X')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])  # L2 regularization
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.predict(thX)
        grads = T.grad(cost, self.params)

        updates = [
            (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
        ] + [
            (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
        ]

        train_op = theano.function(
            inputs=[thX, thY],
            outputs=[cost, prediction],
            updates=updates,
        )

        n_batches = N // batch_sz
        costs = []
        for i in xrange(epochs):
            X, Y = shuffle(X, Y)
            for j in xrange(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz + batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz + batch_sz)]

                c, p = train_op(Xbatch, Ybatch)

                if j % print_period == 0:
                    costs.append(c)
                    e = np.mean(Ybatch != p)
                    print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return T.nnet.softmax(Z.dot(self.W) + self.b)

    def predict(self, X):
        pY = self.forward(X)
        return T.argmax(pY, axis=1)


def wide():
    X, Y = all_parity_pairs(12)
    model = ANN([2048])
    model.fit(X, Y, learning_rate=10e-5, print_period=10, epochs=300, show_fig=True)


def deep():
    X, Y = all_parity_pairs(12)
    model = ANN([1024]*2)
    model.fit(X, Y, learning_rate=10e-4, print_period=10, epochs=100, show_fig=True)


if __name__ == '__main__':
    wide()
    # deep()
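all_parity_pairs comes from the course's util module. As an assumption about its behavior (every 12-bit vector paired with its parity label), a self-contained stand-in might look like the sketch below; all_parity_pairs_sketch is a hypothetical name, not the real helper:

import numpy as np

def all_parity_pairs_sketch(nbit):
    # hypothetical stand-in for util.all_parity_pairs:
    # all 2**nbit binary vectors, each labeled with its parity
    N = 2 ** nbit
    X = np.zeros((N, nbit))
    Y = np.zeros(N, dtype=np.int32)
    for i in range(N):
        bits = [(i >> b) & 1 for b in range(nbit)]
        X[i] = bits
        Y[i] = sum(bits) % 2
    return X, Y

X, Y = all_parity_pairs_sketch(12)
print(X.shape)   # (4096, 12)
print(Y.mean())  # 0.5 -- exactly half of all bit patterns have odd parity

Parity is a classic hard case for a shallow net because the classes are not linearly separable, which is why the wide() configuration throws 2048 hidden units at a 12-bit problem.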
