batch_donut.py (forked from lazyprogrammer/machine_learning_examples)
# revisiting the XOR and donut problems to show how features
# can be learned automatically using neural networks.
# since full-batch gradient descent didn't work so well with the donut
# problem, let's try learning from randomly selected mini-batches.
# we can consistently reach a classification rate in the high 90s (percent) with this method.
#
# the notes for this class can be found at:
# https://www.udemy.com/data-science-deep-learning-in-python
import numpy as np
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
# for binary classification! no softmax here
def forward(X, W1, b1, W2, b2):
    # sigmoid nonlinearity on the hidden layer
    # and sigmoid on the output; a tanh hidden
    # layer is a common alternative (see forward_tanh below)
Z = 1 / (1 + np.exp( -(X.dot(W1) + b1) ))
activation = Z.dot(W2) + b2
Y = 1 / (1 + np.exp(-activation))
return Y, Z
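# A tanh hidden layer is a common alternative to the sigmoid used above.
# Minimal sketch only (forward_tanh is a hypothetical name, not part of the
# original script); the backprop terms Z * (1 - Z) in derivative_w1 and
# derivative_b1 would become (1 - Z * Z) for this variant.
def forward_tanh(X, W1, b1, W2, b2):
    # tanh nonlinearity on the hidden layer
    Z = np.tanh(X.dot(W1) + b1)
    # sigmoid on the output for binary classification
    Y = 1 / (1 + np.exp(-(Z.dot(W2) + b2)))
    return Y, Z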
def predict(X, W1, b1, W2, b2):
Y, _ = forward(X, W1, b1, W2, b2)
return np.round(Y)
def derivative_w2(Z, T, Y):
# Z is (N, M)
return (T - Y).dot(Z)
def derivative_b2(T, Y):
return (T - Y).sum()
def derivative_w1(X, Z, T, Y, W2):
front = np.outer(T-Y, W2) * Z * (1 - Z)
return front.T.dot(X).T
def derivative_b1(Z, T, Y, W2):
front = np.outer(T-Y, W2) * Z * (1 - Z)
return front.sum(axis=0)
def cost(T, Y):
tot = 0
    for n in range(len(T)):
if T[n] == 1:
tot += np.log(Y[n])
else:
tot += np.log(1 - Y[n])
return tot
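# A vectorized equivalent of the loop-based cost above, shown as a minimal
# sketch (cost_vectorized is a hypothetical name, not part of the original
# script); it computes the same total log-likelihood for binary targets T
# and predicted probabilities Y.
def cost_vectorized(T, Y):
    return np.sum(T * np.log(Y) + (1 - T) * np.log(1 - Y))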
def test_donut():
# donut example
N = 1000
R_inner = 5
R_outer = 10
# distance from origin is radius + random normal
# angle theta is uniformly distributed between (0, 2pi)
    R1 = np.random.randn(N//2) + R_inner
    theta = 2*np.pi*np.random.random(N//2)
    X_inner = np.concatenate([[R1 * np.cos(theta)], [R1 * np.sin(theta)]]).T
    R2 = np.random.randn(N//2) + R_outer
    theta = 2*np.pi*np.random.random(N//2)
    X_outer = np.concatenate([[R2 * np.cos(theta)], [R2 * np.sin(theta)]]).T
    X = np.concatenate([ X_inner, X_outer ])
    Y = np.array([0]*(N//2) + [1]*(N//2))
n_hidden = 8
W1 = np.random.randn(2, n_hidden)
b1 = np.random.randn(n_hidden)
W2 = np.random.randn(n_hidden)
b2 = np.random.randn(1)
LL = [] # keep track of likelihoods
learning_rate = 0.0001
regularization = 0.1
# batch version
batch_size = 100
    for i in range(150000):
tmpX, tmpY = shuffle(X, Y)
tmpX = tmpX[:batch_size]
tmpY = tmpY[:batch_size]
pY, Z = forward(tmpX, W1, b1, W2, b2)
ll = cost(tmpY, pY)
prediction = predict(X, W1, b1, W2, b2)
er = np.abs(prediction - Y).mean()
LL.append(ll)
W2 += learning_rate * (derivative_w2(Z, tmpY, pY) - regularization * W2)
b2 += learning_rate * (derivative_b2(tmpY, pY) - regularization * b2)
W1 += learning_rate * (derivative_w1(tmpX, Z, tmpY, pY, W2) - regularization * W1)
b1 += learning_rate * (derivative_b1(Z, tmpY, pY, W2) - regularization * b1)
if i % 100 == 0:
print "ll:", ll, "classification rate:", 1 - er
plt.plot(LL)
plt.show()
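# Optional sanity check for the analytic gradients above: compare derivative_w2
# against a centered finite-difference estimate of the log-likelihood. This is
# a hedged sketch (grad_check_w2 is a hypothetical helper, not part of the
# original script); it assumes the same shapes used in test_donut().
def grad_check_w2(X, T, W1, b1, W2, b2, eps=1e-5):
    Y, Z = forward(X, W1, b1, W2, b2)
    analytic = derivative_w2(Z, T, Y)
    numeric = np.zeros_like(W2)
    for j in range(len(W2)):
        W2_plus = W2.copy()
        W2_plus[j] += eps
        W2_minus = W2.copy()
        W2_minus[j] -= eps
        Y_plus, _ = forward(X, W1, b1, W2_plus, b2)
        Y_minus, _ = forward(X, W1, b1, W2_minus, b2)
        numeric[j] = (cost(T, Y_plus) - cost(T, Y_minus)) / (2 * eps)
    # the maximum absolute difference should be tiny if the math is right
    return np.abs(analytic - numeric).max()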
if __name__ == '__main__':
test_donut()