
Commit 97d0c02

setup backprop
1 parent 2ac9d17 commit 97d0c02

File tree

1 file changed: +118 −68 lines changed


notebooks/Linear XOR.ipynb

Lines changed: 118 additions & 68 deletions
@@ -2,113 +2,163 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bits [0 0] --> parity [0]\n",
+      "bits [0 1] --> parity [1]\n",
+      "bits [1 0] --> parity [1]\n",
+      "bits [1 1] --> parity [0]\n"
+     ]
+    }
+   ],
    "source": [
+    "import numpy as np\n",
+    "\n",
+    "np.random.seed(0)\n",
+    "\n",
     "# bits are our inputs\n",
     "X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])\n",
     "\n",
     "# parities are our labels\n",
-    "Y = np.array([0, 1, 1, 0])"
+    "Y = np.array([[0], [1], [1], [0]])\n",
+    "\n",
+    "for i, bits in enumerate(X):\n",
+    "    print(f'bits {bits} --> parity {Y[i]}')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "bits [0 0] --> parity 0\n",
-      "bits [0 1] --> parity 1\n",
-      "bits [1 0] --> parity 1\n",
-      "bits [1 1] --> parity 0\n"
+      "delta sigmoid [[0.25      ]\n",
+      " [0.19661193]\n",
+      " [0.19661193]\n",
+      " [0.25      ]]\n",
+      "delta sigmoid analytical [[0.25000002]\n",
+      " [0.19661195]\n",
+      " [0.19661195]\n",
+      " [0.25000002]]\n"
      ]
     }
    ],
    "source": [
-    "for i, bits in enumerate(X):\n",
-    "    print(f'bits {bits} --> parity {Y[i]}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def identity(x):\n",
-    "    return x\n",
-    "\n",
     "def sigmoid(x):\n",
-    "    return 1 / (1 + np.exp(-x))"
+    "    return 1 / (1 + np.exp(-x))\n",
+    "\n",
+    "def delta_sigmoid(x):\n",
+    "    # to derive use the +1 trick from http://cs231n.github.io/optimization-2/\n",
+    "    return sigmoid(x) * (1 - sigmoid(x))\n",
+    "\n",
+    "def analytical_gradient(f, x):\n",
+    "    h = 1e-9\n",
+    "    return (f(x + h) - f(x)) / h\n",
+    "\n",
+    "print('delta sigmoid', delta_sigmoid(Y))\n",
+    "print('delta sigmoid analytical', analytical_gradient(sigmoid, Y))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loss 0.14451072667400197\n",
+      "loss 0.007930633168167129\n",
+      "loss 0.0031754754752917323\n",
+      "loss 0.0021824385490060365\n"
+     ]
+    }
+   ],
    "source": [
-    "def build_layers(input_dim, hidden_units, activations):\n",
-    "    layers = []\n",
-    "\n",
-    "    for i, num_units in enumerate(hidden_units):\n",
-    "        layers.append({\n",
-    "            'weights': np.random.uniform(size=(input_dim, num_units)),\n",
-    "            'bias': np.zeros((1, num_units)),\n",
-    "            'activation': activations[i],\n",
-    "        })\n",
-    "\n",
-    "        # the next layers input_dim will be this layers num_units\n",
-    "        # [rows, this_num_units] X [this_num_units, next_num_units] -> [rows, next_num_units]\n",
-    "        input_dim = num_units\n",
+    "# X [4,2]\n",
+    "input_dim = X.shape[-1]\n",
+    "# Y [4,1]\n",
+    "output_dim = Y.shape[-1]\n",
+    "hidden_units = 2\n",
+    "lr = 0.1\n",
+    "\n",
+    "# [2,2]\n",
+    "Whidden = np.random.uniform(size=(input_dim, hidden_units))  # hidden layer\n",
+    "\n",
+    "# [2,1]\n",
+    "Woutput = np.random.uniform(size=(hidden_units, output_dim))  # output layer\n",
+    "\n",
+    "for step in range(10000):\n",
+    "    # forward pass\n",
+    "    # loss = loss(output(activation(hidden(X))))\n",
+    "\n",
+    "    # hidden(X) [4,2]\n",
+    "    hidden = X.dot(Whidden)\n",
+    "\n",
+    "    # activation(hidden) [4,2]\n",
+    "    activation = sigmoid(hidden)\n",
+    "\n",
+    "    # output(activation) [4,2]x[2,1] -> [4,1]\n",
+    "    output = activation.dot(Woutput)\n",
+    "\n",
+    "    # loss(output) [4,1]\n",
+    "    loss = 0.5 * (output - Y)**2\n",
+    "    if step % 2500 == 0:\n",
+    "        print('loss', np.mean(loss))\n",
+    "\n",
+    "    # backward pass\n",
+    "    # loss'(output) [4,1]\n",
+    "    dloss_output = output - Y\n",
+    "\n",
+    "    # loss'(activation) = loss'(output) * output'(activation)\n",
+    "    # [4,1]x[1,2] -> [4,2]\n",
+    "    dloss_activation = dloss_output.dot(Woutput.T)\n",
+    "\n",
+    "    # loss'(hidden) = loss'(activation) * activation'(hidden)\n",
+    "    # [4,2]*[4,2] -> [4,2]\n",
+    "    dloss_hidden = dloss_activation * delta_sigmoid(hidden)\n",
+    "\n",
+    "    # Take a small step in the opposite direction of the gradient\n",
     "\n",
-    "    return layers\n",
+    "    # loss'(Woutput) = loss'(output) * output'(Woutput)\n",
+    "    # [2,4]x[4,1] -> [2,1]\n",
+    "    dloss_woutput = activation.T.dot(dloss_output)\n",
+    "    Woutput -= dloss_woutput * lr\n",
     "\n",
-    "def forward(x, layers):\n",
-    "    for layer in layers:\n",
-    "        x = x.dot(layer['weights']) + layer['bias']\n",
-    "        x = layer['activation'](x)\n",
-    "    return x"
+    "    # loss'(Whidden) = loss'(hidden) * hidden'(Whidden)\n",
+    "    # [2,4]x[4,2] -> [2,2]\n",
+    "    dloss_whidden = X.T.dot(dloss_hidden)\n",
+    "    Whidden -= dloss_whidden * lr"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "array([[0.51424762],\n",
-       "       [0.51508232],\n",
-       "       [0.51745592],\n",
-       "       [0.51824855]])"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "prediction [-0.08500212] -> label [0]\n",
+      "prediction [0.98169372] -> label [1]\n",
+      "prediction [0.98169457] -> label [1]\n",
+      "prediction [0.07744216] -> label [0]\n"
+     ]
     }
    ],
    "source": [
-    "layers = build_layers(X.shape[-1], hidden_units=[2, 1], activations=[sigmoid, sigmoid])\n",
-    "yhat = forward(X, layers)\n",
-    "\n",
-    "yhat"
+    "for i, prediction in enumerate(output):\n",
+    "    print(f'prediction {prediction} -> label {Y[i]}')"
    ]
   },
   {
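A note on the gradient-check cell: despite its name, `analytical_gradient` computes a forward-difference numerical approximation, (f(x + h) - f(x)) / h, while `delta_sigmoid` is the analytical derivative, so the two printed labels are conceptually swapped. Below is a minimal sketch of the same check using a centered difference; the `numerical_gradient` name and the tolerance are illustrative, not part of the commit:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def delta_sigmoid(x):
    # analytical derivative: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
    return sigmoid(x) * (1 - sigmoid(x))

def numerical_gradient(f, x, h=1e-5):
    # centered difference has O(h^2) error, versus O(h) for (f(x + h) - f(x)) / h
    return (f(x + h) - f(x - h)) / (2 * h)

x = np.array([[0.0], [1.0], [1.0], [0.0]])
assert np.allclose(delta_sigmoid(x), numerical_gradient(sigmoid, x), atol=1e-8)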
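For reference, the backward pass in the training cell is the chain rule applied to L = 0.5 * (output - Y)^2, where hidden = X Whidden, activation = sigmoid(hidden), and output = activation Woutput. In matrix form (⊙ is elementwise multiplication):

\begin{aligned}
\frac{\partial L}{\partial \mathrm{output}} &= \mathrm{output} - Y \\
\frac{\partial L}{\partial \mathrm{activation}} &= \frac{\partial L}{\partial \mathrm{output}}\, W_{\mathrm{output}}^{\top} \\
\frac{\partial L}{\partial \mathrm{hidden}} &= \frac{\partial L}{\partial \mathrm{activation}} \odot \sigma(\mathrm{hidden})\,\big(1 - \sigma(\mathrm{hidden})\big) \\
\frac{\partial L}{\partial W_{\mathrm{output}}} &= \mathrm{activation}^{\top}\, \frac{\partial L}{\partial \mathrm{output}} \\
\frac{\partial L}{\partial W_{\mathrm{hidden}}} &= X^{\top}\, \frac{\partial L}{\partial \mathrm{hidden}}
\end{aligned}

These correspond line for line to dloss_output, dloss_activation, dloss_hidden, dloss_woutput, and dloss_whidden in the loop.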
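Since the output layer is linear (there is no sigmoid after Woutput), the predictions in the last cell are unbounded reals rather than probabilities. A possible follow-up step, not in the commit, that recovers the parities exactly from the printed values:

# assumes `output` and `Y` from the training cell above
predicted = (output > 0.5).astype(int)  # [[0], [1], [1], [0]]
assert (predicted == Y).all()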
