@@ -28,27 +28,31 @@ def cost(theta, visible_size, hidden_size,
     """
     hv = hidden_size * visible_size

-    W1 = theta[1:hv].reshape(hidden_size, visible_size)
-    W2 = theta[hv + 1:2 * hv].reshape(visible_size, hidden_size)
-    b1 = theta[2 * hv + 1:2 * hv + hidden_size]
-    b2 = theta[2 * hv + hidden_size + 1:]
+    W1 = theta[:hv].reshape(hidden_size, visible_size)
+    W2 = theta[hv:2 * hv].reshape(visible_size, hidden_size)
+    b1 = theta[2 * hv:2 * hv + hidden_size]
+    b2 = theta[2 * hv + hidden_size:]

     # Cost and gradient variables (your code needs to compute these values).
     # Here, we initialize them to zeros.
-    cost = 0
-
     W1grad = np.zeros(W1.shape)
     W2grad = np.zeros(W2.shape)
     b1grad = np.zeros(b1.shape)
     b2grad = np.zeros(b2.shape)

+    def T(a):
+        """Given a 1-d array, make it a column vector.
+        Returns a 2-d array of shape (N, 1).
+        """
+        return a.reshape(len(a), 1)
+
     num_data = data.shape[1]
     # do a feed-forward pass
     # a2: (hidden_size, num_data)
-    a2 = sigmoid(np.dot(W1, data) + b1.T)
+    a2 = sigmoid(np.dot(W1, data) + T(b1))
     # a3: (visible_size, num_data)
-    a3 = sigmoid(np.dot(W2, a2) + b2.T)
+    a3 = sigmoid(np.dot(W2, a2) + T(b2))
     assert a2.shape == (hidden_size, num_data)
     assert a3.shape == (visible_size, num_data)
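The switch from b1.T to T(b1) is the substantive fix in this hunk: transposing a 1-d NumPy array is a no-op, so the old code would either raise a shape error or, when the sizes happened to coincide, broadcast the bias along the wrong axis. A minimal sketch of the difference, with made-up shapes:

import numpy as np

b = np.zeros(3)                          # 1-d bias, shape (3,)
acts = np.ones((3, 5))                   # (hidden_size, num_data)
print(b.T.shape)                         # (3,) -- .T leaves a 1-d array unchanged
print((acts + b.reshape(-1, 1)).shape)   # (3, 5) -- a column vector broadcasts per data point
# acts + b raises a broadcasting ValueError: (3,) aligns with the last axis, not the rows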
@@ -60,20 +64,22 @@ def cost(theta, visible_size, hidden_size,
     # delta2: (hidden, num_data)
     delta2 = np.dot(W2.T, delta3) * (a2 * (1 - a2))

-    W1grad = np.dot(delta2, data.T)
-    W2grad = np.dot(delta3, a2.T)
-    b1grad = delta2
-    b2grad = delta3
+    W1grad[:, :] = np.dot(delta2, data.T) / float(num_data)
+    W2grad[:, :] = np.dot(delta3, a2.T) / float(num_data)
+    b1grad[:] = np.sum(delta2, axis=1) / float(num_data)
+    b2grad[:] = np.sum(delta3, axis=1) / float(num_data)

     grad = flatten_params(W1grad, W2grad, b1grad, b2grad)
     return cost, grad

-def initialize_parameters(hidden_size, visible_size):
+def initialize_params(hidden_size, visible_size):
     """Accepts the number of hidden states in the sparse encoder
     and the number of input states in the sparse encoder.
     Initialize parameters randomly based on layer sizes.
     Returns a new flat array of size
     2 * hidden_size * visible_size + hidden_size + visible_size.
     """
+    assert hidden_size < visible_size
+
     # we'll choose weights uniformly from the interval [-r, r]
     r = np.sqrt(6) / np.sqrt(hidden_size + visible_size + 1)
     W1 = np.random.rand(hidden_size, visible_size) * 2 * r - r
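The diff calls sigmoid and flatten_params without showing them; they are presumably defined elsewhere in the module. A minimal sketch of what they would look like, consistent with the theta slices unpacked at the top of cost:

import numpy as np

def sigmoid(x):
    # elementwise logistic function
    return 1.0 / (1.0 + np.exp(-x))

def flatten_params(W1, W2, b1, b2):
    # pack parameters in the same order that cost() slices them back out
    return np.concatenate([W1.ravel(), W2.ravel(), b1.ravel(), b2.ravel()])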
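Since the commit fixes off-by-one slices and adds 1/num_data averaging, a finite-difference gradient check is the natural way to validate it. A sketch using scipy.optimize.check_grad; the toy sizes are made up, and it assumes data is the only remaining argument of cost (the hunk header truncates the real signature):

import numpy as np
from scipy.optimize import check_grad

visible_size, hidden_size = 8, 3
data = np.random.rand(visible_size, 10)       # 10 random toy data points
theta0 = initialize_params(hidden_size, visible_size)

# cost() returns (cost, grad); check_grad wants them as separate callables
f = lambda t: cost(t, visible_size, hidden_size, data)[0]
g = lambda t: cost(t, visible_size, hidden_size, data)[1]
print(check_grad(f, g, theta0))               # should be tiny (around 1e-6 or less)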