@@ -14,19 +14,42 @@
 from util import relu, error_rate, getKaggleMNIST, init_weights
 
 
+def T_shared_zeros_like32(p):
+    # p is a Theano shared variable itself
+    return theano.shared(np.zeros_like(p.get_value(), dtype=np.float32))
+
+def momentum_updates(cost, params, mu, learning_rate):
+    # velocity (momentum) terms, one per parameter
+    dparams = [T_shared_zeros_like32(p) for p in params]
+
+    updates = []
+    grads = T.grad(cost, params)
+    for p, dp, g in zip(params, dparams, grads):
+        dp_update = mu*dp - learning_rate*g
+        p_update = p + dp_update
+
+        updates.append((dp, dp_update))
+        updates.append((p, p_update))
+    return updates
+
+
 class AutoEncoder(object):
     def __init__(self, M, an_id):
         self.M = M
         self.id = an_id
 
     def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
+        # cast hyperparameters to float32
+        mu = np.float32(mu)
+        learning_rate = np.float32(learning_rate)
+
         N, D = X.shape
         n_batches = N // batch_sz
 
         W0 = init_weights((D, self.M))
         self.W = theano.shared(W0, 'W_%s' % self.id)
-        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
-        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
+        self.bh = theano.shared(np.zeros(self.M, dtype=np.float32), 'bh_%s' % self.id)
+        self.bo = theano.shared(np.zeros(D, dtype=np.float32), 'bo_%s' % self.id)
         self.params = [self.W, self.bh, self.bo]
         self.forward_params = [self.W, self.bh]
 
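For readers of the diff: momentum_updates implements classical momentum, v <- mu*v - learning_rate*grad(cost) followed by p <- p + v, with a single T.grad(cost, params) call for the whole parameter list. A minimal usage sketch, assuming only the helper and imports above (the toy parameter, cost, and hyperparameter values are made up for illustration):

    import numpy as np
    import theano
    import theano.tensor as T

    # toy parameter vector; the quadratic cost below has its minimum at w = [2, 2]
    w = theano.shared(np.array([5.0, -3.0], dtype=np.float32), 'w')
    cost = ((w - np.float32(2))**2).sum()
    updates = momentum_updates(cost, [w], np.float32(0.9), np.float32(0.1))
    step = theano.function(inputs=[], outputs=cost, updates=updates)
    for _ in range(100):
        step()
    print(w.get_value())  # approaches [2. 2.]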
@@ -61,18 +84,17 @@ def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
             outputs=cost,
         )
 
-        updates = [
-            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ] + [
-            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ]
+
+
+        updates = momentum_updates(cost, self.params, mu, learning_rate)
         train_op = theano.function(
             inputs=[X_in],
             updates=updates,
         )
 
         costs = []
         print("training autoencoder: %s" % self.id)
+        print("epochs to do:", epochs)
         for i in range(epochs):
             print("epoch:", i)
             X = shuffle(X)
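Note that this swap is behavior-preserving: Theano evaluates every update expression against the old shared values, so the removed p + mu*dp - learning_rate*T.grad(cost, p) equals the helper's p + dp_update. The helper also differentiates the cost once per parameter instead of twice (the old comprehensions called T.grad(cost, p) in both lists).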
@@ -117,9 +139,22 @@ def __init__(self, hidden_layer_sizes, UnsupervisedModel=AutoEncoder):
             count += 1
 
 
-    def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, reg=0.1, epochs=1, batch_sz=100):
+    def fit(self, X, Y, Xtest, Ytest,
+            pretrain=True,
+            train_head_only=False,
+            learning_rate=0.1,
+            mu=0.99,
+            reg=0.0,
+            epochs=1,
+            batch_sz=100):
+
+        # cast hyperparameters to float32
+        learning_rate = np.float32(learning_rate)
+        mu = np.float32(mu)
+        reg = np.float32(reg)
+
         # greedy layer-wise training of autoencoders
-        pretrain_epochs = 1
+        pretrain_epochs = 2
         if not pretrain:
             pretrain_epochs = 0
 
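The explicit float32 casts matter because a float64 hyperparameter (e.g. a numpy float64 scalar) multiplied into the graph upcasts the update expression to float64, and Theano will not assign a float64 result back to a float32 shared variable. A hypothetical two-liner showing the pitfall the casts avoid:

    s = theano.shared(np.float32(1.0))
    print((s * np.float64(0.9)).dtype)  # 'float64': no longer matches s's dtype in an update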
@@ -135,38 +170,27 @@ def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, reg=0.1, epochs=1, batch_sz=100):
         K = len(set(Y))
         W0 = init_weights((self.hidden_layers[-1].M, K))
         self.W = theano.shared(W0, "W_logreg")
-        self.b = theano.shared(np.zeros(K), "b_logreg")
+        self.b = theano.shared(np.zeros(K, dtype=np.float32), "b_logreg")
 
         self.params = [self.W, self.b]
-        for ae in self.hidden_layers:
-            self.params += ae.forward_params
-
-        # for momentum
-        self.dW = theano.shared(np.zeros(W0.shape), "dW_logreg")
-        self.db = theano.shared(np.zeros(K), "db_logreg")
-        self.dparams = [self.dW, self.db]
-        for ae in self.hidden_layers:
-            self.dparams += ae.forward_dparams
+        if not train_head_only:
+            for ae in self.hidden_layers:
+                self.params += ae.forward_params
 
         X_in = T.matrix('X_in')
         targets = T.ivector('Targets')
         pY = self.forward(X_in)
 
-        # squared_magnitude = [(p*p).sum() for p in self.params]
-        # reg_cost = T.sum(squared_magnitude)
-        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets]))  # + reg*reg_cost
+        squared_magnitude = [(p*p).sum() for p in self.params]
+        reg_cost = T.sum(squared_magnitude)
+        cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), targets])) + reg*reg_cost
         prediction = self.predict(X_in)
         cost_predict_op = theano.function(
             inputs=[X_in, targets],
             outputs=[cost, prediction],
         )
 
-        updates = [
-            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ] + [
-            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ]
-        # updates = [(p, p - learning_rate*T.grad(cost, p)) for p in self.params]
+        updates = momentum_updates(cost, self.params, mu, learning_rate)
         train_op = theano.function(
             inputs=[X_in, targets],
             updates=updates,
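With the regularizer switched on, the supervised objective becomes cost = -mean(log pY[target]) + reg * sum_p sum(p**2), taken over exactly the parameters being trained. Since the new default is reg=0.0, callers get the old unregularized behavior unless they opt in; and when train_head_only=True, self.params holds only W_logreg and b_logreg, so both the gradient updates and the penalty leave the pretrained autoencoder weights untouched.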
@@ -209,7 +233,8 @@ def main():
     # dnn.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=3)
     # vs
     dnn = DNN([1000, 750, 500])
-    dnn.fit(Xtrain, Ytrain, Xtest, Ytest, pretrain=False, epochs=10)
+    dnn.fit(Xtrain, Ytrain, Xtest, Ytest, pretrain=True, train_head_only=False, epochs=3)
+    # note: try training the head only too! what does that mean?
 
 
 def test_single_autoencoder():
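(Answering the note above: passing train_head_only=True freezes the pretrained autoencoder weights and trains only the logistic-regression output layer, i.e. the stacked autoencoders act as a fixed feature extractor for a linear classifier.)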
@@ -239,5 +264,5 @@ def test_single_autoencoder():
 
 
 if __name__ == '__main__':
-    # main()
-    test_single_autoencoder()
+    main()
+    # test_single_autoencoder()