import pickle
import time, os, json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

from asgn3.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from asgn3.rnn_layers import *
from asgn3.captioning_solver import CaptioningSolver
from asgn3.classifiers.rnn import CaptioningRNN
from asgn3.coco_utils import load_coco_data, sample_coco_minibatch, decode_captions
from asgn3.image_utils import image_from_url

%matplotlib inline

# For auto-reloading external modules; see
# http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2


def rel_error(x, y):
    """Returns the maximum relative error between x and y."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
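
# Quick sanity check for rel_error (illustrative values, not from the original run):
# identical inputs give 0, and differences are scaled by the inputs' magnitudes, e.g.
# rel_error(np.array([1.0, 2.0]), np.array([1.0, 2.0 + 1e-6]))  # ~2.5e-7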


#############################################################################################

data = pickle.load(open("../0.8s_rnn_rand.p", "rb"))

# Print the shape/length of every entry in the dataset.
for k, v in data.items():
    if type(v) == np.ndarray:
        print(k, type(v), v.shape, v.dtype)
    else:
        print(k, type(v), len(v))

#############################################################################################

X_train, y_train = data['train_features'], data['train_captions']
# X_val, y_val = data['X_val'], data['y_val']
# num_classes = len(data['char_to_indx'])
# def reformat(dataset):
#     dataset = dataset.reshape((dataset.shape[0], -1)).astype(np.float32)
#     return dataset
# X_train = reformat(X_train)
# X_val = reformat(X_val)

# Sanity-check the features for NaN/Inf values before training.
print("number of nan values:", np.count_nonzero(np.isnan(X_train)))
print("number of inf values:", np.count_nonzero(np.isinf(X_train)))

#############################################################################################

# Verify that different examples actually have distinct feature vectors.
train_features = data['train_features']
diff = rel_error(train_features[0], train_features[1])
print(diff)

#############################################################################################

# Visualize one of the feature vectors. Note: the dataset has no 'X_train' key
# (see the keys printed above), so index into 'train_features' instead.
F = plt.gcf()
F.set_size_inches(10, 25, forward=True)  # forward=True resizes the window along with the figure.
plt.imshow(data['train_features'][2], interpolation='nearest')
plt.colorbar()
plt.savefig('../feature2.jpg')
# plt.show()

#############################################################################################

def reformat(dataset):
    pca = PCA(n_components=300)  # unused while the PCA transform below stays commented out
    dataset = dataset.reshape((dataset.shape[0], -1)).astype(np.float32)
    # dataset = pca.fit_transform(dataset)
    return dataset

data['train_features'] = reformat(data['train_features'])
data['val_features'] = reformat(data['val_features'])
data['test_features'] = reformat(data['test_features'])
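
# If the PCA reduction is re-enabled, note that calling fit_transform per split
# would fit a different basis for each split. A minimal sketch of the usual
# pattern (a suggested alternative, not the original code): fit on train only,
# then project val/test with the same basis:
# pca = PCA(n_components=300)
# data['train_features'] = pca.fit_transform(data['train_features'])
# data['val_features'] = pca.transform(data['val_features'])
# data['test_features'] = pca.transform(data['test_features'])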

# Re-inspect shapes after flattening.
for k, v in data.items():
    if type(v) == np.ndarray:
        print(k, type(v), v.shape, v.dtype)
    else:
        print(k, type(v), len(v))

#############################################################################################

# Hyperparameter sweep over dropout, regularization, learning rate, and the
# FNN-to-RNN input dimension (kept commented out; the best configuration found
# is hard-coded below).
# fc_hidden_dims = [512, 512, 512, 512, 512, 512, 512]
# dropouts = [0.1, 0.25, 0.5]
# regs = [1e-6, 1e-5]
# weight_scale = 1e-3
# learning_rates = [8e-6, 3e-5, 8e-5]
# lr_decay = 0.99
# input_dims = [256, 512]  # input dimension to the RNN from the FNN; can be any arbitrary number
# best_loss = float('inf')
# best_model = None
# best_lr = None

# num_train = 100
# for dropout in dropouts:
#     for reg in regs:
#         for learning_rate in learning_rates:
#             for input_dim in input_dims:
#                 print("TRYING....DROPOUT:", dropout)
#                 print("REG:", reg)
#                 print("INPUT_DIM", input_dim)
#                 print("LR", learning_rate)
#                 print("")
#                 mask = np.random.choice(data['train_features'].shape[0], num_train)
#                 small_data = {
#                     'train_features': data['train_features'][mask],
#                     'train_captions': data['train_captions'][mask],
#                     'val_features': data['val_features'],
#                     'val_captions': data['val_captions'],
#                     'test_features': data['test_features'],
#                     'test_captions': data['test_captions'],
#                     'word_to_idx': data['word_to_idx'],
#                     'idx_to_word': data['idx_to_word'],
#                 }

#                 small_lstm_model = CaptioningRNN(
#                     cell_type='lstm',
#                     word_to_idx=data['word_to_idx'],
#                     fc_hidden_dims=fc_hidden_dims,
#                     fc_input_dim=data['train_features'].shape[1],
#                     input_dim=input_dim,
#                     hidden_dim=1024,
#                     wordvec_dim=256,
#                     dtype=np.float64,
#                     weight_scale=weight_scale,
#                     use_batchnorm=True,
#                     dropout=dropout,
#                     reg=reg
#                 )

#                 small_lstm_solver = CaptioningSolver(small_lstm_model, small_data,
#                                                      update_rule='adam',
#                                                      num_epochs=30,
#                                                      batch_size=25,
#                                                      optim_config={
#                                                          'learning_rate': learning_rate,
#                                                      },
#                                                      lr_decay=lr_decay,
#                                                      verbose=True, print_every=50,
#                                                      )

#                 small_lstm_solver.train()

#                 curr_loss = small_lstm_solver.loss_history[-1]
#                 if best_loss > curr_loss:
#                     print("BEST LOSS:", curr_loss)
#                     print("DROPOUT:", dropout)
#                     print("REG:", reg)
#                     print("INPUT_DIM", input_dim)
#                     print("LR", learning_rate)
#                     print("")
#                     best_loss = curr_loss
#                     best_model = CaptioningRNN(
#                         cell_type='lstm',
#                         word_to_idx=data['word_to_idx'],
#                         fc_hidden_dims=fc_hidden_dims,
#                         fc_input_dim=data['train_features'].shape[1],
#                         input_dim=input_dim,
#                         hidden_dim=1024,
#                         wordvec_dim=256,
#                         dtype=np.float64,
#                         weight_scale=weight_scale,
#                         use_batchnorm=True,
#                         dropout=dropout,
#                         reg=reg
#                     )
#                     best_lr = learning_rate

#############################################################################################

# Sweep lists retained from the search above; the chosen values are hard-coded below.
fc_hidden_dims = [512, 512, 512, 512, 512, 512, 512]
dropouts = [0.1, 0.25, 0.5]
regs = [1e-6, 1e-5]
weight_scale = 1e-3
learning_rates = [8e-6, 3e-5, 8e-5]
lr_decay = 0.99
input_dims = [256, 512]  # input dimension to the RNN from the FNN; can be any arbitrary number
best_loss = float('inf')
best_model = None
best_lr = None

# Train on a small random subset of the data.
num_train = 200
mask = np.random.choice(data['train_features'].shape[0], num_train)
small_data = {
    'train_features': data['train_features'][mask],
    'train_captions': data['train_captions'][mask],
    'val_features': data['val_features'],
    'val_captions': data['val_captions'],
    'test_features': data['test_features'],
    'test_captions': data['test_captions'],
    'word_to_idx': data['word_to_idx'],
    'idx_to_word': data['idx_to_word'],
}

best_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    fc_hidden_dims=fc_hidden_dims,
    fc_input_dim=data['train_features'].shape[1],
    input_dim=512,
    hidden_dim=1024,
    wordvec_dim=256,
    dtype=np.float64,
    weight_scale=weight_scale,
    use_batchnorm=True,
    dropout=0.1,
    reg=1e-6
)
best_lr = 4e-5

small_lstm_solver = CaptioningSolver(best_model, small_data,
                                     update_rule='adam',
                                     num_epochs=50,
                                     batch_size=50,
                                     optim_config={
                                         'learning_rate': best_lr,
                                     },
                                     lr_decay=lr_decay,
                                     verbose=True, print_every=50,
                                     )

small_lstm_solver.train()

# Plot the training loss history.
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.savefig('../training_loss.jpg')

pickle.dump((best_model, best_lr), open("best_model.p", "wb"))
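
#############################################################################################

# Optional sanity check of the trained model's output. This assumes CaptioningRNN
# exposes a sample() method, as in the cs231n-style interface these modules follow;
# kept commented out since it is not part of the original script.
# mask = np.random.choice(small_data['train_features'].shape[0], 2)
# features = small_data['train_features'][mask]
# sample_captions = decode_captions(best_model.sample(features), data['idx_to_word'])
# for caption in sample_captions:
#     print(caption)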