@@ -68,6 +68,9 @@ def _gen_eqs(self):

        input_function = None

+        for layer in self._layers:
+            eqs.append(Eq(layer.result, 0))
+
        for layer in self._layers:
            if input_function is not None:
                dims = input_function.dimensions
@@ -85,6 +88,19 @@ def _gen_backprop_eqs(self):
        eqs = []
        args = []

+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients, 0))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients, 0))
+
+            if layer.result_gradients is not None \
+                    and i < len(self._layers) - 1:
+                eqs.append(Eq(layer.result_gradients, 0))
+
        for i in range(len(self._layers) - 1, -1, -1):
            if i < len(self._layers) - 1:
                prev_layer = self._layers[i + 1]
@@ -102,6 +118,17 @@ def _gen_backprop_eqs(self):
            args += layer_args
            eqs += layer_eqs

+        batch_size = self._layers[-1].result.shape[1]
+
+        for layer in self._layers:
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients,
+                              layer.kernel_gradients / batch_size))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients,
+                              layer.bias_gradients / batch_size))
+
        return (eqs, args)

    @property
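
Aside (not part of the diff): the hunks above move the zero-reset of results/gradients and the batch-size normalisation out of Python loops and into the symbolic equation lists, so they execute inside the compiled Devito operators. A minimal, self-contained sketch of that pattern, assuming only Devito's public Grid/Function/Eq/Operator API; the names below are illustrative, not taken from this codebase:

    from devito import Grid, Function, Eq, Operator
    import numpy as np

    grid = Grid(shape=(4, 4))
    result = Function(name='result', grid=grid)
    grads = Function(name='grads', grid=grid)
    result.data[:] = 5.0
    grads.data[:] = 8.0

    batch_size = 4
    # Zero-reset and batch-size normalisation expressed as symbolic updates,
    # run when the compiled operator executes rather than via .data[:] in Python.
    op = Operator([Eq(result, 0),
                   Eq(grads, grads / batch_size)])
    op.apply()

    print(np.array(result.data).max())  # 0.0
    print(np.array(grads.data).max())   # 2.0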
@@ -118,9 +145,6 @@ def forward(self, input_data):
        input_data : np.ndarray
            Input data for the network.
        """
-        for layer in self._layers:
-            layer.result.data[:] = 0
-
        self._layers[0].input.data[:] = input_data
        self._forward_operator.apply(**self._forward_arg_dict)
        return self._layers[-1].result.data
@@ -154,29 +178,10 @@ def backward(self, expected, loss_gradient_func, pytorch_optimizer=None):

            The default value is None.
        """
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] = 0
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] = 0
-
-            if layer.result_gradients is not None:
-                layer.result_gradients.data[:] = 0
-
-        batch_size = self._layers[-1].result.shape[1]
-
        self._layers[-1].result_gradients.data[:] = \
            np.transpose(np.array(loss_gradient_func(self._layers[-1],
                                                     expected)))
        self._backward_operator.apply(**self._backward_arg_dict)

-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] /= batch_size
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] /= batch_size
-
        if pytorch_optimizer is not None:
            pytorch_optimizer.step()
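
Aside (not part of the diff): dividing the accumulated kernel/bias gradients by batch_size, which backward() used to do in Python and the operator now does symbolically, simply averages the per-sample gradients over the batch. A small NumPy check of that equivalence, with hypothetical shapes for illustration only:

    import numpy as np

    batch_size = 4
    per_sample = np.random.rand(batch_size, 3, 3)   # hypothetical per-sample gradients
    accumulated = per_sample.sum(axis=0)            # what the backward pass accumulates
    assert np.allclose(accumulated / batch_size, per_sample.mean(axis=0))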