Commit bde8eb5

Merge pull request #9 from devitocodes/zero_in_op
Move zeroing layers to inside an operator
2 parents 6fcd1ac + a37531e
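This change replaces the NumPy-side resets that used to run in Python around each operator call (layer.result.data[:] = 0, zeroing of the gradient buffers, and the division by the batch size) with Devito equations compiled directly into the forward and backward operators. A minimal sketch of the pattern using plain Devito objects rather than Joey's layer classes (the names result/update, the grid shape and the values are illustrative only):

    from devito import Grid, Function, Eq, Operator

    grid = Grid(shape=(2, 2))
    result = Function(name='result', grid=grid)
    update = Function(name='update', grid=grid)
    update.data[:] = 3.0

    # The reset is just another equation in the operator, so no separate
    # result.data[:] = 0 is needed before apply().
    op = Operator([Eq(result, 0),
                   Eq(result, result + update)])  # stand-in for a layer's real equations
    op.apply()
    assert (result.data == 3.0).all()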

File tree: 2 files changed, +28 -24 lines changed


joey/layers.py (+1, -2)

@@ -483,8 +483,7 @@ def backprop_equations(self, prev_layer, next_layer):
         a = self._backward_tmp_constants[0]
         b = self._backward_tmp_constants[1]
 
-        return ([Eq(next_layer.result_gradients, 0),
-                 Eq(a, index // 2),
+        return ([Eq(a, index // 2),
                  Eq(b, index % 2),
                  Inc(next_layer.result_gradients[dims[0],
                                                  dims[1],
joey/net.py (+27, -22)

@@ -68,6 +68,9 @@ def _gen_eqs(self):
 
         input_function = None
 
+        for layer in self._layers:
+            eqs.append(Eq(layer.result, 0))
+
         for layer in self._layers:
             if input_function is not None:
                 dims = input_function.dimensions
@@ -85,6 +88,19 @@ def _gen_backprop_eqs(self):
         eqs = []
         args = []
 
+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients, 0))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients, 0))
+
+            if layer.result_gradients is not None \
+                    and i < len(self._layers) - 1:
+                eqs.append(Eq(layer.result_gradients, 0))
+
         for i in range(len(self._layers) - 1, -1, -1):
             if i < len(self._layers) - 1:
                 prev_layer = self._layers[i + 1]
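Note that the last layer's result_gradients is deliberately excluded from this zeroing (the i < len(self._layers) - 1 guard): backward() writes the loss gradient into that buffer with NumPy just before the operator runs, so an in-operator reset would overwrite it.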
@@ -102,6 +118,17 @@ def _gen_backprop_eqs(self):
             args += layer_args
             eqs += layer_eqs
 
+        batch_size = self._layers[-1].result.shape[1]
+
+        for layer in self._layers:
+            if layer.kernel_gradients is not None:
+                eqs.append(Eq(layer.kernel_gradients,
+                              layer.kernel_gradients / batch_size))
+
+            if layer.bias_gradients is not None:
+                eqs.append(Eq(layer.bias_gradients,
+                              layer.bias_gradients / batch_size))
+
         return (eqs, args)
 
     @property
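The division by the batch size, previously performed with NumPy after the backward operator returned, is now expressed as equations so the averaging happens inside the generated kernels. A small self-contained check of the equivalence (names and values are hypothetical):

    import numpy as np
    from devito import Grid, Function, Eq, Operator

    grid = Grid(shape=(3, 3))
    grad = Function(name='grad', grid=grid)
    grad.data[:] = 8.0
    batch_size = 4

    # Eq(grad, grad / batch_size) inside the operator ...
    Operator([Eq(grad, grad / batch_size)]).apply()
    # ... matches the NumPy in-place division it replaces: grad.data[:] /= batch_size
    assert np.allclose(grad.data, 2.0)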
@@ -118,9 +145,6 @@ def forward(self, input_data):
         input_data : np.ndarray
             Input data for the network.
         """
-        for layer in self._layers:
-            layer.result.data[:] = 0
-
         self._layers[0].input.data[:] = input_data
         self._forward_operator.apply(**self._forward_arg_dict)
         return self._layers[-1].result.data
@@ -154,29 +178,10 @@ def backward(self, expected, loss_gradient_func, pytorch_optimizer=None):
 
             The default value is None.
         """
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] = 0
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] = 0
-
-            if layer.result_gradients is not None:
-                layer.result_gradients.data[:] = 0
-
-        batch_size = self._layers[-1].result.shape[1]
-
         self._layers[-1].result_gradients.data[:] = \
             np.transpose(np.array(loss_gradient_func(self._layers[-1],
                                                      expected)))
         self._backward_operator.apply(**self._backward_arg_dict)
 
-        for layer in self._layers:
-            if layer.kernel_gradients is not None:
-                layer.kernel_gradients.data[:] /= batch_size
-
-            if layer.bias_gradients is not None:
-                layer.bias_gradients.data[:] /= batch_size
-
         if pytorch_optimizer is not None:
             pytorch_optimizer.step()
