forked from Alescontrela/Numpy-CNN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backward.py
70 lines (57 loc) · 2.31 KB
/
backward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
Description: backpropagation operations for a convolutional neural network
Author: Alejandro Escontrela
Version: 1.0
Date: June 12th, 2018
'''
import numpy as np
from CNN.utils import *
#####################################################
############### Backward Operations #################
#####################################################
def convolutionBackward(dconv_prev, conv_in, filt, s):
    '''
    Backpropagation through a convolutional layer.

    Every window of `conv_in` that a filter covers completely is visited,
    stepping by `s` along both spatial axes. The upstream gradient of the
    matching output position is distributed to the filter weights and to the
    input values under that window.

    Parameters:
        dconv_prev: loss gradient w.r.t. the convolution output, indexed as
            [filter, out_row, out_col].
        conv_in: input of the convolution, indexed as [channel, row, col].
            Height and width may differ.
        filt: filter bank of shape (n_f, n_c, f_h, f_w).
        s: stride, a positive integer.

    Returns:
        (dout, dfilt, dbias): loss gradients w.r.t. `conv_in` (same shape as
        `conv_in`), w.r.t. `filt` (same shape as `filt`) and w.r.t. the bias
        (shape (n_f, 1)).

    Raises:
        ValueError: if `s` is smaller than 1.
    '''
    # a stride of zero would never advance the window and loop forever
    if s < 1:
        raise ValueError('stride s must be a positive integer, got {!r}'.format(s))

    (n_f, _, f_h, f_w) = filt.shape
    (_, in_h, in_w) = conv_in.shape

    ## initialize derivatives
    dout = np.zeros(conv_in.shape)
    dfilt = np.zeros(filt.shape)
    dbias = np.zeros((n_f, 1))

    # top-left corners of all windows that fit entirely inside the input;
    # both ranges are empty when the filter is larger than the input
    rows = range(0, in_h - f_h + 1, s)
    cols = range(0, in_w - f_w + 1, s)

    for curr_f in range(n_f):
        # loop through all filters
        for out_y, curr_y in enumerate(rows):
            for out_x, curr_x in enumerate(cols):
                grad = dconv_prev[curr_f, out_y, out_x]
                # loss gradient of filter (used to update the filter)
                dfilt[curr_f] += grad * conv_in[:, curr_y:curr_y+f_h, curr_x:curr_x+f_w]
                # loss gradient of the input to the convolution operation;
                # accumulated because neighbouring windows can overlap
                dout[:, curr_y:curr_y+f_h, curr_x:curr_x+f_w] += grad * filt[curr_f]
        # loss gradient of the bias
        dbias[curr_f] = np.sum(dconv_prev[curr_f])

    return dout, dfilt, dbias
def maxpoolBackward(dpool, orig, f, s):
    '''
    Backpropagation through a maxpooling layer. The gradients are passed
    through the indices of greatest value in the original maxpooling during
    the forward step.

    Every f x f window of `orig` that fits completely is visited, stepping by
    `s` along both spatial axes. The upstream gradient of the matching output
    position is added to the location that `nanargmax` reports for that
    window. All other input positions keep a gradient of zero.

    Parameters:
        dpool: loss gradient w.r.t. the pooling output, indexed as
            [channel, out_row, out_col].
        orig: input of the pooling layer in the forward step, indexed as
            [channel, row, col]. Height and width may differ.
        f: side length of the pooling window.
        s: stride, a positive integer.

    Returns:
        dout: loss gradient w.r.t. `orig`, same shape as `orig`.

    Raises:
        ValueError: if `s` is smaller than 1.
    '''
    # a stride of zero would never advance the window and loop forever
    if s < 1:
        raise ValueError('stride s must be a positive integer, got {!r}'.format(s))

    (n_c, in_h, in_w) = orig.shape

    dout = np.zeros(orig.shape)

    # top-left corners of all windows that fit entirely inside the input;
    # both ranges are empty when the window is larger than the input
    rows = range(0, in_h - f + 1, s)
    cols = range(0, in_w - f + 1, s)

    for curr_c in range(n_c):
        for out_y, curr_y in enumerate(rows):
            for out_x, curr_x in enumerate(cols):
                # obtain index of largest value in input for current window
                # (nanargmax comes from CNN.utils; it is used here as returning
                # the (row, col) offset inside the window)
                (a, b) = nanargmax(orig[curr_c, curr_y:curr_y+f, curr_x:curr_x+f])
                # accumulate rather than assign: with overlapping windows
                # (s < f) the same element can be the maximum of several
                # windows and must receive the sum of their gradients
                dout[curr_c, curr_y+a, curr_x+b] += dpool[curr_c, out_y, out_x]

    return dout