
Renamed 'acc_grad' to 'accum_grad' to reduce ambiguity
eriklindernoren committed Dec 23, 2017
1 parent c5256ff commit 22b18ba
Showing 4 changed files with 73 additions and 67 deletions.
118 changes: 59 additions & 59 deletions mlfromscratch/deep_learning/layers.py
@@ -26,10 +26,10 @@ def forward_pass(self, X, training):
""" Propogates the signal forward in the network """
raise NotImplementedError()

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):
""" Propogates the accumulated gradient backwards in the network.
If the has trainable weights then these weights are also tuned in this method.
As input (acc_grad) it receives the gradient with respect to the output of the layer and
As input (accum_grad) it receives the gradient with respect to the output of the layer and
returns the gradient with respect to the output of the previous layer. """
raise NotImplementedError()

@@ -73,23 +73,23 @@ def forward_pass(self, X, training=True):
self.layer_input = X
return X.dot(self.W) + self.w0

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):
# Save weights used during the forward pass
W = self.W

if self.trainable:
# Calculate gradient w.r.t layer weights
grad_w = self.layer_input.T.dot(acc_grad)
grad_w0 = np.sum(acc_grad, axis=0, keepdims=True)
grad_w = self.layer_input.T.dot(accum_grad)
grad_w0 = np.sum(accum_grad, axis=0, keepdims=True)

# Update the layer weights
self.W = self.W_opt.update(self.W, grad_w)
self.w0 = self.w0_opt.update(self.w0, grad_w0)

# Return accumulated gradient for next layer
# Calculated based on the weights used during the forward pass
acc_grad = acc_grad.dot(W.T)
return acc_grad
accum_grad = accum_grad.dot(W.T)
return accum_grad

def output_shape(self):
return (self.n_units, )
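
A standalone shape check of the Dense backward pass above (sizes are made up and the optimizer update is omitted):

import numpy as np

batch_size, n_in, n_units = 4, 3, 5
X = np.random.randn(batch_size, n_in)                 # layer input saved in the forward pass
W = np.random.randn(n_in, n_units)                    # layer weights
accum_grad = np.random.randn(batch_size, n_units)     # dL/d(output), received from the next layer

grad_w = X.T.dot(accum_grad)                          # dL/dW, shape (n_in, n_units)
grad_w0 = np.sum(accum_grad, axis=0, keepdims=True)   # dL/dw0, shape (1, n_units)
grad_input = accum_grad.dot(W.T)                      # dL/d(input), passed on to the previous layer

assert grad_w.shape == W.shape
assert grad_input.shape == X.shape
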
@@ -160,25 +160,25 @@ def forward_pass(self, X, training=True):

return self.outputs

def backward_pass(self, acc_grad):
_, timesteps, _ = acc_grad.shape
def backward_pass(self, accum_grad):
_, timesteps, _ = accum_grad.shape

# Variables where we save the accumulated gradient w.r.t each parameter
grad_U = np.zeros_like(self.U)
grad_V = np.zeros_like(self.V)
grad_W = np.zeros_like(self.W)
# The gradient w.r.t the layer input.
# Will be passed on to the previous layer in the network
acc_grad_next = np.zeros_like(acc_grad)
accum_grad_next = np.zeros_like(accum_grad)

# Back Propagation Through Time
for t in reversed(range(timesteps)):
# Update gradient w.r.t V at time step t
grad_V += acc_grad[:, t].T.dot(self.states[:, t])
grad_V += accum_grad[:, t].T.dot(self.states[:, t])
# Calculate the gradient w.r.t the state input
grad_wrt_state = acc_grad[:, t].dot(self.V) * self.activation.gradient(self.state_input[:, t])
grad_wrt_state = accum_grad[:, t].dot(self.V) * self.activation.gradient(self.state_input[:, t])
# Gradient w.r.t the layer input
acc_grad_next[:, t] = grad_wrt_state.dot(self.U)
accum_grad_next[:, t] = grad_wrt_state.dot(self.U)
# Update gradient w.r.t W and U by backprop. from time step t over at most
# self.bptt_trunc time steps
for t_ in reversed(np.arange(max(0, t - self.bptt_trunc), t+1)):
@@ -192,7 +192,7 @@ def backward_pass(self, acc_grad):
self.V = self.V_opt.update(self.V, grad_V)
self.W = self.W_opt.update(self.W, grad_W)

return acc_grad_next
return accum_grad_next

def output_shape(self):
return self.input_shape
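
The inner loop above caps how far back each timestep's gradient flows. A small standalone illustration of the truncation window (the timesteps and bptt_trunc values are made up):

timesteps, bptt_trunc = 6, 3
for t in reversed(range(timesteps)):
    window = list(reversed(range(max(0, t - bptt_trunc), t + 1)))
    print(f"t={t}: gradient is backpropagated through states {window}")
# t=5: gradient is backpropagated through states [5, 4, 3, 2]
# ...
# t=0: gradient is backpropagated through states [0]
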
@@ -253,31 +253,31 @@ def forward_pass(self, X, training=True):
# Redistribute axes so that batch size comes first
return output.transpose(3,0,1,2)

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):
# Reshape accumulated gradient into column shape
acc_grad = acc_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)
accum_grad = accum_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)

if self.trainable:
# Take the dot product between the column-shaped accumulated gradient and the
# column-shaped layer input to determine the gradient at the layer with respect to the layer weights
grad_w = acc_grad.dot(self.X_col.T).reshape(self.W.shape)
grad_w = accum_grad.dot(self.X_col.T).reshape(self.W.shape)
# The gradient with respect to the bias terms is a sum, as in the Dense layer
grad_w0 = np.sum(acc_grad, axis=1, keepdims=True)
grad_w0 = np.sum(accum_grad, axis=1, keepdims=True)

# Update the layer's weights
self.W = self.W_opt.update(self.W, grad_w)
self.w0 = self.w0_opt.update(self.w0, grad_w0)

# Recalculate the gradient which will be propagated back to the previous layer
acc_grad = self.W_col.T.dot(acc_grad)
accum_grad = self.W_col.T.dot(accum_grad)
# Reshape from column shape to image shape
acc_grad = column_to_image(acc_grad,
accum_grad = column_to_image(accum_grad,
self.layer_input.shape,
self.filter_shape,
stride=self.stride,
output_shape=self.padding)

return acc_grad
return accum_grad
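
The reshaping above mirrors the im2col trick assumed to be used in the forward pass (one row per filter, one column per output position and sample). A standalone shape-only sketch with made-up sizes:

import numpy as np

batch_size, n_filters, out_h, out_w = 2, 8, 5, 5
accum_grad = np.random.randn(batch_size, n_filters, out_h, out_w)   # dL/d(output) in image shape

grad_col = accum_grad.transpose(1, 2, 3, 0).reshape(n_filters, -1)
print(grad_col.shape)   # (8, 50) == (n_filters, out_h * out_w * batch_size)

# Assuming X_col has shape (channels * filter_h * filter_w, out_h * out_w * batch_size),
# grad_col.dot(X_col.T) then has shape (n_filters, channels * filter_h * filter_w),
# which reshapes back into the filter tensor W.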

def output_shape(self):
channels, height, width = self.input_shape
@@ -331,30 +331,30 @@ def forward_pass(self, X, training=True):

return output

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):

# Save parameters used during the forward pass
gamma = self.gamma

# If the layer is trainable the parameters are updated
if self.trainable:
X_norm = self.X_centered * self.stddev_inv
grad_gamma = np.sum(acc_grad * X_norm, axis=0)
grad_beta = np.sum(acc_grad, axis=0)
grad_gamma = np.sum(accum_grad * X_norm, axis=0)
grad_beta = np.sum(accum_grad, axis=0)

self.gamma = self.gamma_opt.update(self.gamma, grad_gamma)
self.beta = self.beta_opt.update(self.beta, grad_beta)

batch_size = acc_grad.shape[0]
batch_size = accum_grad.shape[0]

# The gradient of the loss with respect to the layer inputs (use weights from forward pass)
acc_grad = (1 / batch_size) * gamma * self.stddev_inv * (
batch_size * acc_grad
- np.sum(acc_grad, axis=0)
- self.X_centered * self.stddev_inv**2 * np.sum(acc_grad * self.X_centered, axis=0)
accum_grad = (1 / batch_size) * gamma * self.stddev_inv * (
batch_size * accum_grad
- np.sum(accum_grad, axis=0)
- self.X_centered * self.stddev_inv**2 * np.sum(accum_grad * self.X_centered, axis=0)
)

return acc_grad
return accum_grad

def output_shape(self):
return self.input_shape
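
The input-gradient expression above is the standard batch-norm backward formula. A standalone finite-difference check of it (eps, shapes and values are made up):

import numpy as np

np.random.seed(0)
N, D, eps = 8, 3, 0.01
X = np.random.randn(N, D)
gamma = np.random.randn(D)
beta = np.random.randn(D)
G = np.random.randn(N, D)                  # arbitrary upstream gradient dL/d(output)

def forward(X):
    X_centered = X - X.mean(axis=0)
    stddev_inv = 1.0 / np.sqrt(X.var(axis=0) + eps)
    return gamma * X_centered * stddev_inv + beta, X_centered, stddev_inv

out, X_centered, stddev_inv = forward(X)

# Analytic input gradient, same expression as in backward_pass
dX = (1.0 / N) * gamma * stddev_inv * (
    N * G
    - np.sum(G, axis=0)
    - X_centered * stddev_inv**2 * np.sum(G * X_centered, axis=0)
)

# Central-difference check of one entry
h = 1e-5
Xp, Xm = X.copy(), X.copy()
Xp[0, 0] += h
Xm[0, 0] -= h
numeric = ((forward(Xp)[0] * G).sum() - (forward(Xm)[0] * G).sum()) / (2 * h)
print(np.isclose(dX[0, 0], numeric))       # True (up to floating point tolerance)
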
@@ -387,18 +387,18 @@ def forward_pass(self, X, training=True):

return output

def backward_pass(self, acc_grad):
batch_size, _, _, _ = acc_grad.shape
def backward_pass(self, accum_grad):
batch_size, _, _, _ = accum_grad.shape
channels, height, width = self.input_shape
acc_grad = acc_grad.transpose(2, 3, 0, 1).ravel()
accum_grad = accum_grad.transpose(2, 3, 0, 1).ravel()

# MaxPool or AveragePool specific method
acc_grad_col = self._pool_backward(acc_grad)
accum_grad_col = self._pool_backward(accum_grad)

acc_grad = column_to_image(acc_grad_col, (batch_size * channels, 1, height, width), self.pool_shape, self.stride, 0)
acc_grad = acc_grad.reshape((batch_size,) + self.input_shape)
accum_grad = column_to_image(accum_grad_col, (batch_size * channels, 1, height, width), self.pool_shape, self.stride, 0)
accum_grad = accum_grad.reshape((batch_size,) + self.input_shape)

return acc_grad
return accum_grad

def output_shape(self):
channels, height, width = self.input_shape
@@ -416,21 +416,21 @@ def _pool_forward(self, X_col):
self.cache = arg_max
return output

def _pool_backward(self, acc_grad):
acc_grad_col = np.zeros((np.prod(self.pool_shape), acc_grad.size))
def _pool_backward(self, accum_grad):
accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
arg_max = self.cache
acc_grad_col[arg_max, range(acc_grad.size)] = acc_grad
return acc_grad_col
accum_grad_col[arg_max, range(accum_grad.size)] = accum_grad
return accum_grad_col

class AveragePooling2D(PoolingLayer):
def _pool_forward(self, X_col):
output = np.mean(X_col, axis=0)
return output

def _pool_backward(self, acc_grad):
acc_grad_col = np.zeros((np.prod(self.pool_shape), acc_grad.size))
acc_grad_col[:, range(acc_grad.size)] = 1. / acc_grad_col.shape[0] * acc_grad
return acc_grad_col
def _pool_backward(self, accum_grad):
accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
accum_grad_col[:, range(accum_grad.size)] = 1. / accum_grad_col.shape[0] * accum_grad
return accum_grad_col
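
To make the two _pool_backward variants concrete: max pooling routes each output gradient back to the input position that won the forward pass, while average pooling spreads it evenly over the window. A standalone toy example with three 2x2 windows in column form (values are made up):

import numpy as np

pool_size = 4                                   # 2x2 window flattened into a column
X_col = np.array([[1., 5., 0.],                 # one column per pooling window
                  [7., 2., 3.],
                  [4., 8., 6.],
                  [0., 1., 2.]])
arg_max = np.argmax(X_col, axis=0)              # winners per window: [1, 2, 2]
accum_grad = np.array([0.5, -1.0, 2.0])         # dL/d(output), one value per window

max_col = np.zeros((pool_size, accum_grad.size))
max_col[arg_max, range(accum_grad.size)] = accum_grad          # only the winners receive gradient

avg_col = np.zeros((pool_size, accum_grad.size))
avg_col[:, range(accum_grad.size)] = accum_grad / pool_size    # every position gets an equal share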


class ConstantPadding2D(Layer):
@@ -463,11 +463,11 @@ def forward_pass(self, X, training=True):
constant_values=self.padding_value)
return output

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):
pad_top, pad_left = self.padding[0][0], self.padding[1][0]
height, width = self.input_shape[1], self.input_shape[2]
acc_grad = acc_grad[:, :, pad_top:pad_top+height, pad_left:pad_left+width]
return acc_grad
accum_grad = accum_grad[:, :, pad_top:pad_top+height, pad_left:pad_left+width]
return accum_grad
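
Since the forward pass only adds a border, the backward pass just crops the gradient back to the unpadded region. A quick standalone check of the index arithmetic (padding amounts are made up):

import numpy as np

x = np.random.randn(2, 3, 4, 5)                         # (batch, channels, height, width)
padding = ((1, 1), (2, 2))                              # ((top, bottom), (left, right))
padded = np.pad(x, ((0, 0), (0, 0), padding[0], padding[1]), mode="constant")

pad_top, pad_left = padding[0][0], padding[1][0]
height, width = x.shape[2], x.shape[3]
cropped = padded[:, :, pad_top:pad_top + height, pad_left:pad_left + width]
assert np.array_equal(cropped, x)                       # cropping undoes the padding exactly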

def output_shape(self):
new_height = self.input_shape[1] + np.sum(self.padding[0])
@@ -507,8 +507,8 @@ def forward_pass(self, X, training=True):
self.prev_shape = X.shape
return X.reshape((X.shape[0], -1))

def backward_pass(self, acc_grad):
return acc_grad.reshape(self.prev_shape)
def backward_pass(self, accum_grad):
return accum_grad.reshape(self.prev_shape)

def output_shape(self):
return (np.prod(self.input_shape),)
@@ -535,10 +535,10 @@ def forward_pass(self, X, training=True):
X_new = X.repeat(self.size[0], axis=2).repeat(self.size[1], axis=3)
return X_new

def backward_pass(self, acc_grad):
def backward_pass(self, accum_grad):
# Downsample the accumulated gradient to the shape of the layer input
acc_grad = acc_grad[:, :, ::self.size[0], ::self.size[1]]
return acc_grad
accum_grad = accum_grad[:, :, ::self.size[0], ::self.size[1]]
return accum_grad
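
A standalone look at what the repeat/stride pair does shape-wise (nearest-neighbour upsampling forward, strided subsampling backward; sizes are made up):

import numpy as np

size = (2, 2)
X = np.arange(6, dtype=float).reshape(1, 1, 2, 3)         # (batch, channels, height, width)
up = X.repeat(size[0], axis=2).repeat(size[1], axis=3)    # forward: (1, 1, 4, 6)

accum_grad = np.ones_like(up)                             # dL/d(output) in the upsampled shape
down = accum_grad[:, :, ::size[0], ::size[1]]             # backward: back to (1, 1, 2, 3)
print(up.shape, down.shape)                               # (1, 1, 4, 6) (1, 1, 2, 3)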

def output_shape(self):
channels, height, width = self.input_shape
@@ -563,8 +563,8 @@ def forward_pass(self, X, training=True):
self.prev_shape = X.shape
return X.reshape((X.shape[0], ) + self.shape)

def backward_pass(self, acc_grad):
return acc_grad.reshape(self.prev_shape)
def backward_pass(self, accum_grad):
return accum_grad.reshape(self.prev_shape)

def output_shape(self):
return self.shape
@@ -594,8 +594,8 @@ def forward_pass(self, X, training=True):
c = self._mask
return X * c

def backward_pass(self, acc_grad):
return acc_grad * self._mask
def backward_pass(self, accum_grad):
return accum_grad * self._mask

def output_shape(self):
return self.input_shape
@@ -632,8 +632,8 @@ def forward_pass(self, X, training=True):
self.layer_input = X
return self.activation_func(X)

def backward_pass(self, acc_grad):
return acc_grad * self.activation_func.gradient(self.layer_input)
def backward_pass(self, accum_grad):
return accum_grad * self.activation_func.gradient(self.layer_input)

def output_shape(self):
return self.input_shape
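
The elementwise chain rule above can be checked by hand for any concrete activation, e.g. a sigmoid (standalone sketch, values are made up):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_gradient(x):
    s = sigmoid(x)
    return s * (1.0 - s)

layer_input = np.array([-1.0, 0.0, 2.0])
accum_grad = np.array([0.3, -0.5, 1.0])                     # dL/d(activation output)
grad_input = accum_grad * sigmoid_gradient(layer_input)     # dL/d(activation input)
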
3 changes: 1 addition & 2 deletions mlfromscratch/deep_learning/neural_network.py
@@ -97,9 +97,8 @@ def _forward_pass(self, X, training=True):

def _backward_pass(self, loss_grad):
""" Propagate the gradient 'backwards' and update the weights in each layer """
acc_grad = loss_grad
for layer in reversed(self.layers):
acc_grad = layer.backward_pass(acc_grad)
loss_grad = layer.backward_pass(loss_grad)

def summary(self, name="Model Summary"):
# Print model name
12 changes: 7 additions & 5 deletions mlfromscratch/supervised_learning/naive_bayes.py
@@ -38,8 +38,11 @@ def _calculate_prior(self, c):
return n_class_instances / n_total_instances

def _classify(self, sample):
""" Classification using Bayes Rule P(Y|X) = P(X|Y)*P(Y)/P(X)
""" Classification using Bayes Rule P(Y|X) = P(X|Y)*P(Y)/P(X),
or Posterior = Likelihood * Prior / Scaling Factor
P(Y|X) - The posterior is the probability that sample x is of class y, given that the
feature values of x are distributed according to the distribution of y, and given the prior.
P(X|Y) - Likelihood of data X given class distribution Y.
Gaussian distribution (given by _calculate_likelihood)
P(Y) - Prior (given by _calculate_prior)
@@ -52,17 +55,16 @@ def _classify(self, sample):
posteriors = []
# Go through list of classes
for i, c in enumerate(self.classes):
# Initialize posterior as prior
posterior = self._calculate_prior(c)
# Naive assumption (independence):
# P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y)
# Multiply with the class likelihoods
# Posterior is product of prior and likelihoods (ignoring scaling factor)
for j, params in enumerate(self.parameters[i]):
sample_feature = sample[j]
# Determine P(x|Y)
# Likelihood of sample x given distribution of y
likelihood = self._calculate_likelihood(params["mean"], params["var"], sample_feature)
# Multiply with the accumulated probability
posterior *= likelihood
# Total posterior = P(Y)*P(x1|Y)*P(x2|Y)*...*P(xN|Y)
posteriors.append(posterior)
# Return the class with the largest posterior probability
index_of_max = np.argmax(posteriors)
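
A standalone numeric illustration of the loop above for a single two-feature sample with made-up class parameters (the scaling factor P(X) is dropped, as in the code):

import numpy as np

def gaussian_likelihood(mean, var, x):
    # Gaussian PDF; _calculate_likelihood is assumed to compute something equivalent
    coeff = 1.0 / np.sqrt(2.0 * np.pi * var)
    return coeff * np.exp(-((x - mean) ** 2) / (2.0 * var))

sample = np.array([1.2, -0.5])
priors = [0.6, 0.4]                                 # P(Y=0), P(Y=1)
parameters = [                                      # per class, per feature: (mean, var)
    [(1.0, 0.5), (0.0, 1.0)],
    [(-1.0, 0.5), (1.0, 2.0)],
]

posteriors = []
for prior, class_params in zip(priors, parameters):
    posterior = prior
    for (mean, var), feature in zip(class_params, sample):
        posterior *= gaussian_likelihood(mean, var, feature)   # naive independence assumption
    posteriors.append(posterior)

print(np.argmax(posteriors))                        # 0  (index of the most probable class here)
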
@@ -21,7 +21,12 @@ class ParticleSwarmOptimizedNN():
Neural Network Training Using Particle Swarm Optimization
https://visualstudiomagazine.com/articles/2013/12/01/neural-network-training-using-particle-swarm-optimization.aspx
"""
def __init__(self, population_size, model_builder, inertia_weight=0.8, cognitive_weight=2, social_weight=2, max_velocity=10):
def __init__(self, population_size,
model_builder,
inertia_weight=0.8,
cognitive_weight=2,
social_weight=2,
max_velocity=20):
self.population_size = population_size
self.model_builder = model_builder
self.best_individual = None
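
For context, these weights plug into the standard particle swarm velocity update; a generic standalone sketch of that rule (not necessarily the exact update used in ParticleSwarmOptimizedNN):

import numpy as np

def pso_velocity_update(velocity, position, personal_best, global_best,
                        inertia_weight=0.8, cognitive_weight=2.0,
                        social_weight=2.0, max_velocity=20.0):
    """ Textbook PSO rule: inertia plus pulls toward the personal and global bests. """
    r1 = np.random.rand(*position.shape)
    r2 = np.random.rand(*position.shape)
    new_velocity = (inertia_weight * velocity
                    + cognitive_weight * r1 * (personal_best - position)
                    + social_weight * r2 * (global_best - position))
    return np.clip(new_velocity, -max_velocity, max_velocity)

# Toy usage on a 3-dimensional parameter vector
position = np.zeros(3)
velocity = np.zeros(3)
personal_best = np.array([1.0, -2.0, 0.5])
global_best = np.array([0.5, -1.0, 1.0])
velocity = pso_velocity_update(velocity, position, personal_best, global_best)
position = position + velocity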
