Commit

Add multilayer perceptron.
trekhleb committed Dec 19, 2018
1 parent e7bc4db commit 776f77f
Showing 2 changed files with 247 additions and 96 deletions.
166 changes: 102 additions & 64 deletions homemade/neural_network/multilayer_perceptron.py
@@ -26,81 +26,88 @@ def __init__(self, data, labels, layers, epsilon, normalize_data=False):
self.epsilon = epsilon

# Randomly initialize the weights for each neural network layer.
self.thetas = MultilayerPerceptron.thetas_init(layers, epsilon)

def train(self, regularization_param=0, max_iterations=1000):
# Flatten model thetas for gradient descent.
unrolled_thetas = MultilayerPerceptron.thetas_unroll(self.thetas)

# Init cost history array.
cost_histories = []

# Run gradient descent.
(current_theta, cost_history) = MultilayerPerceptron.gradient_descent(
self.data,
self.labels,
unrolled_thetas,
self.layers,
regularization_param,
max_iterations,
)

return self.thetas, cost_histories
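
For orientation, a hypothetical usage sketch of the class as it stands in this commit; the toy dataset, the [4, 10, 3] layer configuration, and the epsilon value below are illustrative assumptions rather than anything from the repository:

import numpy as np

# Hypothetical toy dataset: 100 examples, 4 features, 3 classes.
data = np.random.rand(100, 4)
labels = np.random.randint(0, 3, (100, 1))

# 4 input features -> one hidden layer of 10 units -> 3 output classes.
multilayer_perceptron = MultilayerPerceptron(data, labels, layers=[4, 10, 3], epsilon=0.12)
thetas, cost_histories = multilayer_perceptron.train(regularization_param=1, max_iterations=500)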

@staticmethod
def gradient_descent(data, labels, initial_theta, layers, regularization_param, max_iteration):
"""Gradient descent function.

Iteratively optimizes theta model parameters.

:param data: the set of training or test data.
:param labels: training set outputs (the class of each example).
:param initial_theta: initial model parameters.
:param layers: model layers configuration.
:param regularization_param: regularization parameter.
:param max_iteration: maximum number of gradient descent steps.
"""

# Initialize cost history list.
cost_history = []

# Launch gradient descent.
minimization_result = minimize(
# Function that we're going to minimize.
lambda current_theta: MultilayerPerceptron.cost_function(
data, labels, current_theta, layers, regularization_param
),
# Initial values of the model parameters.
initial_theta,
# We will use the conjugate gradient algorithm.
method='CG',
# Function that calculates the gradient on each step.
jac=lambda current_theta: MultilayerPerceptron.gradient_step(
data, labels, current_theta, layers, regularization_param
),
# Record gradient descent progress for debugging.
callback=lambda current_theta: cost_history.append(MultilayerPerceptron.cost_function(
data, labels, current_theta, layers, regularization_param
)),
options={'maxiter': max_iteration}
)

# Raise an error if gradient descent did not converge.
if not minimization_result.success:
raise ArithmeticError('Can not minimize cost function: ' + minimization_result.message)

optimized_theta = minimization_result.x

return optimized_theta, cost_history
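
As an aside, a minimal self-contained sketch of the same scipy.optimize.minimize pattern used above (method='CG', an explicit jac, and a callback that records cost history), applied to a toy quadratic; the toy cost function is purely illustrative:

import numpy as np
from scipy.optimize import minimize

cost_history = []
result = minimize(
    lambda x: np.sum((x - 3) ** 2),         # toy cost: squared distance from 3
    np.zeros(5),                             # initial parameter vector
    method='CG',                             # conjugate gradient
    jac=lambda x: 2 * (x - 3),               # analytic gradient of the toy cost
    callback=lambda x: cost_history.append(np.sum((x - 3) ** 2)),
    options={'maxiter': 100},
)
print(result.x)           # approximately [3. 3. 3. 3. 3.]
print(len(cost_history))  # one entry per optimizer iteration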

@staticmethod
def gradient_step(data, labels, unrolled_thetas, layers, regularization_param):
"""Gradient step function.

Computes the gradient of the cost function with respect to the unrolled theta parameters.

:param data: the set of training or test data.
:param labels: training set outputs (the class of each example).
:param unrolled_thetas: flat vector of model parameters.
:param layers: model layers configuration.
:param regularization_param: regularization parameter.
"""

# Reshape the unrolled parameters back into per-layer theta matrices.
thetas = MultilayerPerceptron.thetas_roll(unrolled_thetas, layers)

# Do backpropagation.
MultilayerPerceptron.back_propagation()

@staticmethod
def cost_function(data, labels, thetas, layers, regularization_param):
@@ -176,7 +183,11 @@ def feedforward_propagation(data, thetas, layers)
return layer_in[:, 1:]

@staticmethod
def back_propagation():
pass
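
Because back_propagation is only a stub at this point, a standard way to validate whatever gradient gradient_step eventually returns is a numerical gradient check. The helper below is a generic finite-difference sketch that assumes cost_fn accepts a flat float parameter vector; it is an illustration, not code from the repository:

import numpy as np

def numerical_gradient(cost_fn, unrolled_thetas, step=1e-4):
    """Approximate the gradient of cost_fn at unrolled_thetas with central differences."""
    gradient = np.zeros_like(unrolled_thetas)
    for index in range(unrolled_thetas.size):
        shift = np.zeros_like(unrolled_thetas)
        shift[index] = step
        gradient[index] = (cost_fn(unrolled_thetas + shift) - cost_fn(unrolled_thetas - shift)) / (2 * step)
    return gradient

Comparing this approximation with the analytic gradient on a handful of parameters is the usual sanity check before trusting a backpropagation implementation.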

@staticmethod
def thetas_init(layers, epsilon):
"""Randomly initialize the weights for each neural network layer
Each layer will have its own theta matrix W with L_in incoming connections and L_out
Expand Down Expand Up @@ -204,13 +215,40 @@ def init_layers_thetas(layers, epsilon):
return thetas
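
The body of thetas_init is collapsed in this diff. A common way to implement random initialization scaled by epsilon (a sketch under that assumption, with a hypothetical function name, not necessarily the collapsed implementation) is:

import numpy as np

def thetas_init_sketch(layers, epsilon):
    # For each pair of adjacent layers, draw a (layers_out x (layers_in + 1)) matrix of small random weights.
    thetas = {}
    for layer_index in range(len(layers) - 1):
        layers_in = layers[layer_index]
        layers_out = layers[layer_index + 1]
        # Uniform values in [-epsilon, epsilon]; the extra column is for the bias unit.
        thetas[layer_index] = np.random.rand(layers_out, layers_in + 1) * 2 * epsilon - epsilon
    return thetas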

@staticmethod
def thetas_unroll(thetas):
"""Unrolls cells of theta matrices into one long vector."""

unrolled_thetas = np.array([])
num_theta_layers = len(thetas)
for theta_layer_index in range(num_theta_layers):
# Unroll cells into vector form.
unrolled_thetas = np.hstack((unrolled_thetas, thetas[theta_layer_index].flatten()))

return unrolled_thetas
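
To make the flattening concrete, a small illustrative run of thetas_unroll with arbitrary matrix shapes:

import numpy as np

thetas = {
    0: np.ones((4, 4)),  # hidden layer: 4 units, 3 inputs + bias
    1: np.ones((2, 5)),  # output layer: 2 units, 4 inputs + bias
}
unrolled = MultilayerPerceptron.thetas_unroll(thetas)
print(unrolled.shape)  # (26,) -- 16 + 10 parameters in one flat vector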

@staticmethod
def thetas_roll(unrolled_thetas, layers):
"""Rolls NN params vector into the matrix"""

# Get total numbers of layers.
num_layers = len(layers)

# Init rolled thetas dictionary.
thetas = {}
unrolled_shift = 0

for layer_index in range(num_layers - 1):
layers_in = layers[layer_index]
layers_out = layers[layer_index + 1]

thetas_width = layers_in + 1  # We need to remember about the bias unit.
thetas_height = layers_out
thetas_volume = thetas_width * thetas_height

# Slice this layer's parameters out of the unrolled vector and reshape them.
start_index = unrolled_shift
end_index = unrolled_shift + thetas_volume
layer_thetas_unrolled = unrolled_thetas[start_index:end_index]
thetas[layer_index] = layer_thetas_unrolled.reshape((thetas_height, thetas_width))

return thetas
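
A round trip ties the two helpers together. For a layers configuration of [3, 4, 2] (an arbitrary example), the 26 unrolled values are reshaped back into a (4, 4) hidden-layer matrix and a (2, 5) output-layer matrix:

import numpy as np

layers = [3, 4, 2]                     # 3 inputs, 4 hidden units, 2 outputs
unrolled = np.arange(26, dtype=float)  # 4 * (3 + 1) + 2 * (4 + 1) = 26 parameters
thetas = MultilayerPerceptron.thetas_roll(unrolled, layers)
print(thetas[0].shape)  # (4, 4)
print(thetas[1].shape)  # (2, 5)

# Rolling and then unrolling reproduces the original flat vector.
assert np.array_equal(MultilayerPerceptron.thetas_unroll(thetas), unrolled)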