
Commit

Add multilayer perceptron.
trekhleb committed Dec 20, 2018
1 parent 97bbb29 commit b9683cb
Showing 2 changed files with 91 additions and 35 deletions.
77 changes: 71 additions & 6 deletions homemade/neural_network/multilayer_perceptron.py
@@ -24,6 +24,7 @@ def __init__(self, data, labels, layers, epsilon, normalize_data=False):
        self.labels = labels
        self.layers = layers
        self.epsilon = epsilon
+       self.normalize_data = normalize_data

        # Randomly initialize the weights for each neural network layer.
        self.thetas = MultilayerPerceptron.thetas_init(layers, epsilon)
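
For orientation, a minimal construction sketch (the data, layer sizes, and epsilon below are illustrative, not from this commit; a layers value of [3, 2, 2] means 3 input features, one hidden layer of 2 units, and 2 output classes):

import numpy as np

# Hypothetical toy problem: 5 examples, 3 features, 2 classes.
data = np.random.rand(5, 3)
labels = np.random.randint(0, 2, (5, 1))

model = MultilayerPerceptron(
    data,
    labels,
    layers=[3, 2, 2],
    epsilon=0.05,          # bound for the random weight initialization
    normalize_data=True,   # now remembered on the instance (see the added line above)
)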
@@ -32,11 +33,8 @@ def train(self, regularization_param=0, max_iterations=1000):
        # Flatten model thetas for gradient descent.
        unrolled_thetas = MultilayerPerceptron.thetas_unroll(self.thetas)

-       # Init cost history array.
-       cost_histories = []
-
        # Run gradient descent.
-       (current_theta, cost_history) = MultilayerPerceptron.gradient_descent(
+       (optimized_thetas, cost_history) = MultilayerPerceptron.gradient_descent_manual(
            self.data,
            self.labels,
            unrolled_thetas,
@@ -45,7 +43,25 @@
            max_iterations,
        )

-       return self.thetas, cost_histories
+       # Memorize optimized theta parameters.
+       self.thetas = MultilayerPerceptron.thetas_roll(optimized_thetas, self.layers)
+
+       return self.thetas, cost_history
+
+   def predict(self, data):
+       """Predicts class labels for the given data using the trained model."""
+
+       data_processed = prepare_for_training(data, normalize_data=self.normalize_data)[0]
+
+       num_examples = data_processed.shape[0]
+
+       # Do feedforward propagation with trained neural network params.
+       predictions = MultilayerPerceptron.feedforward_propagation(
+           data_processed, self.thetas, self.layers
+       )
+
+       # Return the index of the output neuron with the highest probability.
+       return np.argmax(predictions, axis=1).reshape((num_examples, 1))

    @staticmethod
    def gradient_descent(data, labels, unrolled_theta, layers, regularization_param, max_iteration):
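
Taken together, the new train/predict pair can be exercised roughly like this (a usage sketch assuming the toy model constructed above; the accuracy line is just one way to inspect the result):

# Train with mild L2 regularization, then classify the training examples.
thetas, costs = model.train(regularization_param=0.1, max_iterations=500)

predictions = model.predict(data)           # shape: (num_examples, 1)
accuracy = np.mean(predictions == labels)   # fraction classified correctly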
@@ -103,6 +119,44 @@ def gradient_descent(data, labels, unrolled_theta, layers, regularization_param,

        return optimized_theta, cost_history

+   @staticmethod
+   def gradient_descent_manual(data, labels, unrolled_theta, layers, regularization_param, max_iteration):
+       """Gradient descent function.
+
+       Iteratively optimizes theta model parameters.
+
+       :param data: the set of training or test data.
+       :param labels: training set outputs (0 or 1 that defines the class of an example).
+       :param unrolled_theta: initial model parameters.
+       :param layers: model layers configuration.
+       :param regularization_param: regularization parameter.
+       :param max_iteration: maximum number of gradient descent steps.
+       """
+
+       optimized_theta = unrolled_theta
+
+       # Initialize cost history list.
+       cost_history = []
+
+       for iteration_index in range(max_iteration):
+           cost = MultilayerPerceptron.cost_function(
+               data,
+               labels,
+               MultilayerPerceptron.thetas_roll(optimized_theta, layers),
+               layers,
+               regularization_param
+           )
+
+           cost_history.append(cost)
+
+           theta_gradient = MultilayerPerceptron.gradient_step(
+               data, labels, optimized_theta, layers, regularization_param
+           )
+
+           optimized_theta = optimized_theta - theta_gradient
+
+       return optimized_theta, cost_history
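
Note that the update above subtracts the raw gradient, i.e. it takes full steps with an implicit learning rate of 1. A minimal standalone sketch of the same loop shape on a one-parameter quadratic, with an explicit learning rate for contrast:

# Minimize (theta - 3) ** 2 by stepping against its gradient.
theta = 0.0
learning_rate = 0.1
cost_history = []
for _ in range(100):
    cost_history.append((theta - 3) ** 2)
    gradient = 2 * (theta - 3)               # derivative of the cost
    theta = theta - learning_rate * gradient

assert abs(theta - 3) < 1e-3                 # converges near the minimum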

    @staticmethod
    def gradient_step(data, labels, unrolled_thetas, layers, regularization_param):
        """Gradient step function.
@@ -125,7 +179,9 @@ def gradient_step(data, labels, unrolled_thetas, layers, regularization_param):
        )

        # Unroll thetas gradients.
-       return MultilayerPerceptron.thetas_unroll(thetas_rolled_gradients)
+       thetas_unrolled_gradients = MultilayerPerceptron.thetas_unroll(thetas_rolled_gradients)
+
+       return thetas_unrolled_gradients

    @staticmethod
    def cost_function(data, labels, thetas, layers, regularization_param):
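
The unrolling referenced above flattens the per-layer weight matrices into one 1-D vector, the shape that gradient descent operates on. A small sketch of the idea (not the repository's thetas_unroll itself), assuming thetas is a dict of 2-D arrays keyed by layer index:

import numpy as np

# Layer matrices for layers [3, 2, 1]; each has a +1 column for the bias.
thetas = {
    0: np.arange(8, dtype=float).reshape(2, 4),   # hidden layer: (2, 3 + 1)
    1: np.arange(3, dtype=float).reshape(1, 3),   # output layer: (1, 2 + 1)
}

unrolled = np.hstack([thetas[i].flatten() for i in range(len(thetas))])
assert unrolled.shape == (11,)   # 2 * 4 + 1 * 3 parameters in a single vector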
@@ -315,6 +371,12 @@ def thetas_init(layers, epsilon):
            out_count = layers[layer_index + 1]
            thetas[layer_index] = np.random.rand(out_count, in_count + 1) * 2 * epsilon - epsilon

+       # thetas[0] = np.array([[-0.092631, -0.061615, -0.042194]])
+       # thetas[1] = np.array([
+       #     [0.047492, -0.074013],
+       #     [-0.056754, 0.022874],
+       # ])

        return thetas

    @staticmethod
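
The initialization above draws every weight uniformly from (-epsilon, epsilon), with one extra input column per layer for the bias unit. A quick standalone check of the resulting shapes and value range, reusing the same formula:

import numpy as np

layers, epsilon = [3, 2, 1], 0.05
thetas = {}
for layer_index in range(len(layers) - 1):
    in_count = layers[layer_index]
    out_count = layers[layer_index + 1]
    # Map uniform [0, 1) samples into (-epsilon, epsilon); +1 for the bias.
    thetas[layer_index] = np.random.rand(out_count, in_count + 1) * 2 * epsilon - epsilon

assert thetas[0].shape == (2, 4) and thetas[1].shape == (1, 3)
assert np.abs(thetas[0]).max() <= epsilon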
@@ -354,4 +416,7 @@ def thetas_roll(unrolled_thetas, layers):
            layer_thetas_unrolled = unrolled_thetas[start_index:end_index]
            thetas[layer_index] = layer_thetas_unrolled.reshape((thetas_height, thetas_width))

+           # Shift frame to the right.
+           unrolled_shift = unrolled_shift + thetas_volume

        return thetas
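
And the inverse of unrolling: thetas_roll walks the flat vector with a sliding frame (start_index to end_index) and reshapes each slice back into its layer matrix. A round-trip sketch under the same [3, 2, 1] layout as the unroll example:

import numpy as np

layers = [3, 2, 1]
unrolled = np.arange(11, dtype=float)   # 2 * 4 + 1 * 3 parameters

thetas, unrolled_shift = {}, 0
for layer_index in range(len(layers) - 1):
    thetas_height = layers[layer_index + 1]
    thetas_width = layers[layer_index] + 1
    thetas_volume = thetas_height * thetas_width
    start_index = unrolled_shift
    end_index = unrolled_shift + thetas_volume
    thetas[layer_index] = unrolled[start_index:end_index].reshape((thetas_height, thetas_width))
    unrolled_shift = unrolled_shift + thetas_volume   # shift frame to the right

assert thetas[0].shape == (2, 4) and thetas[1].shape == (1, 3)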