Particle Swarm Optimization of Neural Nets

masscollaborationlabs · Oct 6, 2017 · 3794bde · 3794bde
1 parent a311699
commit 3794bde
Show file tree

Hide file tree

Showing 7 changed files with 209 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -281,6 +281,7 @@ but rather to present the inner workings of them in a transparent and accessible
 - [Multilayer Perceptron](mlfromscratch/supervised_learning/multilayer_perceptron.py)
 - [Naive Bayes](mlfromscratch/supervised_learning/naive_bayes.py)
 - [Neuroevolution](mlfromscratch/supervised_learning/neuroevolution.py)
+- [Particle Swarm Optimization of Neural Network](mlfromscratch/supervised_learning/particle_swarm_optimization.py)
 - [Perceptron](mlfromscratch/supervised_learning/perceptron.py)
 - [Polynomial Regression](mlfromscratch/supervised_learning/regression.py)
 - [Random Forest](mlfromscratch/supervised_learning/random_forest.py)

diff --git a/mlfromscratch/examples/neuroevolution.py b/mlfromscratch/examples/neuroevolution.py
@@ -31,16 +31,16 @@ def model_builder(n_inputs, n_outputs):
         return model
 
     # Print the model summary of a individual in the population
-    print ()
+    print ("")
     model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary()
 
     population_size = 100
-    n_generations = 1000
+    n_generations = 3000
     mutation_rate = 0.01
 
     print ("Population Size: %d" % population_size)
     print ("Generations: %d" % n_generations)
-    print ("Mutation Rate: %d" % mutation_rate)
+    print ("Mutation Rate: %.2f" % mutation_rate)
     print ("")
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)
@@ -52,7 +52,6 @@ def model_builder(n_inputs, n_outputs):
     model = model.evolve(X_train, y_train, n_generations=n_generations)
 
     loss, accuracy = model.test_on_batch(X_test, y_test)
-    print ("Test set accuracy: %.1f%%" % float(100*accuracy))
 
     # Reduce dimension to 2D using PCA and plot the results
     y_pred = np.argmax(model.predict(X_test), axis=1)

diff --git a/mlfromscratch/examples/particle_swarm_optimization.py b/mlfromscratch/examples/particle_swarm_optimization.py
@@ -0,0 +1,70 @@
+
+from __future__ import print_function
+from sklearn import datasets
+import matplotlib.pyplot as plt
+import numpy as np
+
+from mlfromscratch.supervised_learning import ParticleSwarmOptimizedNN
+from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot
+from mlfromscratch.deep_learning import NeuralNetwork
+from mlfromscratch.deep_learning.layers import Activation, Dense
+from mlfromscratch.deep_learning.loss_functions import CrossEntropy
+from mlfromscratch.deep_learning.optimizers import Adam
+
+def main():
+    """ Optimizes a small neural network on the Iris dataset with particle swarm
+    optimization, prints the settings used and plots the test set predictions. """
+
+    # Load Iris, normalize the features and convert the integer labels with
+    # to_categorical (y.shape[1] is used below as the number of network outputs)
+    data = datasets.load_iris()
+    X = normalize(data.data)
+    y = to_categorical(data.target.astype("int"))
+
+    # Model builder
+    def model_builder(n_inputs, n_outputs):
+        """ Returns a new network with one hidden layer of 16 units; called
+        once for every individual of the population """
+        model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
+        model.add(Dense(16, input_shape=(n_inputs,)))
+        model.add(Activation('relu'))
+        model.add(Dense(n_outputs))
+        model.add(Activation('softmax'))
+
+        return model
+
+    # Print the model summary of an individual in the population
+    print ("")
+    model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary()
+
+    population_size = 100
+    n_generations = 10
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)
+
+    # Weights of the three terms of the velocity update
+    inertia_weight = 0.8
+    cognitive_weight = 0.8
+    social_weight = 0.8
+
+    print ("Population Size: %d" % population_size)
+    print ("Generations: %d" % n_generations)
+    print ("")
+    print ("Inertia Weight: %.2f" % inertia_weight)
+    print ("Cognitive Weight: %.2f" % cognitive_weight)
+    print ("Social Weight: %.2f" % social_weight)
+    print ("")
+
+    model = ParticleSwarmOptimizedNN(population_size=population_size,
+                        inertia_weight=inertia_weight,
+                        cognitive_weight=cognitive_weight,
+                        social_weight=social_weight,
+                        model_builder=model_builder)
+
+    # evolve() returns the best individual found, which replaces the optimizer here
+    model = model.evolve(X_train, y_train, n_generations=n_generations)
+
+    # Only the accuracy on the held-out data is needed (shown in the plot)
+    _, accuracy = model.test_on_batch(X_test, y_test)
+
+    # Reduce dimension to 2D using PCA and plot the results
+    y_pred = np.argmax(model.predict(X_test), axis=1)
+    Plot().plot_in_2d(X_test, y_pred, title="Particle Swarm Optimized Neural Network", accuracy=accuracy, legend_labels=range(y.shape[1]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/supervised_learning/__init__.py b/mlfromscratch/supervised_learning/__init__.py
@@ -14,3 +14,4 @@
 from .support_vector_machine import SupportVectorMachine
 from .xgboost import XGBoost
 from .neuroevolution import Neuroevolution
+from .particle_swarm_optimization import ParticleSwarmOptimizedNN
diff --git a/mlfromscratch/supervised_learning/multilayer_perceptron.py b/mlfromscratch/supervised_learning/multilayer_perceptron.py
@@ -97,10 +97,10 @@ def main():
     # Convert the nominal y values to binary
     y = to_categorical(y)
 
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)
 
     # MLP
-    clf = MultilayerPerceptron(n_hidden=12,
+    clf = MultilayerPerceptron(n_hidden=16,
         n_iterations=1000,
         learning_rate=0.01)
 

diff --git a/mlfromscratch/supervised_learning/neuroevolution.py b/mlfromscratch/supervised_learning/neuroevolution.py
@@ -25,7 +25,7 @@ def _build_model(self, id):
         model.id = id
         model.fitness = 0
         model.accuracy = 0
-
+        
         return model
 
     def _initialize_population(self):
@@ -40,10 +40,11 @@ def _mutate(self, individual, var=1):
         for layer in individual.layers:
             if hasattr(layer, 'W'):
                 # Mutation of weight with probability self.mutation_rate
-                mutation_mask = np.random.binomial(1, self.mutation_rate, size=layer.W.shape)
-                layer.W += np.random.normal(0, var, size=layer.W.shape) * mutation_mask
-                mutation_mask = np.random.binomial(1, self.mutation_rate, size=layer.w0.shape)
-                layer.w0 += np.random.normal(0, var, size=layer.w0.shape) * mutation_mask
+                mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.W.shape)
+                layer.W += np.random.normal(loc=0, scale=var, size=layer.W.shape) * mutation_mask
+                mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.w0.shape)
+                layer.w0 += np.random.normal(loc=0, scale=var, size=layer.w0.shape) * mutation_mask
+
         return individual
 
     def _inherit_weights(self, child, parent):
@@ -71,7 +72,7 @@ def _crossover(self, parent1, parent2):
                 child1.layers[i].w0[:, cutoff:] = parent2.layers[i].w0[:, cutoff:].copy()
                 child2.layers[i].W[:, cutoff:] = parent1.layers[i].W[:, cutoff:].copy()
                 child2.layers[i].w0[:, cutoff:] = parent1.layers[i].w0[:, cutoff:].copy()
-
+        
         return child1, child2
 
     def _calculate_fitness(self):
@@ -109,10 +110,10 @@ def evolve(self, X, y, n_generations):
             next_population = [self.population[i] for i in range(n_winners)]
 
             total_fitness = np.sum([model.fitness for model in self.population])
-            # Parents are selected with probabilities proportionate to their 
-            # fitness (without replacement to preserve diversity)
+            # The probability that a individual will be selected as a parent is proportionate to its fitness
             parent_probabilities = [model.fitness / total_fitness for model in self.population]
-            parents = np.random.choice(a=self.population, size=n_parents, p=parent_probabilities, replace=False)
+            # Select parents according to probabilities (without replacement to preserve diversity)
+            parents = np.random.choice(self.population, size=n_parents, p=parent_probabilities, replace=False)
             for i in np.arange(0, len(parents), 2):
                 # Perform crossover to produce offspring
                 child1, child2 = self._crossover(parents[i], parents[i+1])

diff --git a/mlfromscratch/supervised_learning/particle_swarm_optimization.py b/mlfromscratch/supervised_learning/particle_swarm_optimization.py
@@ -0,0 +1,122 @@
+from __future__ import print_function, division
+import numpy as np
+import copy
+
+class ParticleSwarmOptimizedNN():
+    """ Particle Swarm Optimization of Neural Network.
+
+    Every individual (particle) of the population is a neural network returned by
+    model_builder. In each generation the weights of every individual are moved by
+    a velocity that combines its previous velocity, the pull towards the best
+    weights the individual itself has had and the pull towards the best weights
+    found by the whole population.
+
+    Parameters:
+    -----------
+    population_size: int
+        The number of neural networks that are allowed in the population at a time.
+    inertia_weight:     float [0,1)
+        Factor applied to the previous velocity (the range is not validated).
+    cognitive_weight:   float [0,1)
+        Factor applied to the pull towards the individual's own best weights.
+    social_weight:      float [0,1)
+        Factor applied to the pull towards the population's best weights.
+    model_builder: method
+        A method which returns a user specified NeuralNetwork instance. It is called
+        as model_builder(n_inputs=..., n_outputs=...). The returned model must expose
+        `layers` (trainable layers having `W` and `w0`) and
+        `test_on_batch(X, y)` returning (loss, accuracy).
+
+    Reference:
+        Neural Network Training Using Particle Swarm Optimization
+        https://visualstudiomagazine.com/articles/2013/12/01/neural-network-training-using-particle-swarm-optimization.aspx 
+    """
+    def __init__(self, population_size, inertia_weight, cognitive_weight, social_weight, model_builder):
+        self.population_size = population_size
+        self.model_builder = model_builder
+        self.best_individual = None
+        # Parameters used to update velocity
+        self.cognitive_w = cognitive_weight
+        self.inertia_w = inertia_weight
+        self.social_w = social_weight
+
+    def _build_model(self, id):
+        """ Returns a new individual with zero velocity and zero fitness """
+        model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1])
+        model.id = id
+        model.fitness = 0
+        model.highest_fitness = 0
+        model.accuracy = 0
+
+        # Set initial velocity (one entry per layer so that indices line up with
+        # model.layers; layers without weights get a placeholder)
+        model.velocity = []
+        for layer in model.layers:
+            if hasattr(layer, 'W'):
+                velocity = {"W": np.zeros_like(layer.W), "w0": np.zeros_like(layer.w0)}
+            else:
+                velocity = {"W": 0, "w0": 0}
+            model.velocity.append(velocity)
+
+        # Set initial best as the current initialization. This has to be a deep
+        # copy: a shallow copy of the list would hold the very same layer objects,
+        # which are updated in place, and the cognitive term would always be zero.
+        model.best_layers = copy.deepcopy(model.layers)
+
+        return model
+
+    def _initialize_population(self):
+        """ Initialization of the neural networks forming the population"""
+        self.population = []
+        for i in range(self.population_size):
+            model = self._build_model(id=i)
+            self.population.append(model)
+
+    def _snapshot(self, individual):
+        """ Returns a copy of the individual whose layers are independent of it.
+
+        Only the layers are deep copied; all other attributes are shared with the
+        original since the model may hold members that can not be deep copied. """
+        snapshot = copy.copy(individual)
+        snapshot.layers = copy.deepcopy(individual.layers)
+        return snapshot
+
+    def _update_weights(self, individual):
+        """ Calculate the new velocity and update weights for each layer """
+        # Two random parameters used to update the velocity (drawn once per
+        # individual and shared by all of its layers)
+        r1 = np.random.uniform()
+        r2 = np.random.uniform()
+        for i, layer in enumerate(individual.layers):
+            # Layers without weights (e.g. activations) have nothing to update
+            if not hasattr(layer, 'W'):
+                continue
+
+            own_best = individual.best_layers[i]
+            population_best = self.best_individual.layers[i]
+            velocity = individual.velocity[i]
+
+            # Layer weights velocity: inertia + cognitive + social term
+            velocity["W"] = (self.inertia_w * velocity["W"]
+                             + self.cognitive_w * r1 * (own_best.W - layer.W)
+                             + self.social_w * r2 * (population_best.W - layer.W))
+
+            # Bias weight velocity
+            velocity["w0"] = (self.inertia_w * velocity["w0"]
+                              + self.cognitive_w * r1 * (own_best.w0 - layer.w0)
+                              + self.social_w * r2 * (population_best.w0 - layer.w0))
+
+            # Update layer weights with velocity
+            layer.W += velocity["W"]
+            layer.w0 += velocity["w0"]
+
+    def _calculate_fitness(self, individual):
+        """ Evaluate the individual on the data given to evolve() to get fitness scores """
+        loss, acc = individual.test_on_batch(self.X, self.y)
+        # Fitness is the inverse of the loss; the epsilon avoids division by zero
+        individual.fitness = 1 / (loss + 1e-8)
+        individual.accuracy = acc
+
+    def evolve(self, X, y, n_generations):
+        """ Will evolve the population for n_generations based on dataset X and labels y.
+
+        Returns a snapshot of the individual with the highest fitness seen during
+        the run; its weights are the ones that produced the reported fitness. """
+        self.X, self.y = X, y
+
+        self._initialize_population()
+
+        # The best individual of the population is initialized to the first individual
+        self.best_individual = self._snapshot(self.population[0])
+
+        for epoch in range(n_generations):
+            for individual in self.population:
+                # Update the NN weights by calculating new velocity
+                self._update_weights(individual)
+                # Calculate the fitness of the updated individual
+                self._calculate_fitness(individual)
+
+                # If the current fitness is higher than the previous highest
+                # => update the individual's best layer setup (deep copy so that
+                # later in-place weight updates do not change the stored best)
+                if individual.fitness > individual.highest_fitness:
+                    individual.best_layers = copy.deepcopy(individual.layers)
+                    individual.highest_fitness = individual.fitness
+                # If the individual's fitness is higher than the highest recorded fitness for the
+                # whole population => update the best individual
+                if individual.fitness > self.best_individual.fitness:
+                    self.best_individual = self._snapshot(individual)
+
+            print ("[%d Best Individual - ID: %d Fitness: %.5f, Accuracy: %.1f%%]" % (epoch,
+                                                                            self.best_individual.id,
+                                                                            self.best_individual.fitness,
+                                                                            100*float(self.best_individual.accuracy)))
+
+        return self.best_individual
+