From 0ab7629f5463aa076560b7cfbb7027a8e87b6da4 Mon Sep 17 00:00:00 2001
From: eriklindernoren
Date: Mon, 22 Jan 2018 23:09:15 +0100
Subject: [PATCH] Autoencoder

---
 README.md                                |  17 ++-
 .../unsupervised_learning/autoencoder.py | 119 ++++++++++++++++++
 2 files changed, 127 insertions(+), 9 deletions(-)
 create mode 100644 mlfromscratch/unsupervised_learning/autoencoder.py

diff --git a/README.md b/README.md
index 7c181926..301004b7 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 ## About
 Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.
-The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible 
+The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible
 but rather to present the inner workings of them in a transparent and accessible way.
 
 ## Table of Contents
@@ -27,12 +27,12 @@ but rather to present the inner workings of them in a transparent and accessible
   + [Reinforcement Learning](#reinforcement-learning)
   + [Deep Learning](#deep-learning)
 * [Contact](#contact)
-    
+
 ## Installation
     $ git clone https://github.com/eriklindernoren/ML-From-Scratch
     $ cd ML-From-Scratch
     $ python setup.py install
-    
+
 ## Example Usage
 ### Polynomial Regression
     $ python mlfromscratch/examples/polynomial_regression.py
@@ -85,7 +85,7 @@ but rather to present the inner workings of them in a transparent and accessible
 ### Density-Based Clustering
     $ python mlfromscratch/examples/dbscan.py
-    
+

@@ -135,7 +135,7 @@ but rather to present the inner workings of them in a transparent and accessible
     +------------------------+------------+--------------+
     Total Parameters: 533762
-    
+

@@ -244,7 +244,7 @@ but rather to present the inner workings of them in a transparent and accessible
     [294 Answer: 'Genetic Algorithm']
 
 ### Association Analysis
-    $ python mlfromscratch/examples/apriori.py 
+    $ python mlfromscratch/examples/apriori.py
     +-------------+
     |   Apriori   |
     +-------------+
@@ -292,6 +292,7 @@ but rather to present the inner workings of them in a transparent and accessible
 
 ### Unsupervised Learning
 - [Apriori](mlfromscratch/unsupervised_learning/apriori.py)
+- [Autoencoder](mlfromscratch/unsupervised_learning/autoencoder.py)
 - [DBSCAN](mlfromscratch/unsupervised_learning/dbscan.py)
 - [FP-Growth](mlfromscratch/unsupervised_learning/fp_growth.py)
 - [Gaussian Mixture Model](mlfromscratch/unsupervised_learning/gaussian_mixture_model.py)
@@ -327,7 +328,5 @@ but rather to present the inner workings of them in a transparent and accessible
 * [Recurrent Neural Network](mlfromscratch/examples/recurrent_neural_network.py)
 
 ## Contact
-If there's some implementation you would like to see here or if you're just feeling social, 
+If there's some implementation you would like to see here or if you're just feeling social,
 feel free to [email](mailto:eriklindernoren@gmail.com) me or connect with me on [LinkedIn](https://www.linkedin.com/in/eriklindernoren/).
-
-
diff --git a/mlfromscratch/unsupervised_learning/autoencoder.py b/mlfromscratch/unsupervised_learning/autoencoder.py
new file mode 100644
index 00000000..1765affe
--- /dev/null
+++ b/mlfromscratch/unsupervised_learning/autoencoder.py
@@ -0,0 +1,119 @@
+from __future__ import print_function, division
+from sklearn import datasets
+import math
+import matplotlib.pyplot as plt
+import numpy as np
+import progressbar
+
+from sklearn.datasets import fetch_mldata
+
+from mlfromscratch.deep_learning.optimizers import Adam
+from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss
+from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization
+from mlfromscratch.deep_learning import NeuralNetwork
+
+
+class Autoencoder():
+    """An Autoencoder with deep fully-connected neural nets.
+
+    Training Data: MNIST Handwritten Digits (28x28 images)
+    """
+    def __init__(self):
+        self.img_rows = 28
+        self.img_cols = 28
+        self.img_dim = self.img_rows * self.img_cols
+        self.latent_dim = 128 # The dimension of the data embedding
+
+        optimizer = Adam(learning_rate=0.0002, b1=0.5)
+        loss_function = SquareLoss
+
+        # Build the encoder and decoder
+        self.encoder = self.build_encoder(optimizer, loss_function)
+        self.decoder = self.build_decoder(optimizer, loss_function)
+
+        self.autoencoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        self.autoencoder.layers.extend(self.encoder.layers)
+        self.autoencoder.layers.extend(self.decoder.layers)
+
+        print()
+        self.autoencoder.summary(name="Autoencoder")
+
+    def build_encoder(self, optimizer, loss_function):
+
+        encoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        encoder.add(Dense(512, input_shape=(self.img_dim,)))
+        encoder.add(Activation('leaky_relu'))
+        encoder.add(BatchNormalization(momentum=0.8))
+        encoder.add(Dense(256))
+        encoder.add(Activation('leaky_relu'))
+        encoder.add(BatchNormalization(momentum=0.8))
+        encoder.add(Dense(self.latent_dim))
+
+        return encoder
+
+    def build_decoder(self, optimizer, loss_function):
+
+        decoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        decoder.add(Dense(256, input_shape=(self.latent_dim,)))
+        decoder.add(Activation('leaky_relu'))
+        decoder.add(BatchNormalization(momentum=0.8))
+        decoder.add(Dense(512))
+        decoder.add(Activation('leaky_relu'))
+        decoder.add(BatchNormalization(momentum=0.8))
+        decoder.add(Dense(self.img_dim))
+        decoder.add(Activation('tanh'))
+
+        return decoder
+
+    def train(self, n_epochs, batch_size=128, save_interval=50):
+
+        mnist = fetch_mldata('MNIST original')
+
+        X = mnist.data
+        y = mnist.target # labels are not needed; reconstruction is unsupervised
+
+        # Rescale to [-1, 1] to match the decoder's tanh output
+        X = (X.astype(np.float32) - 127.5) / 127.5
+
+        for epoch in range(n_epochs):
+
+            # Select a random batch of images
+            idx = np.random.randint(0, X.shape[0], batch_size)
+            imgs = X[idx]
+
+            # Train the autoencoder to reconstruct the batch
+            loss, _ = self.autoencoder.train_on_batch(imgs, imgs)
+
+            # Display the progress
+            print("%d [Reconstruction loss: %f]" % (epoch, loss))
+
+            # If at save interval => save reconstructed image samples
+            if epoch % save_interval == 0:
+                self.save_imgs(epoch, X)
+
+    def save_imgs(self, epoch, X):
+        r, c = 5, 5 # Grid size
+        # Select a random set of r*c images
+        idx = np.random.randint(0, X.shape[0], r*c)
+        imgs = X[idx]
+        # Reconstruct the images and reshape them to image shape
+        gen_imgs = self.autoencoder.predict(imgs).reshape((-1, self.img_rows, self.img_cols))
+
+        # Rescale images to [0, 1] for plotting
+        gen_imgs = 0.5 * gen_imgs + 0.5
+
+        fig, axs = plt.subplots(r, c)
+        plt.suptitle("Autoencoder")
+        cnt = 0
+        for i in range(r):
+            for j in range(c):
+                axs[i,j].imshow(gen_imgs[cnt,:,:], cmap='gray')
+                axs[i,j].axis('off')
+                cnt += 1
+        fig.savefig("ae_%d.png" % epoch)
+        plt.close()
+
+
+if __name__ == '__main__':
+    ae = Autoencoder()
+    ae.train(n_epochs=200000, batch_size=64, save_interval=400)
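
A practical caveat before running the example: `fetch_mldata` was deprecated in scikit-learn 0.20 and removed in 0.22, so on a current install the `'MNIST original'` download in `train` will fail. A minimal substitute, assuming a scikit-learn recent enough to provide `fetch_openml` (its `'mnist_784'` dataset is the OpenML mirror of the same 70000x784 table):

    import numpy as np
    from sklearn.datasets import fetch_openml

    # Drop-in replacement for fetch_mldata('MNIST original')
    mnist = fetch_openml('mnist_784')

    # Newer scikit-learn versions may return a DataFrame, so coerce to a
    # plain float32 array before applying the same [-1, 1] rescaling
    X = np.asarray(mnist.data, dtype=np.float32)
    X = (X - 127.5) / 127.5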
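
The model is trained to reproduce its own input: the encoder compresses each 784-pixel digit to a 128-dimensional code, the decoder maps the code back to pixel space, and SquareLoss penalizes the squared reconstruction error ||x - decoder(encoder(x))||^2, which is why `train_on_batch` receives `imgs` as both input and target. Note that an "epoch" in this script is one gradient update on a single random mini-batch rather than a full pass over MNIST. Because the combined network extends the encoder's and decoder's own layer lists, the trained halves share their weights with it and can be used separately. A sketch under that assumption, where `NeuralNetwork.predict` is the same forward pass `autoencoder.predict` relies on, `X` is the rescaled array from the snippet above, and the 16-image slice and shortened run are arbitrary choices:

    from mlfromscratch.unsupervised_learning.autoencoder import Autoencoder

    ae = Autoencoder()
    ae.train(n_epochs=20000, batch_size=64, save_interval=1000)

    codes = ae.encoder.predict(X[:16])    # (16, 128) latent embeddings
    recons = ae.decoder.predict(codes)    # (16, 784) reconstructions in [-1, 1]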
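
Reconstruction quality is easier to judge with originals and reconstructions side by side, which `save_imgs` does not show. A hypothetical helper in the same style (the name `show_pairs` and its layout are illustrative, not part of the patch):

    import matplotlib.pyplot as plt
    import numpy as np

    def show_pairs(ae, X, n=8):
        # Top row: original digits; bottom row: their reconstructions
        idx = np.random.randint(0, X.shape[0], n)
        imgs = X[idx]
        recons = ae.autoencoder.predict(imgs)
        fig, axs = plt.subplots(2, n)
        for row, batch in zip(axs, [imgs, recons]):
            for ax, img in zip(row, batch):
                # Undo the [-1, 1] rescaling for display
                ax.imshow(0.5 * img.reshape(28, 28) + 0.5, cmap='gray')
                ax.axis('off')
        fig.savefig("ae_pairs.png")
        plt.close()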