From 0ab7629f5463aa076560b7cfbb7027a8e87b6da4 Mon Sep 17 00:00:00 2001
From: eriklindernoren
Date: Mon, 22 Jan 2018 23:09:15 +0100
Subject: [PATCH] Autoencoder

---
 README.md                                |  17 ++-
 .../unsupervised_learning/autoencoder.py | 119 ++++++++++++++++++
 2 files changed, 127 insertions(+), 9 deletions(-)
 create mode 100644 mlfromscratch/unsupervised_learning/autoencoder.py

diff --git a/README.md b/README.md
index 7c181926..301004b7 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 ## About
 Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.
-The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible 
+The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible
 but rather to present the inner workings of them in a transparent and accessible way.
 
 ## Table of Contents
@@ -27,12 +27,12 @@ but rather to present the inner workings of them in a transparent and accessible
   + [Reinforcement Learning](#reinforcement-learning)
   + [Deep Learning](#deep-learning)
 * [Contact](#contact)
-    
+
 ## Installation
     $ git clone https://github.com/eriklindernoren/ML-From-Scratch
     $ cd ML-From-Scratch
     $ python setup.py install
-    
+
 ## Example Usage
 ### Polynomial Regression
     $ python mlfromscratch/examples/polynomial_regression.py
@@ -85,7 +85,7 @@ but rather to present the inner workings of them in a transparent and accessible
 ### Density-Based Clustering
     $ python mlfromscratch/examples/dbscan.py
-    
+

@@ -135,7 +135,7 @@ but rather to present the inner workings of them in a transparent and accessible
     +------------------------+------------+--------------+
     Total Parameters: 533762
-    
+

@@ -244,7 +244,7 @@ but rather to present the inner workings of them in a transparent and accessible
     [294 Answer: 'Genetic Algorithm']
 
 ### Association Analysis
-    $ python mlfromscratch/examples/apriori.py 
+    $ python mlfromscratch/examples/apriori.py
     +-------------+
     |   Apriori   |
     +-------------+
@@ -292,6 +292,7 @@ but rather to present the inner workings of them in a transparent and accessible
 
 ### Unsupervised Learning
 - [Apriori](mlfromscratch/unsupervised_learning/apriori.py)
+- [Autoencoder](mlfromscratch/unsupervised_learning/autoencoder.py)
 - [DBSCAN](mlfromscratch/unsupervised_learning/dbscan.py)
 - [FP-Growth](mlfromscratch/unsupervised_learning/fp_growth.py)
 - [Gaussian Mixture Model](mlfromscratch/unsupervised_learning/gaussian_mixture_model.py)
@@ -327,7 +328,5 @@ but rather to present the inner workings of them in a transparent and accessible
 * [Recurrent Neural Network](mlfromscratch/examples/recurrent_neural_network.py)
 
 ## Contact
-If there's some implementation you would like to see here or if you're just feeling social, 
+If there's some implementation you would like to see here or if you're just feeling social,
 feel free to [email](mailto:eriklindernoren@gmail.com) me or connect with me on [LinkedIn](https://www.linkedin.com/in/eriklindernoren/).
-
-
diff --git a/mlfromscratch/unsupervised_learning/autoencoder.py b/mlfromscratch/unsupervised_learning/autoencoder.py
new file mode 100644
index 00000000..1765affe
--- /dev/null
+++ b/mlfromscratch/unsupervised_learning/autoencoder.py
@@ -0,0 +1,119 @@
+from __future__ import print_function, division
+from sklearn import datasets
+import math
+import matplotlib.pyplot as plt
+import numpy as np
+import progressbar
+
+from sklearn.datasets import fetch_mldata
+
+from mlfromscratch.deep_learning.optimizers import Adam
+from mlfromscratch.deep_learning.loss_functions import CrossEntropy, SquareLoss
+from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization
+from mlfromscratch.deep_learning import NeuralNetwork
+
+
+class Autoencoder():
+    """An Autoencoder with deep fully-connected neural nets.
+
+    Training Data: MNIST Handwritten Digits (28x28 images)
+    """
+    def __init__(self):
+        self.img_rows = 28
+        self.img_cols = 28
+        self.img_dim = self.img_rows * self.img_cols
+        self.latent_dim = 128 # The dimension of the data embedding
+
+        optimizer = Adam(learning_rate=0.0002, b1=0.5)
+        loss_function = SquareLoss
+
+        # Build the encoder and decoder
+        self.encoder = self.build_encoder(optimizer, loss_function)
+        self.decoder = self.build_decoder(optimizer, loss_function)
+
+        self.autoencoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        self.autoencoder.layers.extend(self.encoder.layers)
+        self.autoencoder.layers.extend(self.decoder.layers)
+
+        print()
+        self.autoencoder.summary(name="Autoencoder")
+
+    def build_encoder(self, optimizer, loss_function):
+
+        encoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        encoder.add(Dense(512, input_shape=(self.img_dim,)))
+        encoder.add(Activation('leaky_relu'))
+        encoder.add(BatchNormalization(momentum=0.8))
+        encoder.add(Dense(256))
+        encoder.add(Activation('leaky_relu'))
+        encoder.add(BatchNormalization(momentum=0.8))
+        encoder.add(Dense(self.latent_dim))
+
+        return encoder
+
+    def build_decoder(self, optimizer, loss_function):
+
+        decoder = NeuralNetwork(optimizer=optimizer, loss=loss_function)
+        decoder.add(Dense(256, input_shape=(self.latent_dim,)))
+        decoder.add(Activation('leaky_relu'))
+        decoder.add(BatchNormalization(momentum=0.8))
+        decoder.add(Dense(512))
+        decoder.add(Activation('leaky_relu'))
+        decoder.add(BatchNormalization(momentum=0.8))
+        decoder.add(Dense(self.img_dim))
+        decoder.add(Activation('tanh'))
+
+        return decoder
+
+    def train(self, n_epochs, batch_size=128, save_interval=50):
+
+        mnist = fetch_mldata('MNIST original')
+
+        X = mnist.data
+        y = mnist.target # labels are not needed; reconstruction is unsupervised
+
+        # Rescale to [-1, 1] to match the decoder's tanh output
+        X = (X.astype(np.float32) - 127.5) / 127.5
+
+        for epoch in range(n_epochs):
+
+            # Select a random batch of images
+            idx = np.random.randint(0, X.shape[0], batch_size)
+            imgs = X[idx]
+
+            # Train the autoencoder to reconstruct the batch
+            loss, _ = self.autoencoder.train_on_batch(imgs, imgs)
+
+            # Display the progress
+            print("%d [Reconstruction loss: %f]" % (epoch, loss))
+
+            # If at save interval => save reconstructed image samples
+            if epoch % save_interval == 0:
+                self.save_imgs(epoch, X)
+
+    def save_imgs(self, epoch, X):
+        r, c = 5, 5 # Grid size
+        # Select a random set of r*c images
+        idx = np.random.randint(0, X.shape[0], r*c)
+        imgs = X[idx]
+        # Reconstruct the images and reshape them to image shape
+        gen_imgs = self.autoencoder.predict(imgs).reshape((-1, self.img_rows, self.img_cols))
+
+        # Rescale images to [0, 1] for plotting
+        gen_imgs = 0.5 * gen_imgs + 0.5
+
+        fig, axs = plt.subplots(r, c)
+        plt.suptitle("Autoencoder")
+        cnt = 0
+        for i in range(r):
+            for j in range(c):
+                axs[i,j].imshow(gen_imgs[cnt,:,:], cmap='gray')
+                axs[i,j].axis('off')
+                cnt += 1
+        fig.savefig("ae_%d.png" % epoch)
+        plt.close()
+
+
+if __name__ == '__main__':
+    ae = Autoencoder()
+    ae.train(n_epochs=200000, batch_size=64, save_interval=400)
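
A practical caveat before running the example: `fetch_mldata` was deprecated in scikit-learn 0.20 and removed in 0.22, so on a current install the `'MNIST original'` download in `train` will fail. A minimal substitute, assuming a scikit-learn recent enough to provide `fetch_openml` (its `'mnist_784'` dataset is the OpenML mirror of the same 70000x784 table):

    import numpy as np
    from sklearn.datasets import fetch_openml

    # Drop-in replacement for fetch_mldata('MNIST original')
    mnist = fetch_openml('mnist_784')

    # Newer scikit-learn versions may return a DataFrame, so coerce to a
    # plain float32 array before applying the same [-1, 1] rescaling
    X = np.asarray(mnist.data, dtype=np.float32)
    X = (X - 127.5) / 127.5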
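
The model is trained to reproduce its own input: the encoder compresses each 784-pixel digit to a 128-dimensional code, the decoder maps the code back to pixel space, and SquareLoss penalizes the squared reconstruction error ||x - decoder(encoder(x))||^2, which is why `train_on_batch` receives `imgs` as both input and target. Note that an "epoch" in this script is one gradient update on a single random mini-batch rather than a full pass over MNIST. Because the combined network extends the encoder's and decoder's own layer lists, the trained halves share their weights with it and can be used separately. A sketch under that assumption, where `NeuralNetwork.predict` is the same forward pass `autoencoder.predict` relies on, `X` is the rescaled array from the snippet above, and the 16-image slice and shortened run are arbitrary choices:

    from mlfromscratch.unsupervised_learning.autoencoder import Autoencoder

    ae = Autoencoder()
    ae.train(n_epochs=20000, batch_size=64, save_interval=1000)

    codes = ae.encoder.predict(X[:16])    # (16, 128) latent embeddings
    recons = ae.decoder.predict(codes)    # (16, 784) reconstructions in [-1, 1]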
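
Reconstruction quality is easier to judge with originals and reconstructions side by side, which `save_imgs` does not show. A hypothetical helper in the same style (the name `show_pairs` and its layout are illustrative, not part of the patch):

    import matplotlib.pyplot as plt
    import numpy as np

    def show_pairs(ae, X, n=8):
        # Top row: original digits; bottom row: their reconstructions
        idx = np.random.randint(0, X.shape[0], n)
        imgs = X[idx]
        recons = ae.autoencoder.predict(imgs)
        fig, axs = plt.subplots(2, n)
        for row, batch in zip(axs, [imgs, recons]):
            for ax, img in zip(row, batch):
                # Undo the [-1, 1] rescaling for display
                ax.imshow(0.5 * img.reshape(28, 28) + 0.5, cmap='gray')
                ax.axis('off')
        fig.savefig("ae_pairs.png")
        plt.close()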