Showing 3 changed files with 291 additions and 0 deletions.
@@ -0,0 +1,283 @@
#####################################################################################################################
# CS 6375.003 - Assignment 3, Neural Network Programming
# This is a starter code in Python 3.6 for a 2-hidden-layer neural network.
# You need to have numpy and pandas installed before running this code.
# Below are the meanings of the symbols:
# train - training dataset - can be a link to a URL or a local file
#         - you can assume the last column will be the label column
# test - test dataset - can be a link to a URL or a local file
#         - you can assume the last column will be the label column
# h1 - number of neurons in the first hidden layer
# h2 - number of neurons in the second hidden layer
# X - vector of features for each instance
# y - output for each instance
# w01, delta01, X01 - weights, updates and outputs for connection from layer 0 (input) to layer 1 (first hidden)
# w12, delta12, X12 - weights, updates and outputs for connection from layer 1 (first hidden) to layer 2 (second hidden)
# w23, delta23, X23 - weights, updates and outputs for connection from layer 2 (second hidden) to layer 3 (output layer)
#
# You need to complete all TODO marked sections
# You are free to modify this code in any way you want, but you need to mention it in the README file.
#
#####################################################################################################################
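# ---------------------------------------------------------------------------------------------------------------
# Shape sketch (illustration only, assuming the car evaluation dataset used in __main__: 6 features, 4 classes,
# and the defaults h1 = 4, h2 = 2):
#   X (n, 6) -> w01 (6, 4) -> X12 (n, 4) -> w12 (4, 2) -> X23 (n, 2) -> w23 (2, 4) -> out (n, 4)
# ---------------------------------------------------------------------------------------------------------------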

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class NeuralNet:
    def __init__(self, train, header=True, h1=4, h2=2, activation='sigmoid'):
        np.random.seed(1)
        # train refers to the training dataset (an 80/20 train/test split is made below)
        # h1 and h2 represent the number of nodes in the 1st and 2nd hidden layers
        self.activation = activation
        if not header:
            raw_input = pd.read_csv(train, na_values=['?', ' ?'], header=None)
        else:
            raw_input = pd.read_csv(train, na_values=['?', ' ?'])
        # TODO: Remember to implement the preprocess method
        dataset = self.preprocess(raw_input)
        ncols = len(dataset.columns)
        nrows = len(dataset.index)
        X = dataset.iloc[:, 0:(ncols - 1)].values.reshape(nrows, ncols - 1)
        y = dataset.iloc[:, (ncols - 1)].values.reshape(nrows, 1)
        enc = OneHotEncoder()
        y = enc.fit_transform(y).toarray()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
        self.X = X_train
        self.y = y_train
        self.X_t = X_test
        self.y_t = y_test
        #
        # Find the number of input and output nodes from the dataset
        #
        input_layer_size = len(self.X[0])
        if not isinstance(self.y[0], np.ndarray):
            output_layer_size = 1
        else:
            output_layer_size = len(self.y[0])

        # assign random weights to the matrices in the network
        # number of weights connecting layers = (no. of nodes in previous layer) x (no. of nodes in following layer)
        self.w01 = 2 * np.random.random((input_layer_size, h1)) - 1
        self.X01 = self.X
        self.delta01 = np.zeros((input_layer_size, h1))
        self.w12 = 2 * np.random.random((h1, h2)) - 1
        self.X12 = np.zeros((len(self.X), h1))
        self.delta12 = np.zeros((h1, h2))
        self.w23 = 2 * np.random.random((h2, output_layer_size)) - 1
        self.X23 = np.zeros((len(self.X), h2))
        self.delta23 = np.zeros((h2, output_layer_size))
        self.deltaOut = np.zeros((output_layer_size, 1))
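        # Note: 2 * np.random.random(shape) - 1 draws each initial weight uniformly from [-1, 1), and the
        # np.random.seed(1) call above makes the initialization reproducible across runs. This starter
        # network allocates no bias terms.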
        #
        # TODO: I have coded the sigmoid activation function, you need to do the same for tanh and ReLu
        #
    def __activation(self, x, activation="sigmoid"):
        if activation == "sigmoid":
            return self.__sigmoid(x)
        if activation == "tanh":
            return self.__tanh(x)
        if activation == "ReLu":
            return self.__ReLu(x)

    #
    # TODO: Define the functions for tanh, ReLu and their derivatives
    #
    def __activation_derivative(self, x, activation="sigmoid"):
        if activation == "sigmoid":
            return self.__sigmoid_derivative(x)
        if activation == 'tanh':
            return self.__tanh_derivative(x)
        if activation == 'ReLu':
            return self.__ReLu_derivative(x)
    def __sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    # derivative of the sigmoid function, written in terms of the sigmoid's output

    def __sigmoid_derivative(self, x):
        return x * (1. - x)
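    # Note: throughout this file, activation derivatives take the activation's *output* as their argument:
    # if x = sigmoid(z), then sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)) = x * (1 - x). The tanh and ReLu
    # derivatives below use the same convention (1 - tanh(z)^2 = 1 - x^2; ReLu'(z) = 1 where x > 0, else 0).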

    def __tanh(self, x):
        return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

    def __tanh_derivative(self, x):
        return 1. - x * x

    def __ReLu(self, x):
        return x * (x > 0)

    def __ReLu_derivative(self, x):
        return 1. * (x > 0)
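    # Quick hand-checkable values: __sigmoid(0.) = 0.5 and __sigmoid_derivative(0.5) = 0.25;
    # __tanh(0.) = 0.0 and __tanh_derivative(0.0) = 1.0; __ReLu(3.) = 3.0 and __ReLu_derivative(3.) = 1.0,
    # while both ReLu values are 0 for negative inputs.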

    #
    # TODO: Write code for pre-processing the dataset, which would include standardization, normalization,
    #       categorical to numerical, etc.
    #

    def preprocess(self, X):
        # label encode: map every string column to integer category codes
        X[X.select_dtypes(['object']).columns] = X.select_dtypes(['object']).apply(lambda x: x.astype('category'))
        X[X.select_dtypes(['category']).columns] = X.select_dtypes(['category']).apply(lambda x: x.cat.codes)
        # fill missing values with the column mean
        X = X.fillna(X.mean())
        # standardize the feature columns (leave the label column untouched)
        scaler = StandardScaler()
        X.iloc[:, :-1] = scaler.fit_transform(X.iloc[:, :-1])
        return X
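    # For example, on the car.data file used in __main__, all seven columns are strings, so preprocess maps each
    # to integer category codes, fills any remaining numeric NaNs with the column mean, and z-scores the six
    # feature columns; the integer label codes in the last column are one-hot encoded later in __init__.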

    # Below is the training function

    def train(self, max_iterations=1000, learning_rate=0.05):
        for iteration in range(max_iterations):
            out = self.forward_pass()
            error = 0.5 * np.power((out - self.y), 2)
            self.backward_pass(out, self.activation)
            update_layer2 = learning_rate * self.X23.T.dot(self.deltaOut)
            update_layer1 = learning_rate * self.X12.T.dot(self.delta23)
            update_input = learning_rate * self.X01.T.dot(self.delta12)
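            # Sign convention: compute_output_delta uses (self.y - out), the negative gradient of the
            # 0.5 * (out - y)^2 error above, so adding the updates with += below performs full-batch
            # gradient descent (no momentum).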
            self.w23 += update_layer2
            self.w12 += update_layer1
            self.w01 += update_input

        print("After " + str(max_iterations) + " iterations, the total error is " + str(np.sum(error)))
        print("The final weight vectors are (starting from input to output layers)")
        print(self.w01)
        print(self.w12)
        print(self.w23)

    def forward_pass(self):
        # pass our inputs through the neural network
        if self.activation == 'sigmoid':
            in1 = np.dot(self.X, self.w01)
            self.X12 = self.__sigmoid(in1)
            in2 = np.dot(self.X12, self.w12)
            self.X23 = self.__sigmoid(in2)
            in3 = np.dot(self.X23, self.w23)
            out = self.__sigmoid(in3)
        if self.activation == 'tanh':
            in1 = np.dot(self.X, self.w01)
            self.X12 = self.__tanh(in1)
            in2 = np.dot(self.X12, self.w12)
            self.X23 = self.__tanh(in2)
            in3 = np.dot(self.X23, self.w23)
            out = self.__tanh(in3)
        if self.activation == 'ReLu':
            in1 = np.dot(self.X, self.w01)
            self.X12 = self.__ReLu(in1)
            in2 = np.dot(self.X12, self.w12)
            self.X23 = self.__ReLu(in2)
            in3 = np.dot(self.X23, self.w23)
            out = self.__ReLu(in3)
        return out
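    # Note: the three branches above differ only in the activation applied; X12 and X23 cache the hidden-layer
    # outputs for use in the backward pass, and out has shape (n_samples, n_classes), matching the one-hot labels.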

    def backward_pass(self, out, activation):
        # propagate the error backwards through the network
        self.compute_output_delta(out, activation)
        self.compute_hidden_layer2_delta(activation)
        self.compute_hidden_layer1_delta(activation)

    # TODO: Implement other activation functions

    def compute_output_delta(self, out, activation="sigmoid"):
        if activation == "sigmoid":
            delta_output = (self.y - out) * (self.__sigmoid_derivative(out))
        if activation == "tanh":
            delta_output = (self.y - out) * (self.__tanh_derivative(out))
        if activation == "ReLu":
            delta_output = (self.y - out) * (self.__ReLu_derivative(out))

        self.deltaOut = delta_output

    # TODO: Implement other activation functions

    def compute_hidden_layer2_delta(self, activation="sigmoid"):
        if activation == "sigmoid":
            delta_hidden_layer2 = (self.deltaOut.dot(self.w23.T)) * (self.__sigmoid_derivative(self.X23))
        if activation == "tanh":
            delta_hidden_layer2 = (self.deltaOut.dot(self.w23.T)) * (self.__tanh_derivative(self.X23))
        if activation == "ReLu":
            delta_hidden_layer2 = (self.deltaOut.dot(self.w23.T)) * (self.__ReLu_derivative(self.X23))

        self.delta23 = delta_hidden_layer2

    # TODO: Implement other activation functions

    def compute_hidden_layer1_delta(self, activation="sigmoid"):
        if activation == "sigmoid":
            delta_hidden_layer1 = (self.delta23.dot(self.w12.T)) * (self.__sigmoid_derivative(self.X12))
        if activation == "tanh":
            delta_hidden_layer1 = (self.delta23.dot(self.w12.T)) * (self.__tanh_derivative(self.X12))
        if activation == "ReLu":
            delta_hidden_layer1 = (self.delta23.dot(self.w12.T)) * (self.__ReLu_derivative(self.X12))

        self.delta12 = delta_hidden_layer1

    # TODO: Implement other activation functions

    # Note: this method is never called during training; self.delta01 would still be all zeros here and has
    # the wrong shape, so self.delta12 (the layer-1 delta) is the dimensionally consistent delta to propagate.
    def compute_input_layer_delta(self, activation="sigmoid"):
        if activation == "sigmoid":
            delta_input_layer = np.multiply(self.__sigmoid_derivative(self.X01), self.delta12.dot(self.w01.T))
        if activation == "tanh":
            delta_input_layer = np.multiply(self.__tanh_derivative(self.X01), self.delta12.dot(self.w01.T))
        if activation == "ReLu":
            delta_input_layer = np.multiply(self.__ReLu_derivative(self.X01), self.delta12.dot(self.w01.T))

        self.delta01 = delta_input_layer

    # TODO: Implement the predict function for applying the trained model on the test dataset.
    #       You can assume that the test dataset has the same format as the training dataset.
    #       You have to output the test error from this function.

    def predict(self, test=None, header=True):
        if test is None:
            # no file given: score the held-out 20% split made in __init__
            self.X = self.X_t
            self.y = self.y_t
        else:
            if not header:
                raw_input = pd.read_csv(test, na_values='?', header=None)
            else:
                raw_input = pd.read_csv(test, na_values='?')
            test_dataset = self.preprocess(raw_input)
            ncols = len(test_dataset.columns)
            nrows = len(test_dataset.index)
            self.X = test_dataset.iloc[:, 0:(ncols - 1)].values.reshape(nrows, ncols - 1)
            self.y = test_dataset.iloc[:, (ncols - 1)].values.reshape(nrows, 1)
            enc = OneHotEncoder()
            self.y = enc.fit_transform(self.y).toarray()
        out = self.forward_pass()
        print("real output")
        print(out)
        # convert each output row to a one-hot vector at its argmax
        out = (out == out.max(axis=1, keepdims=1)).astype(int)
        print("normalized output")
        print(out)
        print("target")
        print(self.y)
        return accuracy_score(self.y, out)
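    # Note on predict: (out == out.max(axis=1, keepdims=1)) one-hot encodes the argmax of each output row, and
    # sklearn's accuracy_score, given two one-hot (multilabel-indicator) arrays, reports the fraction of rows
    # that match exactly, which here is ordinary classification accuracy.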

if __name__ == "__main__":
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'
    # url = 'train.csv'
    neural_network = NeuralNet(url, False, activation='tanh')
    neural_network.train()
    testError = neural_network.predict()
    print("accuracy")
    print(testError)
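    # Hypothetical alternative run using the bundled train.csv (presumably the five-line file below, which
    # has a header row):
    #   neural_network = NeuralNet('train.csv', header=True, activation='sigmoid')
    #   neural_network.train(max_iterations=1000, learning_rate=0.05)
    #   print(neural_network.predict())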
@@ -0,0 +1,3 @@
X1,X2,X3,y
0,0,0,1
1,1,1,0
@@ -0,0 +1,5 @@
X1,X2,X3,y
0,0,1,0
0,1,1,1
1,0,1,1
1,1,1,0