|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +import random |
| 6 | +import matplotlib |
| 7 | +import os, struct |
| 8 | +from array import array as pyarray |
| 9 | +from numpy import append, array, int8, uint8, zeros |
| 10 | +matplotlib.use('TkAgg') |
| 11 | +import matplotlib.pyplot as pyplot |
| 12 | +import gzip |
| 13 | +import shutil |
| 14 | + |
class NeuralNet(object):
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent and
    backpropagation. Inputs, activations and biases are column vectors
    (shape ``(n, 1)``); ``self.w[i]`` has shape
    ``(sizes[i + 1], sizes[i])``.
    """

    def __init__(self, sizes):
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of neurons per layer, input layer first, e.g.
                ``[784, 40, 10]``.
        """
        self.sizes = sizes
        self.num_layers = len(sizes)
        # Standard-normal initial weights and biases; the input layer has
        # neither, hence the pairing of consecutive layer sizes.
        self.w = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.b = [np.random.randn(y, 1) for y in sizes[1:]]

    def sigmoid(self, z):
        """Return the element-wise logistic function of ``z``."""
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        """Return the element-wise derivative of the logistic function at ``z``."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def farward(self, x):
        """Propagate column vector ``x`` through every layer; return the output."""
        for b, w in zip(self.b, self.w):
            x = self.sigmoid(np.dot(w, x) + b)
        return x

    # Correctly spelled alias; ``farward`` is kept because existing callers
    # use that name.
    forward = farward

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: List of ``(x, y)`` pairs. It is shuffled IN PLACE
                at the start of every epoch.
            epochs: Number of passes over ``training_data``.
            mini_batch_size: Number of samples per gradient step.
            eta: Learning rate.
            test_data: Optional list of ``(x, label)`` pairs; when given (and
                non-empty) the number of correct predictions is printed
                after every epoch.
        """
        if test_data:
            n_test = len(test_data)

        n = len(training_data)
        print(epochs)
        for i in range(epochs):
            random.shuffle(training_data)
            mini_batchs = [training_data[k: k + mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            for ind, mini_batch in enumerate(mini_batchs):
                print("batch {}".format(ind))
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(i, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(i))

    def backprop(self, x, y):
        """Compute the cost gradient for a single sample.

        Args:
            x: Input column vector.
            y: Target output column vector.

        Returns:
            Tuple ``(nabla_b, nabla_w)`` of per-layer gradient lists shaped
            like ``self.b`` and ``self.w``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.b]
        nabla_w = [np.zeros(w.shape) for w in self.w]

        # Forward pass, remembering every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.b, self.w):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Error of the output layer.
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Backward pass: l counts layers from the end (l=2 is the second to
        # last layer), so negative indices walk the lists in reverse.
        for l in range(2, self.num_layers):
            sp = self.sigmoid_prime(zs[-l])
            delta = np.dot(self.w[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step averaged over ``mini_batch``.

        Args:
            mini_batch: List of ``(x, y)`` training pairs.
            eta: Learning rate.
        """
        if not mini_batch:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        nabla_b = [np.zeros(b.shape) for b in self.b]
        nabla_w = [np.zeros(w.shape) for w in self.w]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # The result must be stored on the instance: assigning it to local
        # names would silently discard the update and the network would
        # never learn.
        step = eta / len(mini_batch)
        self.w = [w - step * nw for w, nw in zip(self.w, nabla_w)]
        self.b = [b - step * nb for b, nb in zip(self.b, nabla_b)]

    def evaluate(self, test_data):
        """Return how many samples in ``test_data`` are classified correctly.

        Args:
            test_data: Iterable of ``(x, label)`` pairs where ``label`` is a
                scalar or a size-1 array holding the expected class index.
        """
        # ``.item()`` extracts the single label explicitly; calling ``int()``
        # on a comparison with a size-1 array is deprecated in recent NumPy.
        return sum(
            int(np.argmax(self.farward(x)) == np.asarray(y).item())
            for (x, y) in test_data
        )

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``.

        This is the gradient of the quadratic cost ``0.5 * ||a - y||^2``
        with respect to the output activations ``a``.
        """
        return (output_activations - y)

    def predict(self, data):
        """Return the index of the most activated output neuron for ``data``."""
        return int(np.argmax(self.farward(data)))

    def save(self):
        """Placeholder for persisting the network; currently does nothing."""
        pass

    def load(self):
        """Placeholder for restoring the network; currently does nothing."""
        pass
| 103 | + |
| 104 | + |
def load_mnist(dataset="training_data", digits=np.arange(10), path="../mnist"):
    """Load images and labels from gzip-compressed MNIST IDX files.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``; selects which
            pair of files under ``path`` is read.
        digits: Collection of label values to keep; samples with any other
            label are dropped.
        path: Directory containing the ``*-ubyte.gz`` files.

    Returns:
        Tuple ``(images, labels)``: ``images`` is a ``uint8`` array of shape
        ``(N, rows, cols)`` and ``labels`` an ``int8`` array of shape
        ``(N, 1)``, where ``N`` is the number of samples kept.

    Raises:
        ValueError: If ``dataset`` is not a known name, or a file holds
            fewer entries than the image header announces.
    """
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 't10k-labels-idx1-ubyte.gz')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Label file: 8-byte big-endian header (magic number, item count)
    # followed by one byte per label. ``with`` closes the file even when
    # header parsing fails.
    with gzip.open(fname_label, 'rb') as flbl:
        magic_nr, n_labels = struct.unpack(">2I", flbl.read(8))
        lbl = np.frombuffer(flbl.read(), dtype=int8)

    # Image file: 16-byte big-endian header (magic number, item count, rows,
    # cols) followed by one unsigned byte per pixel.
    with gzip.open(fname_image, 'rb') as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = np.frombuffer(fimg.read(), dtype=uint8)

    if lbl.size < size or img.size < size * rows * cols:
        raise ValueError(
            "MNIST files are truncated: expected {} samples".format(size))

    # Keep only the samples whose label is one of the requested digits.
    # ``list(digits)`` lets callers pass any collection (array, list, set).
    keep = np.isin(lbl[:size], list(digits))
    N = int(keep.sum())

    # Boolean indexing copies, so the results are ordinary writable arrays
    # rather than read-only views of the file buffers.
    images = img[:size * rows * cols].reshape((size, rows, cols))[keep]
    labels = lbl[:size][keep].reshape((N, 1))

    return images, labels
| 144 | + |
def load_samples(dataset="training_data"):
    """Load an MNIST split as a list of ``(input, target)`` pairs.

    Every image is flattened to a column vector and divided by 255.0.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``.

    Returns:
        A list of ``(x, y)`` tuples. For training data ``y`` is a one-hot
        ``(10, 1)`` column vector; for testing data ``y`` is the raw label
        row as returned by ``load_mnist``.

    Raises:
        ValueError: If ``dataset`` is not one of the two supported names.
    """
    image, label = load_mnist(dataset)

    # (-1, 1) flattens whatever image size the files declare into a column
    # vector instead of assuming 28x28.
    X = [np.reshape(x, (-1, 1)) / 255.0 for x in image]

    # 5 -> [0,0,0,0,0,1.0,0,0,0,0]; 1 -> [0,1.0,0,0,0,0,0,0,0,0]
    def vectorized_Y(y):
        e = np.zeros((10, 1))
        e[y] = 1.0
        return e

    if dataset == "training_data":
        Y = [vectorized_Y(y) for y in label]
        return list(zip(X, Y))
    if dataset == 'testing_data':
        return list(zip(X, label))

    # Fail loudly rather than printing a message and returning None.
    raise ValueError("dataset must be 'training_data' or 'testing_data'")
| 169 | + |
| 170 | + |
if __name__ == "__main__":
    # Train a 784-40-10 network on MNIST and report test-set accuracy.
    INPUT = 28*28
    OUTPUT = 10
    net = NeuralNet([INPUT, 40, OUTPUT])

    train_set = load_samples(dataset='training_data')
    test_set = load_samples(dataset='testing_data')

    # Arguments: 13 epochs, mini-batch size 10000, learning rate 3.0.
    # NOTE(review): such a large mini-batch yields very few weight updates
    # per epoch -- confirm this is intended.
    net.SGD(train_set, 13, 10000, 3.0, test_data=test_set)

    # Each test label is a size-1 row, hence the [0] to get the class index.
    correct = sum(1 for x, y in test_set if net.predict(x) == y[0])
    print("accuracy: ", correct/len(test_set))
0 commit comments