#!/usr/bin/python
import numpy as np
import scipy.optimize as opt
from scipy.io import loadmat
import matplotlib.pyplot as plot

def sigmoid(X):
    return 1./(1.+np.exp(-X))

#Unregularized logistic regression cost; returns a 1-element array for fmin_cg
def logistic_cost(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    h = sigmoid(np.dot(X,theta))
    #THE LINES BELOW DO NOT WORK VERY WELL
    #POSSIBLE NUMERICAL ISSUES? (see the note after this function)
    #Direct versions with distributed minus
    #return np.ravel(1./m*np.sum(-y*np.log(h) - (1.-y)*np.log(1.-h)))
    #return np.ravel(1./m*(np.dot(-y.T,np.log(h)) - np.dot((1.-y.T),np.log(1.-h))))

    #PARAMETERIZATIONS THAT WORK
    #return np.ravel(-1./m*(np.dot(y.T,np.log(h)) + np.dot((1.-y).T,np.log(1.-h))))
    #Chose this one since it seems the most direct
    return np.ravel(-1./m*np.sum(y*np.log(h) + (1.-y)*np.log(1.-h)))

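#A minimal sketch of one way to address the numerical issues noted above (an
#assumption, not part of the original routine): when h saturates at exactly 0
#or 1, log(h) or log(1.-h) evaluates to -inf, so clip h away from the
#boundaries first. The helper name and eps value are illustrative only.
def logistic_cost_clipped(theta, X, y, eps=1e-12):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    #Keep h strictly inside (0, 1) before taking logs
    h = np.clip(sigmoid(np.dot(X,theta)), eps, 1.-eps)
    return np.ravel(-1./m*np.sum(y*np.log(h) + (1.-y)*np.log(1.-h)))
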
#Gradient of the unregularized cost, flattened to 1D for fmin_cg
def logistic_cost_grad(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    h = sigmoid(np.dot(X,theta))
    return np.ravel(1./m*np.dot(X.T,h-y))

#Cost with L2 regularization; the bias term theta[0] is not penalized
def logistic_cost_reg(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    reg = l/(2.*m)*np.sum(theta[1:,:]**2.)
    return np.ravel(logistic_cost(theta,X,y) + reg)

#Gradient of the regularized cost; adds l/m*theta elementwise (bias excluded)
def logistic_cost_reg_grad(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    #Do not regularize the bias term; pad with a zero so shapes line up
    reg = float(l)/m*np.vstack((np.zeros((1,1)), theta[1:,:]))
    return logistic_cost_grad(theta,X,y) + np.ravel(reg)

from sklearn import datasets
digits = datasets.load_digits()
#These digits are randomly ordered; if given an ORDERED set of training data,
#make sure to randomize before splitting (a commented sketch follows below)
#Normalize between 0 and 1 (the digits pixel values run from 0 to 16)
X = digits.data/16.
#Subtract the mean, though it doesn't appear to make a big difference
X = X - np.mean(X)
y = digits.target[:,np.newaxis]
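#A minimal sketch of the shuffling mentioned above (left commented out since
#load_digits already comes unordered; the same permutation must be applied to
#both X and y so rows stay matched):
#perm = np.random.permutation(X.shape[0])
#X = X[perm,:]
#y = y[perm,:]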
#Add bias terms
X = np.hstack((np.ones((X.shape[0],1)), X))
#Expect 10 labels for digits
num_labels = np.amax(y)-np.amin(y)+1
all_theta = np.zeros((X.shape[1],num_labels))
#Train one binary (one-vs-all) classifier per digit class
for c in range(num_labels):
    #Initialize theta to zeros and pass into optimization routine
    theta0 = np.zeros((X.shape[1],))
    #fmin_cg requires the cost and cost_grad functions to return flattened 1D arrays!
    #(an equivalent scipy.optimize.minimize call is sketched after this loop)
    #theta = opt.fmin_cg(logistic_cost, theta0, fprime=logistic_cost_grad, args=(X, (y == c)), maxiter=50)
    theta = opt.fmin_cg(logistic_cost_reg, theta0, fprime=logistic_cost_reg_grad, args=(X, (y == c)), maxiter=50)
    all_theta[:,c] = theta

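#A sketch of how the fmin_cg call inside the loop could be written with the
#newer scipy.optimize.minimize interface (assumes a reasonably recent scipy
#and is not used by this script, so it is left commented out):
#res = opt.minimize(logistic_cost_reg, theta0, jac=logistic_cost_reg_grad,
#                   args=(X, (y == c)), method="CG", options={"maxiter": 50})
#theta = res.x
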
#We can use the builtin check_grad function to perform numerical gradient
#checking, ensuring the gradient code is correct for the cost
#print opt.check_grad(logistic_cost, logistic_cost_grad, theta, X, y)
#Pick the most probable class for each example; note this is accuracy on the
#training data, since no held-out split is used here
h = sigmoid(np.dot(X, all_theta))
pred = np.argmax(h,axis=1)[:,np.newaxis]
print "Classification accuracy (%): " + str(100*np.sum(y == pred)/float(len(y)))