#!/usr/bin/python
import numpy as np
import scipy.optimize as opt
from scipy.io import loadmat
import matplotlib.pyplot as plot

def sigmoid(X):
    return 1./(1.+np.exp(-X))

#Unregularized logistic regression cost; returns a 1-element array for fmin_cg
def logistic_cost(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    h = sigmoid(np.dot(X,theta))
    #THE LINES BELOW DO NOT WORK VERY WELL
    #POSSIBLE NUMERICAL ISSUES? (see the note after this function)
    #Direct versions with distributed minus
    #return np.ravel(1./m*np.sum(-y*np.log(h) - (1.-y)*np.log(1.-h)))
    #return np.ravel(1./m*(np.dot(-y.T,np.log(h)) - np.dot((1.-y.T),np.log(1.-h))))

    #PARAMETERIZATIONS THAT WORK
    #return np.ravel(-1./m*(np.dot(y.T,np.log(h)) + np.dot((1.-y).T,np.log(1.-h))))
    #Chose this one since it seems the most direct
    return np.ravel(-1./m*np.sum(y*np.log(h) + (1.-y)*np.log(1.-h)))

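#A minimal sketch of one way to address the numerical issues noted above (an
#assumption, not part of the original routine): when h saturates at exactly 0
#or 1, log(h) or log(1.-h) evaluates to -inf, so clip h away from the
#boundaries first. The helper name and eps value are illustrative only.
def logistic_cost_clipped(theta, X, y, eps=1e-12):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    #Keep h strictly inside (0, 1) before taking logs
    h = np.clip(sigmoid(np.dot(X,theta)), eps, 1.-eps)
    return np.ravel(-1./m*np.sum(y*np.log(h) + (1.-y)*np.log(1.-h)))
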
#Gradient of the unregularized cost, flattened to 1D for fmin_cg
def logistic_cost_grad(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    h = sigmoid(np.dot(X,theta))
    return np.ravel(1./m*np.dot(X.T,h-y))

#Cost with L2 regularization; the bias term theta[0] is not penalized
def logistic_cost_reg(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    reg = l/(2.*m)*np.sum(theta[1:,:]**2.)
    return np.ravel(logistic_cost(theta,X,y) + reg)

#Gradient of the regularized cost; adds l/m*theta elementwise (bias excluded)
def logistic_cost_reg_grad(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:,np.newaxis]
    #Do not regularize the bias term; pad with a zero so shapes line up
    reg = float(l)/m*np.vstack((np.zeros((1,1)), theta[1:,:]))
    return logistic_cost_grad(theta,X,y) + np.ravel(reg)

from sklearn import datasets
digits = datasets.load_digits()
#These digits are randomly ordered; if given an ORDERED set of training data,
#make sure to randomize before splitting (a commented sketch follows below)
#Normalize between 0 and 1 (the digits pixel values run from 0 to 16)
X = digits.data/16.
#Subtract the mean, though it doesn't appear to make a big difference
X = X - np.mean(X)
y = digits.target[:,np.newaxis]
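#A minimal sketch of the shuffling mentioned above (left commented out since
#load_digits already comes unordered; the same permutation must be applied to
#both X and y so rows stay matched):
#perm = np.random.permutation(X.shape[0])
#X = X[perm,:]
#y = y[perm,:]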
#Add bias terms
X = np.hstack((np.ones((X.shape[0],1)), X))
#Expect 10 labels for digits
num_labels = np.amax(y)-np.amin(y)+1
all_theta = np.zeros((X.shape[1],num_labels))
#Train one binary (one-vs-all) classifier per digit class
for c in range(num_labels):
    #Initialize theta to zeros and pass into optimization routine
    theta0 = np.zeros((X.shape[1],))
    #fmin_cg requires the cost and cost_grad functions to return flattened 1D arrays!
    #(an equivalent scipy.optimize.minimize call is sketched after this loop)
    #theta = opt.fmin_cg(logistic_cost, theta0, fprime=logistic_cost_grad, args=(X, (y == c)), maxiter=50)
    theta = opt.fmin_cg(logistic_cost_reg, theta0, fprime=logistic_cost_reg_grad, args=(X, (y == c)), maxiter=50)
    all_theta[:,c] = theta

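#A sketch of how the fmin_cg call inside the loop could be written with the
#newer scipy.optimize.minimize interface (assumes a reasonably recent scipy
#and is not used by this script, so it is left commented out):
#res = opt.minimize(logistic_cost_reg, theta0, jac=logistic_cost_reg_grad,
#                   args=(X, (y == c)), method="CG", options={"maxiter": 50})
#theta = res.x
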
#We can use the builtin check_grad function to perform numerical gradient
#checking, ensuring the gradient code is correct for the cost
#print opt.check_grad(logistic_cost, logistic_cost_grad, theta, X, y)
#Pick the most probable class for each example; note this is accuracy on the
#training data, since no held-out split is used here
h = sigmoid(np.dot(X, all_theta))
pred = np.argmax(h,axis=1)[:,np.newaxis]
print "Classification accuracy (%): " + str(100*np.sum(y == pred)/float(len(y)))