forked from lazyprogrammer/machine_learning_examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprobit.py
128 lines (104 loc) · 3.62 KB
/
probit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# probit regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
# from scipy.stats import t
from sortedcontainers import SortedList
class ProbitRegression:
def fit(self, X, Y, sigma=1.5, lam=1, show_w=set(), Q=None):
# setup
N, D = X.shape
self.w = np.random.randn(D) / np.sqrt(D) # does not work if you don't scale first!
Eq = np.zeros(N)
idx1 = (Y == 1)
idx0 = (Y == 0)
A = lam*np.eye(D) + X.T.dot(X) / sigma**2
costs = []
for t in xrange(100):
# calculate ln(p(Y, w | X))
cdf = norm.cdf(X.dot(self.w) / sigma)
cost = -lam/2 * self.w.dot(self.w) + Y.dot(np.log(cdf)) + (1 - Y).dot(np.log(1 - cdf))
costs.append(cost)
# E step
Xw = X.dot(self.w)
pdf = norm.pdf(-Xw / sigma)
cdf = norm.cdf(-Xw / sigma)
Eq[idx1] = Xw[idx1] + sigma*(pdf[idx1] / (1 - cdf[idx1]))
Eq[idx0] = Xw[idx0] + sigma*(-pdf[idx0] / cdf[idx0])
# M step
b = X.T.dot(Eq) / sigma**2
self.w = np.linalg.solve(A, b)
if show_w and t in show_w:
plot_image(self.w, Q, "iteration: %s" % (t+1))
plt.plot(costs)
plt.show()
self.sigma = sigma
self.lam = lam
def predict_proba(self, X):
N, D = X.shape
return norm.cdf(X.dot(self.w) / self.sigma)
def predict(self, X):
return np.round(self.predict_proba(X))
def score(self, X, Y):
return np.mean(self.predict(X) == Y)
def confusion_matrix(self, X, Y):
P = self.predict(X)
M = np.zeros((2, 2))
M[0,0] = np.sum(P[Y == 0] == Y[Y == 0])
M[0,1] = np.sum(P[Y == 0] != Y[Y == 0])
M[1,0] = np.sum(P[Y == 1] != Y[Y == 1])
M[1,1] = np.sum(P[Y == 1] == Y[Y == 1])
return M
def get_3_misclassified(self, X, Y):
P = self.predict(X)
N = len(Y)
samples = np.random.choice(N, 3, replace=False, p=(P != Y)/float(np.sum(P != Y)))
return X[samples], Y[samples], P[samples]
def get_3_most_ambiguous(self, X, Y):
P = self.predict_proba(X)
N = len(X)
sl = SortedList(load=3) # stores (distance, sample index) tuples
for n in xrange(N):
p = P[n]
dist = np.abs(p - 0.5)
if len(sl) < 3:
sl.add( (dist, n) )
else:
if dist < sl[-1][0]:
del sl[-1]
sl.add( (dist, n) )
indexes = [v for k, v in sl]
return X[indexes], Y[indexes]
def plot_image(x, Q, title):
im = Q.dot(x)
plt.imshow(im.reshape(28,28), cmap='gray')
plt.title(title)
plt.show()
if __name__ == '__main__':
Xtrain = pd.read_csv('Xtrain.csv', header=None).as_matrix()
Xtest = pd.read_csv('Xtest.csv', header=None).as_matrix()
Ytrain = pd.read_csv('ytrain.csv', header=None).as_matrix().flatten()
# print "Ytrain.shape:", Ytrain.shape
Ytest = pd.read_csv('ytest.csv', header=None).as_matrix().flatten()
model = ProbitRegression()
model.fit(Xtrain, Ytrain)
print "train accuracy:", model.score(Xtrain, Ytrain)
print "test accuracy:", model.score(Xtest, Ytest)
# confusion matrix
M = model.confusion_matrix(Xtest, Ytest)
print "confusion matrix:"
print M
print "N:", len(Ytest)
print "sum(M):", M.sum()
# plot 3 misclassified
Q = pd.read_csv('Q.csv', header=None).as_matrix()
misclassified, targets, predictions = model.get_3_misclassified(Xtrain, Ytrain)
for x, y, p in zip(misclassified, targets, predictions):
plot_image(x, Q, 'misclassified target=%s prediction=%s' % (y, int(p)))
# ambiguous
ambiguous, targets = model.get_3_most_ambiguous(Xtrain, Ytrain)
for x, y in zip(ambiguous, targets):
plot_image(x, Q, 'ambiguous target=%s' % y)
# show w
model.fit(Xtrain, Ytrain, show_w=set([0, 4, 9, 24, 49, 99]), Q=Q)