-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpegasosSVM.py
149 lines (116 loc) · 3.49 KB
/
pegasosSVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
# coding: utf-8
import pandas as pd, seaborn as sn, numpy as np, math, random, matplotlib.pyplot as plt, utils.mnist_reader as mnist_reader
from sklearn.datasets import make_classification
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics.pairwise import rbf_kernel,polynomial_kernel,linear_kernel
from sklearn.metrics import confusion_matrix, accuracy_score,f1_score
from sklearn.svm import LinearSVC
def pegasosSolver(X,Y,lm,n_iter=100):
    """Fit a linear SVM weight vector with Pegasos-style stochastic updates.

    Every iteration draws one training example uniformly at random, shrinks
    the current weights by ``1 - eta*lm`` and, when the example's margin
    ``y * <W, x>`` is below 1, additionally moves the weights along ``y * x``.

    Args:
        X: indexable collection of feature vectors; each row must support
            multiplication by a float (e.g. NumPy arrays).
        Y: labels aligned with ``X``.
        lm: regularisation strength; the step size at step t is 1/(lm*t).
        n_iter: number of stochastic updates to perform.

    Returns:
        The learned weights as a 1-D NumPy array of length ``len(X[0])``.
    """
    n_samples = len(Y)
    weights = np.zeros(len(X[0]))
    step = 0
    while step < n_iter:
        step += 1
        # Step size decays as 1/(lm * t), with t counted from 1.
        eta = 1.0 / (lm * step)
        idx = random.randint(0, n_samples - 1)
        sample, label = X[idx], Y[idx]
        margin = label * np.dot(weights, sample)
        # The regularisation shrink is applied on every step.
        shrunk = (1 - eta * lm) * weights
        if margin >= 1:
            weights = shrunk
        else:
            # Margin violated: also step towards classifying this sample.
            weights = shrunk + (eta * label) * sample
    return weights
def train(X,Y,i,j,lm=1,n_iter=1000000):
    """Train a binary Pegasos classifier separating class ``i`` from class ``j``.

    Samples labelled ``i`` become the positive class (+1) and samples
    labelled ``j`` the negative class (-1); all other samples are ignored.
    After fitting, the accuracy on that same training subset is printed.

    Args:
        X: indexable collection of feature vectors.
        Y: class labels aligned with ``X``.
        i: label mapped to +1.
        j: label mapped to -1.
        lm: regularisation strength forwarded to ``pegasosSolver``.
        n_iter: number of stochastic updates forwarded to ``pegasosSolver``.

    Returns:
        The weight vector returned by ``pegasosSolver``.

    Raises:
        ValueError: if no sample carries label ``i`` or ``j``.
    """
    nX = []
    nY = []
    for features, label in zip(X, Y):
        if label == i:
            nX.append(features)
            nY.append(1)
        elif label == j:
            nX.append(features)
            nY.append(-1)

    # Fail early with a clear message instead of an opaque IndexError in the
    # solver (or a division by zero in the accuracy report below).
    if not nX:
        raise ValueError(
            "no training samples found for classes %r and %r" % (i, j)
        )

    W = pegasosSolver(nX, nY, lm=lm, n_iter=n_iter)

    # Training-set accuracy: a sample counts as correct when its signed
    # score is strictly positive.
    correct = sum(
        1
        for features, target in zip(nX, nY)
        if np.dot(W.T, features) * target > 0
    )
    print("Classifier accuracy",float(correct)/len(nX)*100,"\n")
    return W
def test(x,Wij):
    """Predict the class of one sample by voting over pairwise classifiers.

    For every pair ``(i, j)`` with ``j < i`` the weight vector ``Wij[i][j]``
    is applied to ``x``: a strictly positive score is a vote for class ``i``,
    anything else a vote for class ``j``.  The number of classes is read from
    the module-level ``numOfClss``.

    Args:
        x: feature vector, laid out like the vectors the classifiers were
            trained on.
        Wij: nested sequence where ``Wij[i][j]`` holds the weights of the
            classifier for the pair ``(i, j)``; only entries with ``j < i``
            are read.

    Returns:
        Index of the class with the most votes (``np.argmax`` picks the
        lowest index on ties).
    """
    votes = np.zeros(numOfClss)
    for hi in range(numOfClss):
        for lo in range(hi):
            w = np.array(Wij[hi][lo])
            winner = hi if np.dot(w.T, x) > 0 else lo
            votes[winner] = votes[winner] + 1
    return np.argmax(votes)
def compareWithScikit(X_train, y_train, X_test, y_test):
    """Print the test accuracy of scikit-learn's ``LinearSVC`` as a baseline.

    A ``LinearSVC`` with default settings is fitted on the training data and
    its predictions on the test data are scored with ``accuracy_score``.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.
    """
    baseline = LinearSVC().fit(X_train, y_train)
    predictions = baseline.predict(X_test)
    print("COmparing with SKlearn linear SVC")
    print("Accuracy", accuracy_score(y_test, predictions))
# Load the 'train' and 't10k' splits from data/fashion via mnist_reader.
X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
numOfClss = 10

# Append a constant 1 to every sample so the bias is learned as one extra
# weight by the solver.
X = X_train
Y = y_train
copyX = [np.append(row, 1) for row in X]
testX = [np.append(row, 1) for row in X_test]
X = np.array(copyX)

# Pairwise classifiers: Wij[i][j] (for j < i) separates class i (+1) from
# class j (-1); the remaining entries stay zero and are never read.
Wij = np.zeros((numOfClss, numOfClss, len(X[0]))).tolist()
# Iterate over numOfClss rather than a literal 10 so the table size and the
# set of trained pairs cannot drift apart.
for i in range(numOfClss):
    for j in range(i):
        print("Training binary classifier between the classes",i,"and",j)
        Wij[i][j] = train(X, Y, i, j)

# Classify every test sample by voting over the pairwise classifiers.
out = [test(sample, Wij) for sample in testX]

print("Accuracy",accuracy_score(y_test,out))
print("Confusion matrix:")
conf = confusion_matrix(y_test,out)
print(conf)

# Render the confusion matrix as an annotated heatmap.
df_cm = pd.DataFrame(conf, range(numOfClss), range(numOfClss))
plt.figure(figsize=(numOfClss, 7))
sn.set(font_scale=1.4)  # label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})
plt.show()

# Compare against scikit-learn on the original features (no bias column).
compareWithScikit(X_train, y_train, X_test, y_test)