-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAlgorithms.py
86 lines (71 loc) · 2.91 KB
/
Algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import copy
import numpy as np
class LearningAlgorithm:
    """Common interface for learning algorithms.

    The two methods here are placeholders: each one prints a fixed message
    and returns ``None``. Concrete algorithms are expected to override them
    with real training and prediction logic.
    """

    def Learn(self, features, featureVectors, classes, assignments):
        """Placeholder training hook.

        All arguments are accepted only so the signature matches what
        overriding classes use; this implementation ignores them.

        Args:
            features: Feature descriptors (unused here).
            featureVectors: Training examples (unused here).
            classes: Candidate class labels (unused here).
            assignments: Class label of each training example (unused here).

        Returns:
            None. A message is printed to standard output.
        """
        print("Studying hard!")

    def Predict(self, featureVectors):
        """Placeholder prediction hook.

        Args:
            featureVectors: Input to predict on (unused here).

        Returns:
            None. A message is printed to standard output.
        """
        print("You will die young.")
class MultinomialNaiveBayes(LearningAlgorithm):
    """Naive Bayes classifier over binned feature values.

    Every feature object passed to ``Learn`` must expose a ``discretization``
    sequence of bin lower bounds. Bin ``b`` covers
    ``[discretization[b], discretization[b + 1])`` and the last bin is
    open-ended above. Values below the first bound are assigned to bin 0.

    Attributes set by ``Learn``:
        classProbabilities: ``classProbabilities[l]`` is the fraction of
            training examples whose assignment equals ``classes[l]``.
        featureProbabilities: ``featureProbabilities[j][l][b]`` is the
            Laplace-smoothed probability that feature ``j`` falls in bin
            ``b`` given class ``l``.
    """

    @staticmethod
    def _bin_index(value, boundaries):
        """Return the index of the bin of ``boundaries`` that holds ``value``.

        The scan keeps the last bin whose range contains ``value``. When no
        bin matches (the value lies below the first bound) the result is 0,
        so callers never receive a negative index that would wrap around to
        the last bin.
        """
        last = len(boundaries) - 1
        index = 0  # default: clamp out-of-range-low values into the first bin
        for i, lower in enumerate(boundaries):
            if value >= lower and (i == last or value < boundaries[i + 1]):
                index = i
        return index

    def Learn(self, features, featureVectors, classes, assignments):
        """Estimate class priors and per-class bin probabilities.

        Args:
            features: Sequence of feature objects, each with a non-empty
                ``discretization`` sequence of bin lower bounds.
            featureVectors: Sequence of training examples; example ``i``
                holds the value of feature ``j`` at ``featureVectors[i][j]``.
            classes: Sequence of class labels.
            assignments: Class label of each training example, in the same
                order as ``featureVectors``.

        Raises:
            ValueError: If there are no training examples, if
                ``assignments`` and ``featureVectors`` differ in length, or
                if a feature has an empty ``discretization``.
        """
        print("You're so naive.")

        num_examples = len(featureVectors)
        if num_examples == 0:
            raise ValueError("cannot train on an empty set of feature vectors")
        if len(assignments) != num_examples:
            raise ValueError(
                "assignments and featureVectors must have the same length"
            )
        bin_counts = [len(feature.discretization) for feature in features]
        if 0 in bin_counts:
            raise ValueError("every feature needs at least one discretization bin")

        # Keep a private copy so later changes to the caller's feature
        # objects cannot alter the bins used at prediction time.
        self._features = copy.deepcopy(features)
        num_classes = len(classes)

        # Every bin count starts at 1 (Laplace smoothing) so that a bin
        # never seen for a class still gets a non-zero probability.
        counts = [
            [[1] * bins for _ in range(num_classes)]
            for bins in bin_counts
        ]
        class_counts = [0] * num_classes

        # Single pass over the training data: each example updates the
        # count of its class and of the one bin it falls into per feature.
        for vector, label in zip(featureVectors, assignments):
            matching = [l for l, cls in enumerate(classes) if label == cls]
            if not matching:
                continue
            for l in matching:
                class_counts[l] += 1
            for j, feature in enumerate(self._features):
                b = self._bin_index(vector[j], feature.discretization)
                for l in matching:
                    counts[j][l][b] += 1

        self.classProbabilities = [
            count / num_examples for count in class_counts
        ]
        # The denominator adds the number of bins to match the +1 given to
        # every bin above, so each class's bin probabilities sum to 1.
        self.featureProbabilities = [
            [
                [count / (class_counts[l] + bins) for count in per_class]
                for l, per_class in enumerate(per_feature)
            ]
            for per_feature, bins in zip(counts, bin_counts)
        ]
        print("All done!")

    def Predict(self, featureVector):
        """Return the posterior probability of each class for one example.

        Args:
            featureVector: Sequence holding the value of feature ``j`` at
                index ``j``, in the feature order given to ``Learn``.

        Returns:
            A list with one probability per class, in the order of the
            ``classes`` argument given to ``Learn``.

        Raises:
            ZeroDivisionError: If the joint probability is zero for every
                class (for example when no training assignment matched any
                class, so all priors are zero).
        """
        # The bin of each feature value does not depend on the class, so it
        # is looked up once up front.
        bins = [
            self._bin_index(featureVector[j], feature.discretization)
            for j, feature in enumerate(self._features)
        ]

        # joint[l] = P(class l) * product over features of P(bin | class l)
        joint = []
        for l, prior in enumerate(self.classProbabilities):
            likelihood = 1
            for j, b in enumerate(bins):
                likelihood *= self.featureProbabilities[j][l][b]
            joint.append(prior * likelihood)

        # Normalising by the summed joint probabilities gives posteriors.
        evidence = sum(joint)
        return [p / evidence for p in joint]