Skip to content

Commit ff80f3c

Browse files
committed
Naive Bayes Algo Code
Former-commit-id: 8df1f6cc17134b294e885f2395adaefd8a7c1a98
1 parent bef7635 commit ff80f3c

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed

AlgorithmCode/NaiveBayes.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import csv
2+
import random
3+
import math
4+
5+
def loadCsv(filename):
    """Load a purely numeric CSV file as a list of rows of floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-blank CSV row; each entry is a list
        containing every field of that row converted with ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    # newline="" is what the csv module expects from the file object, and
    # the context manager closes the file even if a conversion fails.
    with open(filename, newline="") as csvFile:
        # csv.reader yields an empty list for a blank line; skip those rows.
        return [[float(x) for x in row] for row in csv.reader(csvFile) if row]
11+
12+
def splitDataset(dataset, splitRatio):
    """Randomly partition ``dataset`` into a training part and the remainder.

    Args:
        dataset: Sequence of rows. It is not modified.
        splitRatio: Fraction of the rows, between 0 and 1 inclusive, that
            goes into the training part.

    Returns:
        A two-element list ``[trainSet, testSet]``. ``trainSet`` contains
        ``int(len(dataset) * splitRatio)`` rows drawn at random without
        replacement; ``testSet`` contains all remaining rows in their
        original order.

    Raises:
        ValueError: If ``splitRatio`` is outside the range 0..1.
    """
    if not 0 <= splitRatio <= 1:
        raise ValueError("splitRatio must be between 0 and 1")
    trainSize = int(len(dataset) * splitRatio)
    # Draw row positions rather than popping rows one by one from a copy:
    # the selection is the same kind of random subset without the repeated
    # list shifting that pop(index) causes.
    chosen = random.sample(range(len(dataset)), trainSize)
    chosenSet = set(chosen)
    trainSet = [dataset[i] for i in chosen]
    testSet = [row for i, row in enumerate(dataset) if i not in chosenSet]
    return [trainSet, testSet]
20+
21+
def separateByClass(dataset):
    """Group the rows of ``dataset`` by the value of their last element.

    Args:
        dataset: Iterable of rows; the last element of each row is used as
            the grouping key (it must be hashable).

    Returns:
        A dict mapping each distinct last-element value to the list of rows
        that end with that value, in the order the rows were encountered.

    Raises:
        IndexError: If a row is empty.
    """
    separated = {}
    for vector in dataset:
        # setdefault creates the bucket the first time a key is seen.
        separated.setdefault(vector[-1], []).append(vector)
    return separated
29+
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Args:
        numbers: Sized collection of numbers (``len`` and ``sum`` are applied
            to it).

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))
31+
32+
def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The squared deviations from the mean are divided by ``len(numbers) - 1``
    and the square root of that variance is returned.

    Args:
        numbers: Sized collection of numbers.

    Returns:
        The standard deviation as a float.

    Raises:
        ValueError: If fewer than two values are supplied, because the
            ``n - 1`` divisor would be zero or negative.
    """
    count = len(numbers)
    if count < 2:
        raise ValueError("stdev requires at least two values")
    # The mean is computed inline so this function has no dependency on
    # other definitions in the module.
    avg = sum(numbers) / float(count)
    variance = sum((x - avg) ** 2 for x in numbers) / float(count - 1)
    return math.sqrt(variance)
35+
36+
def summarize(dataset):
    """Compute ``(mean, stdev)`` for every column of ``dataset`` but the last.

    Args:
        dataset: Sequence of equally long rows.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per column, excluding the
        final column. An empty ``dataset`` yields an empty list.
    """
    # zip(*dataset) transposes rows into columns. The final column is
    # dropped before any statistics are computed, so it is never passed to
    # mean() or stdev().
    columns = list(zip(*dataset))[:-1]
    return [(mean(column), stdev(column)) for column in columns]
40+
41+
def summarizeByClass(dataset):
    """Summarize the columns of ``dataset`` separately for each class value.

    The rows are grouped with ``separateByClass`` and every group is passed
    to ``summarize``.

    Args:
        dataset: Sequence of rows.

    Returns:
        A dict mapping each key produced by ``separateByClass`` to the
        result of ``summarize`` for that key's rows.
    """
    separated = separateByClass(dataset)
    # dict.items() is used because dict.iteritems() does not exist in
    # Python 3.
    return {
        classValue: summarize(instances)
        for classValue, instances in separated.items()
    }
47+
48+
def calculateProbability(x, mean, stdev):
    """Evaluate the Gaussian probability density function at ``x``.

    Args:
        x: Point at which the density is evaluated.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution; must be non-zero.

    Returns:
        ``1 / (sqrt(2 * pi) * stdev) * exp(-(x - mean)**2 / (2 * stdev**2))``.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    exponent = math.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
51+
52+
def calculateClassProbabilities(summaries, inputVector):
    """Score ``inputVector`` against the per-class summaries.

    For every class, the values returned by ``calculateProbability`` for
    each summarized attribute are multiplied together.

    Args:
        summaries: Dict mapping a class value to a sequence of
            ``(mean, stdev)`` pairs, one per attribute.
        inputVector: Sequence whose element ``i`` is scored against the
            ``i``-th ``(mean, stdev)`` pair of each class.

    Returns:
        A dict mapping each class value to the product of its attribute
        scores (``1`` for a class with no summarized attributes).

    Raises:
        IndexError: If ``inputVector`` has fewer elements than a class has
            summarized attributes.
    """
    probabilities = {}
    # dict.items() is used because dict.iteritems() does not exist in
    # Python 3.
    for classValue, classSummaries in summaries.items():
        score = 1
        for i, (attrMean, attrStdev) in enumerate(classSummaries):
            score *= calculateProbability(inputVector[i], attrMean, attrStdev)
        probabilities[classValue] = score
    return probabilities
61+
62+
def predict(summaries, inputVector):
    """Return the class value with the highest score for ``inputVector``.

    Args:
        summaries: Per-class summaries, passed through to
            ``calculateClassProbabilities``.
        inputVector: Row to classify.

    Returns:
        The key with the largest value in the dict returned by
        ``calculateClassProbabilities``; on a tie the key encountered first
        wins. ``None`` if that dict is empty.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    # max() returns the first maximal key, matching a strict ">" scan;
    # default=None covers the case of no classes at all.
    return max(probabilities, key=probabilities.get, default=None)
70+
71+
def getPredictions(summaries, testSet):
    """Run ``predict`` on every row of ``testSet``.

    Args:
        summaries: Per-class summaries, passed through to ``predict``.
        testSet: Iterable of rows to classify.

    Returns:
        A list with the result of ``predict`` for each row, in row order.
    """
    return [predict(summaries, row) for row in testSet]
77+
78+
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    Args:
        testSet: Sequence of rows; the last element of each row is compared
            with the prediction at the same position.
        predictions: Sequence with at least one entry per row of ``testSet``.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` yields 0.0
        instead of dividing by zero.

    Raises:
        ValueError: If ``predictions`` has fewer entries than ``testSet``.
    """
    if len(predictions) < len(testSet):
        # zip() would silently ignore the uncovered rows; fail loudly instead.
        raise ValueError("predictions must cover every row of testSet")
    if not testSet:
        return 0.0
    correct = sum(
        1 for row, predicted in zip(testSet, predictions) if row[-1] == predicted
    )
    return (correct / float(len(testSet))) * 100.0
84+
85+
def main(filename='', splitRatio=0.67):
    """Load a CSV dataset, split it, build the summaries and print accuracy.

    Args:
        filename: Path of the CSV dataset. When empty (the default), the
            first command-line argument is used instead.
        splitRatio: Fraction of the rows passed to ``splitDataset`` as the
            training share.

    Raises:
        SystemExit: If no filename is given and none is on the command line.
    """
    import sys  # local import: only needed for the command-line fallback

    if not filename:
        if len(sys.argv) < 2:
            raise SystemExit('usage: NaiveBayes.py <dataset.csv>')
        filename = sys.argv[1]

    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    # .format() must be applied to the string, not to print()'s return
    # value (None in Python 3).
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))
97+
98+
# Run the demo only when this file is executed as a script, so that
# importing the module does not trigger file loading and printing.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)