1
import csv
import math
import random
4
+
5
def loadCsv(filename):
    """Load a purely numeric CSV file into a list of rows of floats.

    Args:
        filename: Path of the CSV file to read. Every field of every row
            must be parseable by ``float``.

    Returns:
        A list containing one list of floats per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    # The original passed an empty mode string to open(), which is rejected,
    # and never closed the handle. Open read-only in text mode with
    # newline="" (as the csv module requires) inside a with-block.
    with open(filename, newline="") as handle:
        # csv.reader yields [] for blank lines; drop them so every returned
        # row actually has a last column.
        return [[float(x) for x in row] for row in csv.reader(handle) if row]
11
+
12
def splitDataset(dataset, splitRatio):
    """Randomly partition ``dataset`` into a training part and a remainder.

    Args:
        dataset: Sequence of rows. It is copied, not modified.
        splitRatio: Fraction of the rows to move into the training set; the
            training size is ``int(len(dataset) * splitRatio)``.

    Returns:
        A two-element list ``[trainSet, rest]`` where ``trainSet`` holds the
        randomly drawn rows and ``rest`` holds the rows that were not drawn,
        in their original relative order.

    Raises:
        ValueError: If ``splitRatio`` asks for more rows than exist, because
            ``random.randrange`` is then called on an empty range.
    """
    # Fixes relative to the original: `into` -> `int`, `whilelen` ->
    # `while len`, and the misspelled `trainset` -> `trainSet`.
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    remaining = list(dataset)  # shallow copy so the caller's list is untouched
    while len(trainSet) < trainSize:
        index = random.randrange(len(remaining))
        trainSet.append(remaining.pop(index))
    return [trainSet, remaining]
20
+
21
def separateByClass(dataset):
    """Group the rows of ``dataset`` by their last element.

    Args:
        dataset: Iterable of non-empty rows; the last element of each row is
            used as the grouping key and must be hashable.

    Returns:
        A dict mapping each distinct last-element value to the list of rows
        carrying that value, in the order the rows were encountered.
    """
    separated = {}
    for vector in dataset:
        # setdefault creates the bucket the first time a key is seen.
        separated.setdefault(vector[-1], []).append(vector)
    return separated
29
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Args:
        numbers: Sized collection of numeric values (``len`` is applied).

    Returns:
        ``sum(numbers)`` divided by the number of values.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    # float() keeps the division a true division for integer inputs.
    return sum(numbers) / float(len(numbers))
31
+
32
def stdev(numbers):
    """Return the sample standard deviation (N - 1 denominator) of ``numbers``.

    Args:
        numbers: Sized collection of at least two numeric values.

    Returns:
        The square root of the sum of squared deviations from the mean
        divided by ``len(numbers) - 1``.

    Raises:
        ValueError: If fewer than two values are supplied, since the N - 1
            denominator would be zero or negative.
    """
    # Fixes relative to the original: the `def` line lacked its colon, the
    # denominator was written `len(numbers - 1)` (subtracting from the
    # collection itself), and the function neither took the square root nor
    # returned anything.
    count = len(numbers)
    if count < 2:
        raise ValueError("stdev requires at least two values")
    # Mean is computed inline so this function has no dependency on any
    # other definition.
    avg = sum(numbers) / float(count)
    variance = sum((x - avg) ** 2 for x in numbers) / float(count - 1)
    return math.sqrt(variance)
35
+
36
def summarize(dataset):
    """Compute ``(mean, stdev)`` for every column except the last one.

    Args:
        dataset: Sequence of equally long rows. The final column is excluded
            from the summary (the rest of this file uses it as the class
            label).

    Returns:
        A list with one ``(mean, stdev)`` tuple per remaining column, in
        column order. Empty when ``dataset`` is empty or has one column.
    """
    # zip(*dataset) transposes rows into columns. Dropping the last column
    # before computing avoids statistics that the original computed and then
    # deleted, and avoids its IndexError on an empty dataset.
    columns = list(zip(*dataset))[:-1]
    return [(mean(column), stdev(column)) for column in columns]
40
+
41
def summarizeByClass(dataset):
    """Build per-class column summaries for ``dataset``.

    The rows are grouped with ``separateByClass`` and each group is passed to
    ``summarize``.

    Args:
        dataset: Sequence of rows in the form ``separateByClass`` accepts.

    Returns:
        A dict mapping each class value to the value ``summarize`` returns
        for the rows of that class.
    """
    separated = separateByClass(dataset)
    # .items() replaces dict.iteritems(), which does not exist on Python 3.
    return {
        classValue: summarize(instances)
        for classValue, instances in separated.items()
    }
47
+
48
def calculateProbability(x, mean, stdev):
    """Evaluate the Gaussian probability density at ``x``.

    Computes ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2 * pi) * stdev)``.

    Args:
        x: Point at which to evaluate the density.
        mean: Mean of the distribution. (The parameter name shadows the
            module-level ``mean`` function inside this body only.)
        stdev: Standard deviation of the distribution; must be non-zero.

    Returns:
        The density value as a float.

    Raises:
        ZeroDivisionError: If ``stdev`` is zero.
    """
    # math.pow always works in floats, so the division below is a true
    # division even when the arguments are integers.
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
51
+
52
def calculateClassProbabilities(summaries, inputVector):
    """Score ``inputVector`` against every class in ``summaries``.

    For each class, the score is the product of ``calculateProbability``
    evaluated for each summarised attribute, starting from 1. The products
    are not normalised and no class prior is applied.

    Args:
        summaries: Dict mapping class value to a sequence of
            ``(mean, stdev)`` pairs, one per attribute.
        inputVector: Indexable sequence with at least as many leading
            elements as there are pairs per class; extra trailing elements
            are ignored.

    Returns:
        A dict mapping each class value to its product score.

    Raises:
        IndexError: If ``inputVector`` is shorter than a class's summary list.
    """
    probabilities = {}
    # .items() replaces dict.iteritems(), which does not exist on Python 3.
    for classValue, classSummaries in summaries.items():
        likelihood = 1
        # Distinct local names avoid shadowing the mean()/stdev() functions.
        for i, (attrMean, attrStdev) in enumerate(classSummaries):
            likelihood *= calculateProbability(inputVector[i], attrMean, attrStdev)
        probabilities[classValue] = likelihood
    return probabilities
61
+
62
def predict(summaries, inputVector):
    """Return the class whose score for ``inputVector`` is largest.

    Scores come from ``calculateClassProbabilities``. On ties the class
    encountered first in the score dict wins.

    Args:
        summaries: Per-class summaries, passed through unchanged.
        inputVector: The row to classify, passed through unchanged.

    Returns:
        The best-scoring class value, or ``None`` when there are no classes.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    if not probabilities:
        # max() raises on an empty iterable; return None as the original
        # explicit loop would have.
        return None
    # max() keeps the first maximal key, matching the original strict `>`.
    return max(probabilities, key=probabilities.get)
70
+
71
def getPredictions(summaries, testSet):
    """Classify every row of ``testSet`` with ``predict``.

    Args:
        summaries: Per-class summaries forwarded to ``predict``.
        testSet: Iterable of rows to classify.

    Returns:
        A list with the ``predict`` result for each row, in row order.
    """
    return [predict(summaries, row) for row in testSet]
77
+
78
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    Args:
        testSet: Sized sequence of non-empty rows; the last element of each
            row is compared with the prediction at the same index.
        predictions: Indexable sequence with at least ``len(testSet)`` items.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` yields 0.0
        rather than dividing by zero.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0
    # Index into predictions (rather than zip) so that a length mismatch
    # raises instead of being silently truncated.
    correct = sum(1 for i, row in enumerate(testSet) if row[-1] == predictions[i])
    return (correct / float(total)) * 100.0
84
+
85
def main(filename='', splitRatio=0.67):
    """Run the load / split / summarise / predict / score pipeline and print results.

    Args:
        filename: Path of the CSV file handed to ``loadCsv``. The default is
            the empty string carried over from the original hard-coded value.
            NOTE(review): an empty path cannot be opened; pass a real
            dataset path.
        splitRatio: Fraction of rows used for training, handed to
            ``splitDataset``.

    Prints the split sizes and the accuracy percentage; returns nothing.
    """
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    # .format must be applied to the string, not to print()'s return value:
    # on Python 3 print() returns None, so the original raised AttributeError
    # after printing the unformatted template.
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))
97
+
98
# Run the pipeline only when executed as a script, so importing this module
# for its functions does not trigger file I/O.
if __name__ == "__main__":
    main()
0 commit comments