forked from armorleon/ProgrammerGuidToDataMining
-
Notifications
You must be signed in to change notification settings - Fork 0
/
testMedianAndASD.py
101 lines (78 loc) · 2.81 KB
/
testMedianAndASD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#
# Template -- please add code for the two functions
# getMedian
# getAbsoluteStandardDeviation
#
# also download the file athletesTrainingSet.txt, which you should
# put in the same folder as this file.
class Classifier:
def __init__(self, filename):
self.medianAndDeviation = []
# reading the data in from the file
f = open(filename)
lines = f.readlines()
f.close()
self.format = lines[0].strip().split('\t')
self.data = []
for line in lines[1:]:
fields = line.strip().split('\t')
ignore = []
vector = []
for i in range(len(fields)):
if self.format[i] == 'num':
vector.append(int(fields[i]))
elif self.format[i] == 'comment':
ignore.append(fields[i])
elif self.format[i] == 'class':
classification = fields[i]
self.data.append((classification, vector, ignore))
self.rawData = list(self.data)
##################################################
###
### FINISH THE FOLLOWING TWO METHODS
def getMedian(self, alist):
"""return median of alist"""
size = len(alist)
alist.sort()
"""TO BE DONE"""
if size == 0:
return 0
elif size%2 == 1:
return alist[size/2]
else:
return (alist[size/2] + alist[size/2 - 1])/2.0
def getAbsoluteStandardDeviation(self, alist, median):
"""given alist and median return absolute standard deviation"""
"""TO BE DONE"""
size = len(alist)
sum = 0
for f in alist:
sum += abs(f - median)
return sum/float(size)
###
###
##################################################
def unitTest():
list1 = [54, 72, 78, 49, 65, 63, 75, 67, 54]
list2 = [54, 72, 78, 49, 65, 63, 75, 67, 54, 68]
list3 = [69]
list4 = [69, 72]
classifier = Classifier('athletesTrainingSet.txt')
m1 = classifier.getMedian(list1)
m2 = classifier.getMedian(list2)
m3 = classifier.getMedian(list3)
m4 = classifier.getMedian(list4)
asd1 = classifier.getAbsoluteStandardDeviation(list1, m1)
asd2 = classifier.getAbsoluteStandardDeviation(list2, m2)
asd3 = classifier.getAbsoluteStandardDeviation(list3, m3)
asd4 = classifier.getAbsoluteStandardDeviation(list4, m4)
assert(round(m1, 3) == 65)
assert(round(m2, 3) == 66)
assert(round(m3, 3) == 69)
assert(round(m4, 3) == 70.5)
assert(round(asd1, 3) == 8)
assert(round(asd2, 3) == 7.5)
assert(round(asd3, 3) == 0)
assert(round(asd4, 3) == 1.5)
print("getMedian and getAbsoluteStandardDeviation work correctly")
unitTest()