1
+ import Learn as lr
2
+ import Data as dta
3
+ import Perspective as pst
4
+
5
+ import copy
6
+
7
+ from sklearn .naive_bayes import GaussianNB
8
+ from sklearn import tree
9
+ from sklearn .metrics import accuracy_score
10
+ from sklearn import svm
11
+
12
+
13
# Training data. The first argument is the CSV path; the second is passed
# straight to dta.load_data (presumably a row limit -- TODO confirm).
df = dta.load_data(
    "/home/sean/Downloads/Research/Projects/MPML Library/botnet_train3.csv",
    50000,
)

# Threshold forwarded to dta.convert_discrete and lr.MPML.
thresh = 5

# Estimator shared by the analysis functions below. probability=True is
# needed because they call predict_proba on it.
# Alternatives that were tried here: GaussianNB(), tree.DecisionTreeClassifier().
Algo = svm.SVC(probability=True, gamma='scale')
19
+
20
+ # dta.convert_discrete(df,thresh)
21
+
22
+ # perspectiveList = pst.generatePerspectives(df,"class")
23
+
24
+ # lr.MPML(df,GaussianNB(),"class",thresh,perspectiveList)
25
+
26
+ # Write a function that removes one perspective at a time and records the result without each.
27
+
28
def analysePerspective(DataFrame, target, instNum):
    """Print an ablation report showing what each perspective contributes.

    The function builds the perspectives and their models, prints the
    majority-vote result with every perspective present, and then repeats
    the vote once per perspective with that perspective (and its model)
    removed. For each removal it prints the change in the majority-vote
    result and the change in the last column of the prediction row for
    ``instNum``. It finishes by running ``analyseFeatures`` on the full
    perspective list.

    Args:
        DataFrame: Data handed to ``pst.generatePerspectives`` and
            ``lr.MPML``; row ``instNum`` of it is printed first.
        target: Name of the label column; it is dropped from each
            perspective before predicting.
        instNum: Positional (``iloc``) index of the instance to inspect.

    Returns:
        None. All results are written to stdout.

    Relies on the module-level ``Algo`` estimator and ``thresh`` value.
    """
    # NOTE(review): the nesting below was reconstructed from data flow
    # (statements using ``i`` are inside the ablation loop, the final
    # analyseFeatures call is outside it) -- confirm against the original.

    # Use the DataFrame that was passed in rather than the module global.
    print(DataFrame.iloc[instNum])

    perspectiveList = pst.generatePerspectives(DataFrame, target)
    models = lr.MPML(DataFrame, Algo, target, thresh, perspectiveList)

    new_df = lr.instancePrediction(perspectiveList, target, models)
    y = lr.majorityVote(new_df)
    print("\n --------------------------------------------------------------------------------------")

    print("Majority vote Acuracy with all perspective = {}".format(y))
    print("Results for instance #{}".format(instNum))
    print("1 is Not | 2 is Bot")
    print("\n ======================================================")
    print(new_df.iloc[instNum])

    # Last column of the instance's row, read positionally. Explicit iloc
    # avoids depending on Series[-1] falling back to positional lookup.
    # Presumably a per-class confidence pair -- it is indexed with [0]/[1].
    y2 = new_df.iloc[instNum, -1]

    # Stand-alone accuracy of every perspective's model on its own split.
    accuracies = []
    for model, perspective in zip(models, perspectiveList):
        _, x_test, _, y_test = dta.data_setup(perspective, target)
        accuracies.append(accuracy_score(y_test, model.predict(x_test)))

    for i in range(len(models)):

        print("\n --------------------------------------------------------------------------------------")

        print("Result without Perspective {}".format(i))

        # Deep copies so the helpers in lr cannot alter the full ensemble
        # while we evaluate it with one member removed.
        remaining_models = copy.deepcopy(models)
        remaining_perspectives = copy.deepcopy(perspectiveList)
        del remaining_models[i]
        del remaining_perspectives[i]

        print("Persective {} Acuracy on it's own = ".format(i) + str(accuracies[i]))
        new_df = lr.instancePrediction(remaining_perspectives, target, remaining_models)
        y_hat = lr.majorityVote(new_df)

        print("Majority vote Acuracy without this perspective = {}".format(y_hat))

        d = y - y_hat
        print("Majority vote Impact Score = {}".format(d))

        y2_hat = new_df.iloc[instNum, -1]
        print("Confidence Leverl without perspective {}".format(i))
        print(y2_hat)

        # What the removed perspective's own model says about the instance.
        features = perspectiveList[i].drop(target, axis=1).values
        instance = features[instNum].reshape(1, -1)
        print("\n ======================================================")
        print("Current Perspetive Prediction and Confidence:")
        print("Prediction = {}".format(models[i].predict(instance)[0]))
        print("Confidence = {}".format(models[i].predict_proba(instance)[0] * 100))

        print("\n Confidence impact score: y - ŷ = d")
        d2 = [y2[0] - y2_hat[0], y2[1] - y2_hat[1]]
        print(d2)

    analyseFeatures(perspectiveList, target, Algo, instNum)
112
+
113
+
114
+
115
+ # Write a function that will give the confidence and prediction results given a single model and instance.
116
+
117
def analyseFeatures(perspectiveList, target, clf, instNum):
    """Print how each feature affects the confidence for one instance.

    For every perspective the estimator is fitted on all features and the
    prediction and class confidences (``predict_proba * 100``) for
    ``instNum`` are printed. Then, for each non-target column, the
    estimator is refitted without that column and the difference between
    the two confidence vectors is printed, followed by the result of
    ``pst.getFeaturesRelations`` between the removed feature and each
    remaining column.

    Args:
        perspectiveList: Sequence of perspective frames, each containing
            the ``target`` column. The frames are not modified; all work
            is done on copies.
        target: Name of the label column.
        clf: Estimator exposing ``fit``, ``predict`` and ``predict_proba``.
            It is refitted repeatedly, so its fitted state on return is
            that of the last fit performed here.
        instNum: Positional row index of the instance to inspect.

    Returns:
        None. All results are written to stdout.

    Relies on the module-level ``thresh`` value. Only the first two
    entries of each confidence vector are compared.
    """
    # NOTE(review): nesting was reconstructed from data flow; in particular
    # the relations loop is assumed to sit inside the ``feature != target``
    # branch -- confirm against the original.
    for i, original in enumerate(perspectiveList):

        print("\n @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
        print("Confidence with all features")

        # Work on a private copy: convert_discrete is called for its side
        # effect, and the caller's perspective must not be altered by it.
        working = copy.deepcopy(original)
        dta.convert_discrete(working, thresh)
        all_features = working.drop(target, axis=1).values

        x_train, _, y_train, _ = dta.data_setup(working, target)
        clf = clf.fit(x_train, y_train)

        instance = all_features[instNum].reshape(1, -1)
        print(clf.predict(instance)[0])
        baseline = clf.predict_proba(instance)[0] * 100
        print(baseline)

        for feature in working.columns:
            if feature == target:
                # The class label is not a feature.
                continue

            print("\n ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print("Confidence of P{} without feature - {}".format(i, feature))

            # Copy only this perspective (not the whole list) and drop
            # the feature under test.
            reduced = copy.deepcopy(working)
            del reduced[feature]

            dta.convert_discrete(reduced, thresh)
            reduced_features = reduced.drop(target, axis=1).values

            x_train, _, y_train, _ = dta.data_setup(reduced, target)
            clf = clf.fit(x_train, y_train)

            y_hat = clf.predict_proba(reduced_features[instNum].reshape(1, -1))[0] * 100

            print("\n Confidence impact score: y - ŷ = d for features")
            d = [baseline[0] - y_hat[0], baseline[1] - y_hat[1]]
            print(d)

            print("\n ******************************************************************************")
            print("Relations of P{} features - {}".format(i, feature))

            for f2 in reduced.columns:
                relation = pst.getFeaturesRelations(feature, f2)
                print("{} & {} => {}".format(feature, f2, relation))
187
+
188
+
189
# Script entry: run the perspective/feature analysis for row 345 of df,
# using "class" as the label column.
analysePerspective(DataFrame=df, target="class", instNum=345)
190
+
191
+
192
+ # Make this function more efficient by not re-creating all the models every time. Find a way to build the models once, then iterate through them to get the result on each dataset.
0 commit comments