# classifier_evaluation.py (forked from emreg00/toolbox)
from toolbox import guild_utilities, selection_utilities
from sklearn.metrics import roc_auc_score, average_precision_score
from selection_utilities import generate_samples_from_list_without_replacement
import numpy
def get_balanced_auc(predictions_true, predictions_false, replicable = None):
    """
    Returns (auc, sd, values_balanced_t, values_balanced_f), where the AUC is computed
    on class-balanced score lists (averaged over random balancings when replicable is None).
    """
    if replicable is not None:
        predictions_t, predictions_f = balance_predictions(predictions_true, predictions_false, n_random_negative_folds = None, replicable = replicable)
        auc = get_auc(predictions_t, predictions_f)
        sd = 0
        values_balanced_t = []
        values_balanced_f = []
    else:
        n_random = 100 #!
        values_balanced_t = None
        values_balanced_f = None
        values = numpy.empty(n_random)
        for i in xrange(n_random):
            #predictions_t, predictions_f = balance_predictions(predictions_true, predictions_false, n_random_negative_folds = None, replicable = replicable)
            predictions_t, predictions_f = balance_predictions(predictions_true, predictions_false, n_random_negative_folds = 1, replicable = replicable) #!
            #predictions_t, predictions_f = predictions_true, [ numpy.mean(predictions_false) ] * len(predictions_false) #!
            values[i] = get_auc(predictions_t, predictions_f)
            if values_balanced_t is None or values_balanced_f is None:
                values_balanced_t = numpy.array(predictions_t)
                values_balanced_f = numpy.array(predictions_f)
            else:
                values_balanced_t += predictions_t
                values_balanced_f += predictions_f
        auc = numpy.mean(values)
        sd = numpy.std(values)
        values_balanced_t /= n_random
        values_balanced_f /= n_random
    return auc, sd, values_balanced_t, values_balanced_f
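
# Illustrative usage (not part of the original module): a minimal sketch of
# get_balanced_auc on made-up score lists with more negatives than positives.
# It assumes selection_utilities provides the sampling generator imported above;
# the function is defined here for illustration and never called.
def _example_get_balanced_auc():
    positives = [0.9, 0.8, 0.75]
    negatives = [0.6, 0.4, 0.3, 0.2, 0.1, 0.05]
    auc, sd, values_t, values_f = get_balanced_auc(positives, negatives, replicable = None)
    print("balanced AUC: %.3f (sd over random balancings: %.3f)" % (auc, sd))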
def get_auc(predictions_true, predictions_false):
    predictions = predictions_true + predictions_false
    labels = [ 1 ] * len(predictions_true) + [ 0 ] * len(predictions_false)
    y_scores = numpy.array(predictions) # [0.1, 0.4, 0.35, 0.8]
    y_true = numpy.array(labels) # [0, 0, 1, 1]
    auc = roc_auc_score(y_true, y_scores)
    #fpr, tpr, thresholds = metrics.roc_curve(y_true, y_scores, pos_label=1)
    #auc = metrics.auc(fpr, tpr)
    return auc
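
# Illustrative usage (not part of the original module): the toy scores from the
# inline comments above give an ROC AUC of 0.75, since 3 of the 4
# positive/negative score pairs are ranked correctly.
def _example_get_auc():
    print(get_auc([0.35, 0.8], [0.1, 0.4]))  # expected: 0.75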
def get_auprc(predictions_true, predictions_false):
    predictions = predictions_true + predictions_false
    labels = [ 1 ] * len(predictions_true) + [ 0 ] * len(predictions_false)
    y_scores = numpy.array(predictions)
    y_true = numpy.array(labels)
    auprc = average_precision_score(y_true, y_scores)
    return auprc
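
# Illustrative usage (not part of the original module): on the same toy scores,
# average precision (area under the precision-recall curve) is about 0.83.
def _example_get_auprc():
    print(get_auprc([0.35, 0.8], [0.1, 0.4]))  # expected: ~0.83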
def balance_predictions(predictions_true, predictions_false, n_random_negative_folds = None, replicable=123):
    """
    n_random_negative_folds: Number of negative scores to be averaged to be assigned as a negative instance.
        If None, it is calculated to cover as many of the non-seed scores as possible.
    """
    assert len(predictions_true) != len(predictions_false)
    swap = False
    if len(predictions_false) < len(predictions_true):
        swap = True
        predictions = predictions_true
        predictions_true = predictions_false
        predictions_false = predictions
    negative_sample_size = len(predictions_true)
    negative_scores = [ 0.0 ] * negative_sample_size
    n_fold = 0
    for sample in generate_samples_from_list_without_replacement(predictions_false, negative_sample_size, n_random_negative_folds, replicable = replicable):
        if len(sample) < negative_sample_size: # last fold
            continue
        n_fold += 1
        for i, val in enumerate(sample):
            negative_scores[i] += val
    predictions_false = map(lambda x: x/n_fold, negative_scores)
    if swap:
        return predictions_false, predictions_true
    return predictions_true, predictions_false
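
# Illustrative usage (not part of the original module): a hedged sketch showing
# that the larger class is reduced to the size of the smaller one by averaging
# sampled negatives; exact values depend on selection_utilities' sampling.
def _example_balance_predictions():
    positives = [0.9, 0.8, 0.7]
    negatives = [0.5, 0.4, 0.3, 0.2, 0.1, 0.05]
    pos, neg = balance_predictions(positives, negatives, n_random_negative_folds = 1)
    print("%d positives, %d averaged negatives" % (len(pos), len(list(neg))))  # expected: 3, 3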
def create_R_script(file_name, absolute_dir, title=None, only=None, show_spread=False, vertical_average=False, append=False):
    """
    Writes an ROCR-based R script that reads predictions.dat and labels.dat from
    absolute_dir and produces ROC, precision-sensitivity and AUC plots
    (or only the AUC / cutoff part when `only` is set).
    """
    if title is not None:
        plot_title = title
    if append:
        f = open(file_name, "a")
    else:
        f = open(file_name, "w")
    f.write("library(ROCR)\n")
    f.write("v<-read.table(\"%spredictions.dat\")\n" % absolute_dir)
    f.write("l<-read.table(\"%slabels.dat\")\n" % absolute_dir)
    f.write("pred<-prediction(v, l)\n")
    f.write("average<-function(vals) {\n")
    f.write("\tm<-matrix(sapply(vals, function(x){mean(x)}),nrow=length(vals),byrow=T)\n")
    f.write("\tm[m==Inf]<-NA\n")
    f.write("\tmax(colMeans(m),na.rm=T)\n")
    f.write("}\n")
    if only == "auc":
        f.write("perfAUC<-performance(pred, \"auc\")\n")
        f.write("e=c(); n=c(); x=0; for ( i in perfAUC@y.values ) { x<-x+1; e[x] <- i; n[x]<-x }\n")
        if append:
            f.write("sink(\"%s%s_auc.txt\", append=TRUE, split=TRUE)\n" % (absolute_dir, title))
        else:
            f.write("sink(\"%sauc.txt\", append=TRUE, split=TRUE)\n" % absolute_dir)
        f.write("paste(format(mean(e), digits=3), format(sd(e), digits=3), sep=\" \")\n")
        f.write("sink()\n")
        f.close()
        return
    elif only == "cutoff":
        if title is None:
            plot_title = "Precision vs Sensitivity"
        f.write("perfPPV<-performance(pred, \"ppv\")\n")
        f.write("perfSens<-performance(pred, \"sens\")\n")
        if vertical_average:
            if show_spread:
                f.write("d<-average(perfPPV@x.values)\n")
                f.write("plot(perfPPV, lwd=2, col=2, ylab=\"Percentage\", main=\"%s\", avg=\"vertical\", plotCI.col=2, spread.estimate=\"stddev\", show.spread.at=seq(0,d,by=d/6))\n" % plot_title)
            else:
                f.write("plot(perfPPV, lwd=2, col=2, ylab=\"Percentage\", main=\"%s\", avg=\"vertical\")\n" % plot_title)
        else:
            f.write("plot(perfPPV, lwd=2, col=2, ylab=\"Percentage\", main=\"%s\", xlim=c(0,0.4), ylim=c(0,1))\n" % plot_title)
        if vertical_average:
            if show_spread:
                f.write("d<-average(perfSens@x.values)\n")
                f.write("plot(perfSens, lwd=2, col=3, avg=\"vertical\", plotCI.col=3, spread.estimate=\"stddev\", show.spread.at=seq(0,d,by=d/6), add=TRUE)\n")
            else:
                f.write("plot(perfSens, lwd=2, col=3, avg=\"vertical\", add=TRUE)\n")
        else:
            f.write("plot(perfSens, lwd=2, col=3, add=TRUE)\n")
        f.write("perf<-performance(pred, \"prbe\")\n")
        f.write("legend(\"bottomright\", c(\"Precision\", \"Sensitivity\", paste(\"(\", format(average(perf@x.values), digits=2), format(average(perf@y.values), digits=2), \")\", sep=\" \")), lty=c(1,1,0), col=c(2,3,1))\n")
        f.close()
        return
    # ROC
    f.write("perfROC<-performance(pred, \"tpr\", \"fpr\")\n")
    f.write("png(\"%sroc.png\")\n" % absolute_dir)
    if title is None:
        plot_title = "ROC curve"
    if show_spread:
        f.write("plot(perfROC, lwd=2, col=2, xlab=\"False Positive Rate\", ylab=\"True Positive Rate\", main=\"%s\", avg=\"vertical\", plotCI.col=2, spread.estimate=\"stddev\", show.spread.at=seq(0,1,by=0.20))\n" % plot_title)
    else:
        f.write("plot(perfROC, lwd=2, col=2, xlab=\"False Positive Rate\", ylab=\"True Positive Rate\", main=\"%s\", avg=\"vertical\")\n" % plot_title)
    f.write("legend(\"bottomright\", c(\"(Avg. over xval folds)\"), lty=c(1), col=c(2))\n")
    f.write("dev.off()\n")
    # Cutoff (PPV - Sens)
    f.write("perfPPV<-performance(pred, \"ppv\")\n")
    f.write("perfSens<-performance(pred, \"sens\")\n")
    f.write("png(\"%scutoff.png\")\n" % absolute_dir)
    if title is None:
        plot_title = "Precision vs Sensitivity"
    if show_spread:
        f.write("d<-average(perfPPV@x.values)\n")
        f.write("plot(perfPPV, lwd=2, col=2, ylab=\"Percentage\", main=\"%s\", avg=\"vertical\", plotCI.col=2, spread.estimate=\"stddev\", show.spread.at=seq(0,d,by=d/6))\n" % plot_title)
    else:
        f.write("plot(perfPPV, lwd=2, col=2, ylab=\"Percentage\", main=\"%s\", avg=\"vertical\")\n" % plot_title)
    if show_spread:
        f.write("d<-average(perfSens@x.values)\n")
        f.write("plot(perfSens, lwd=2, col=3, avg=\"vertical\", plotCI.col=3, spread.estimate=\"stddev\", show.spread.at=seq(0,d,by=d/6), add=TRUE)\n")
    else:
        f.write("plot(perfSens, lwd=2, col=3, avg=\"vertical\", add=TRUE)\n")
    f.write("perf<-performance(pred, \"prbe\")\n")
    f.write("legend(\"bottomright\", c(\"Precision\", \"Sensitivity\", paste(\"(\", format(average(perf@x.values), digits=2), format(average(perf@y.values), digits=2), \")\", sep=\" \")), lty=c(1,1,0), col=c(2,3,1))\n")
    f.write("dev.off()\n")
    # AUC
    if title is None:
        plot_title = "Area Under ROC Curve (AUC)"
    f.write("png(\"%sauc.png\")\n" % absolute_dir)
    f.write("perfAUC<-performance(pred, \"auc\")\n")
    f.write("e=c(); n=c(); x=0; for ( i in perfAUC@y.values ) { x<-x+1; e[x] <- i; n[x]<-x }; barplot(e, names=n, ylim=c(0,1),ylab= \"AUC\",xlab=\"Fold\", main=\"%s\")\n" % plot_title)
    f.write("legend(\"topright\", c(paste(\"(Avg: \", format(mean(e), digits=3), \")\",sep=\"\")), lty=c(), col=c())\n")
    f.write("dev.off()\n")
    f.write("sink(\"%sauc.txt\", append=TRUE, split=TRUE)\n" % absolute_dir)
    f.write("paste(format(mean(e), digits=3), format(sd(e), digits=3), sep=\" \")\n")
    f.write("sink()\n")
    f.close()
    #os.system("R CMD BATCH %s" % "*.R")
    return
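
# Illustrative usage (not part of the original module): write the full R script
# to the current directory; "./" must end with a slash because it is prepended
# directly to predictions.dat / labels.dat inside the generated script. The
# script could then be run with, e.g., "R CMD BATCH evaluate.R" (hypothetical name).
def _example_create_R_script():
    create_R_script("evaluate.R", "./", title = "Toy evaluation")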
def create_ROCR_files(list_node_scores_and_labels, file_predictions, file_labels):
    """
    list_node_scores_and_labels: list (one per xval fold) of lists of (score, label)
        tuples (one per validation node).
    Writes tab-delimited predictions and labels files with one column per fold,
    in the format read by the R script generated above.
    """
    f_pred = open(file_predictions, 'w')
    f_lab = open(file_labels, 'w')
    firstTime = True
    for i, node_scores_and_labels in enumerate(zip(*list_node_scores_and_labels)):
        if i == 0:
            for j in xrange(len(node_scores_and_labels)):
                f_pred.write("\tFold" + str(j+1))
                f_lab.write("\tFold" + str(j+1))
            f_pred.write("\n")
            f_lab.write("\n")
        f_pred.write("%d"%(i+1))
        f_lab.write("%d"%(i+1))
        for (score, label) in node_scores_and_labels:
            f_pred.write("\t" + str(score))
            f_lab.write("\t" + str(label))
        f_pred.write("\n")
        f_lab.write("\n")
    f_pred.close()
    f_lab.close()
    return
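
# Illustrative usage (not part of the original module): two hypothetical
# cross-validation folds, each with one (score, label) tuple per validation
# node, written to the ROCR input files that the generated R script reads.
def _example_create_ROCR_files():
    fold1 = [ (0.9, 1), (0.7, 1), (0.4, 0), (0.2, 0) ]
    fold2 = [ (0.8, 1), (0.6, 1), (0.5, 0), (0.1, 0) ]
    create_ROCR_files([fold1, fold2], "predictions.dat", "labels.dat")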
def get_validation_node_scores_and_labels(file_result, file_seed_test_scores, file_node_scores, n_random_negative_folds = None, n_negatives = None, default_score = 0, replicable = 123, candidates_file = None, previous_negative_sample_size=None):
    """
    Returns a list of (score, label) tuples for validation, where label is 1 for test
    seeds and 0 for (averaged) negative instances, together with the negative sample size.
    file_result: File to parse output scores
    file_seed_test_scores: File to parse test seeds
    file_node_scores: File to parse all non-seeds
    n_negatives: Number of negative instances
        If None, the same as the number of test nodes
    n_random_negative_folds: Number of non-seed scores to be averaged to be assigned as a negative instance
        If None, calculated to cover as many of the non-seed scores as possible
        If 0, all negative data is used
    default_score: All nodes that have a higher score than this score in file_node_scores will be considered as seeds
    """
    from guild_utilities import get_node_to_score, get_nodes
    node_to_score = get_node_to_score(file_result)
    test_nodes = get_nodes(file_seed_test_scores)
    initial_to_score = get_node_to_score(file_node_scores)
    non_seeds = set([ node for node, score in initial_to_score.iteritems() if score==default_score ])
    node_validation_data = [ (node_to_score[node], 1) for node in test_nodes ]
    if candidates_file is not None:
        candidates = get_nodes(candidates_file)
        node_to_score = dict([ (node, node_to_score[node]) for node in candidates ])
        non_seeds = list(non_seeds & candidates)
    if n_random_negative_folds == 0:
        negative_sample_size = None
        node_validation_data.extend([ (node_to_score[node], 0) for node in set(node_to_score.keys()) & non_seeds ])
    else:
        n_actual_folds = 0
        if n_negatives is None:
            n_negatives = len(test_nodes)
        negative_sample_size = n_negatives
        if previous_negative_sample_size is not None:
            if previous_negative_sample_size > negative_sample_size:
                negative_sample_size = previous_negative_sample_size
        negative_scores = [ 0 ] * negative_sample_size
        non_seeds = list(non_seeds)
        for sample in generate_samples_from_list_without_replacement(non_seeds, negative_sample_size, n_random_negative_folds, replicable = replicable):
            for i, node in enumerate(sample):
                negative_scores[i] += node_to_score[node]
            n_actual_folds += 1
        node_validation_data.extend(map(lambda x: (x/n_actual_folds, 0), negative_scores))
    return node_validation_data, negative_sample_size
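
# Illustrative usage (not part of the original module): a sketch only, since it
# needs GUILD result/seed/score files on disk (the paths below are hypothetical)
# and the guild_utilities parsers. It shows how the returned (score, label)
# pairs can be fed into get_auc.
def _example_get_validation_data():
    data, negative_sample_size = get_validation_node_scores_and_labels(
        file_result = "output/node_scores.txt",        # hypothetical path
        file_seed_test_scores = "data/test_seeds.txt", # hypothetical path
        file_node_scores = "data/initial_scores.txt")  # hypothetical path
    scores_true = [ score for score, label in data if label == 1 ]
    scores_false = [ score for score, label in data if label == 0 ]
    print(get_auc(scores_true, scores_false))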
def calculate_performance_metric_counts_using_random_negatives(node_to_score, setNodeTest, non_seeds, score_threshold, n_random_negative_folds = None, replicable=123):
    from selection_utilities import generate_samples_from_list_without_replacement
    (nTP, nFP, nFN, nTN) = (0.0, 0.0, 0.0, 0.0)
    for id, score in node_to_score.iteritems(): # if candidates based - for each candidate
        if id in setNodeTest: # in the initial association file
            if score >= score_threshold:
                nTP += 1
            else:
                nFN += 1
    if n_random_negative_folds == 0:
        for id, score in node_to_score.iteritems():
            if id in non_seeds:
                if score >= score_threshold:
                    nFP += 1
                else:
                    nTN += 1
    else:
        n_actual_folds = 0
        for sample in generate_samples_from_list_without_replacement(non_seeds, len(setNodeTest), n_random_negative_folds, replicable = replicable):
            setNegative = set(sample)
            n_actual_folds += 1
            for id, score in node_to_score.iteritems():
                if id in setNegative:
                    if score >= score_threshold:
                        nFP += 1
                    else:
                        nTN += 1
        nFP /= n_actual_folds
        nTN /= n_actual_folds
    return (nTP, nFP, nFN, nTN)
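
# Illustrative usage (not part of the original module): with
# n_random_negative_folds = 0 all non-seeds are used as negatives, so no random
# sampling is involved. Here A and B are held-out positives; at threshold 0.5
# this yields (nTP, nFP, nFN, nTN) = (1.0, 1.0, 1.0, 3.0).
def _example_count_confusion():
    node_to_score = { "A": 0.9, "B": 0.4, "C": 0.8, "D": 0.3, "E": 0.2, "F": 0.1 }
    counts = calculate_performance_metric_counts_using_random_negatives(
        node_to_score, set(["A", "B"]), ["C", "D", "E", "F"], 0.5, n_random_negative_folds = 0)
    print(counts)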
def calculatePerformance(nTP, nFP, nFN, nTN):
    try:
        acc = (nTP + nTN) / (nTP + nFP + nTN + nFN)
    except ZeroDivisionError:
        acc = None
    try:
        sens = nTP / (nTP + nFN)
    except ZeroDivisionError:
        sens = None
    try:
        spec = nTN / (nTN + nFP)
    except ZeroDivisionError:
        spec = None
    try:
        ppv = nTP / (nTP + nFP)
    except ZeroDivisionError:
        ppv = None
    #if spec is not None:
    #    return (sens, (1-spec))
    #else:
    #    return (sens, None)
    return (acc, sens, spec, ppv)
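
# Illustrative usage (not part of the original module): accuracy, sensitivity,
# specificity and PPV from confusion-matrix counts. Counts are passed as floats
# (as the counting function above produces) so the divisions are not truncated
# under Python 2.
def _example_calculate_performance():
    acc, sens, spec, ppv = calculatePerformance(nTP = 40.0, nFP = 10.0, nFN = 20.0, nTN = 30.0)
    print("acc=%.2f sens=%.2f spec=%.2f ppv=%.2f" % (acc, sens, spec, ppv))
    # expected: acc=0.70 sens=0.67 spec=0.75 ppv=0.80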