"""
=======================================
Metrics specific to imbalanced learning
=======================================

Specific metrics have been developed to evaluate classifiers that have been
trained on imbalanced data. `imblearn` mainly provides two additional metrics
that are not implemented in `sklearn`: (i) the geometric mean and (ii) the
index balanced accuracy.
"""
# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
# License: MIT

from sklearn import datasets
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

from imblearn import over_sampling as os
from imblearn import pipeline as pl
from imblearn.metrics import (geometric_mean_score,
                              make_index_balanced_accuracy)

print(__doc__)

RANDOM_STATE = 42
# Generate a dataset
X, y = datasets.make_classification(n_classes=3, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

pipeline = pl.make_pipeline(os.SMOTE(random_state=RANDOM_STATE),
                            LinearSVC(random_state=RANDOM_STATE))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     random_state=RANDOM_STATE)

# Train the classifier with balancing
pipeline.fit(X_train, y_train)

# Test the classifier and get the prediction
y_pred_bal = pipeline.predict(X_test)
###############################################################################
# The geometric mean corresponds to the square root of the product of the
# sensitivity and specificity. For multi-class problems it generalizes to the
# geometric mean of the per-class recalls (sensitivities). Combining these
# quantities accounts for the balancing of the dataset: a classifier that
# ignores the minority class obtains a low geometric mean even when its
# overall accuracy is high.
print('The geometric mean is {}'.format(geometric_mean_score(
    y_test,
    y_pred_bal)))
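
###############################################################################
# As a sanity check, the same value can be recovered by hand from the
# per-class recalls, i.e. the class-wise sensitivities. This is a minimal
# sketch, assuming the default `average='multiclass'` of
# `geometric_mean_score`.
import numpy as np
from sklearn.metrics import recall_score

# Recall (sensitivity) of each class present in the test set.
per_class_recall = recall_score(y_test, y_pred_bal, average=None)
# Geometric mean of the class-wise recalls.
manual_gmean = np.prod(per_class_recall) ** (1.0 / len(per_class_recall))
print('Manually computed geometric mean: {}'.format(manual_gmean))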
###############################################################################
# The index balanced accuracy (IBA) can transform any metric for use in
# imbalanced learning problems. It weights the metric by the dominance, i.e.
# the difference between sensitivity and specificity, and the parameter
# `alpha` controls the strength of this weighting.
alpha = 0.1
geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)(
    geometric_mean_score)

print('The IBA using alpha = {} and the geometric mean: {}'.format(
    alpha, geo_mean(
        y_test,
        y_pred_bal)))

alpha = 0.5
geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)(
    geometric_mean_score)

print('The IBA using alpha = {} and the geometric mean: {}'.format(
    alpha, geo_mean(
        y_test,
        y_pred_bal)))
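
###############################################################################
# Since the factory wraps a scoring function, the same weighting can be
# applied to other metrics as well. Below is a minimal sketch, assuming the
# wrapper accepts any metric with a `(y_true, y_pred, ...)` signature; it
# wraps `sensitivity_score` from `imblearn.metrics`, passing `average='macro'`
# explicitly because the target here is multi-class.
from imblearn.metrics import sensitivity_score

alpha = 0.1
# Wrap the sensitivity (recall) score with the IBA weighting.
iba_sensitivity = make_index_balanced_accuracy(alpha=alpha, squared=True)(
    sensitivity_score)
print('The IBA using alpha = {} and the sensitivity: {}'.format(
    alpha, iba_sensitivity(y_test, y_pred_bal, average='macro')))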