mcm.py
"""mcm"""
import pandas as pd
def mcm(tn, fp, fn, tp):
    """Let a confusion matrix be laid out like this:

               N    P
            +----+----+
            |    |    |
            | TN | FP |
            |    |    |
            +----+----+
            |    |    |
            | FN | TP |
            |    |    |
            +----+----+

    The observed (predicted) values run along the columns and the expected
    (actual) values along the rows, with the positive class in the right
    column. With these definitions, the cells are read as TN, FP, FN and TP,
    in that order.

    Parameters
    ----------
    tn : int
        True Negatives (TN): the total number of outcomes where the model
        correctly predicts the negative class.
    fp : int
        False Positives (FP): the total number of outcomes where the model
        incorrectly predicts the positive class.
    fn : int
        False Negatives (FN): the total number of outcomes where the model
        incorrectly predicts the negative class.
    tp : int
        True Positives (TP): the total number of outcomes where the model
        correctly predicts the positive class.

    Returns
    -------
    pandas.DataFrame
        DataFrame with one row per metric and the columns 'Metric' and 'Value'.

    Notes
    -----
    https://en.wikipedia.org/wiki/Confusion_matrix
    https://developer.lsst.io/python/numpydoc.html
    https://www.mathworks.com/help/risk/explore-fairness-metrics-for-credit-scoring-model.html

    Examples
    --------
    from sklearn.metrics import confusion_matrix
    data = pd.DataFrame({
        'y_true': ['Positive']*47 + ['Negative']*18,
        'y_pred': ['Positive']*37 + ['Negative']*10 + ['Positive']*5 + ['Negative']*13})
    tn, fp, fn, tp = confusion_matrix(y_true=data.y_true,
                                      y_pred=data.y_pred,
                                      labels=['Negative', 'Positive']).ravel()
    mcm(tn, fp, fn, tp)
    """
    df_mcm = []
    df_mcm.append(['Sensitivity', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['Recall', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['True Positive Rate (TPR)', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['Specificity', npy.nan if (tn + fp) == 0 else tn / (tn + fp)])
    df_mcm.append(['True Negative Rate (TNR)', npy.nan if (tn + fp) == 0 else tn / (tn + fp)])
    df_mcm.append(['Precision', npy.nan if (tp + fp) == 0 else tp / (tp + fp)])
    df_mcm.append(['Positive Predictive Value (PPV)', npy.nan if (tp + fp) == 0 else tp / (tp + fp)])
    df_mcm.append(['Negative Predictive Value (NPV)', npy.nan if (tn + fn) == 0 else tn / (tn + fn)])
    df_mcm.append(['False Negative Rate (FNR)', npy.nan if (fn + tp) == 0 else fn / (fn + tp)])
    df_mcm.append(['False Positive Rate (FPR)', npy.nan if (fp + tn) == 0 else fp / (fp + tn)])
    df_mcm.append(['False Discovery Rate (FDR)', npy.nan if (fp + tp) == 0 else fp / (fp + tp)])
    df_mcm.append(['Rate of Positive Predictions (RPP)', npy.nan if (tn + tp + fn + fp) == 0 else (fp + tp) / (tn + tp + fn + fp)])
    df_mcm.append(['Rate of Negative Predictions (RNP)', npy.nan if (tn + tp + fn + fp) == 0 else (fn + tn) / (tn + tp + fn + fp)])
    df_mcm.append(['Accuracy', npy.nan if (tn + tp + fn + fp) == 0 else (tp + tn) / (tp + tn + fp + fn)])
    df_mcm.append(['F1 Score', npy.nan if (2*tp + fp + fn) == 0 else 2*tp / (2*tp + fp + fn)])
    # Guard the likelihood ratios against undefined rates and zero denominators.
    tpr = npy.nan if (tp + fn) == 0 else tp / (tp + fn)
    fpr = npy.nan if (fp + tn) == 0 else fp / (fp + tn)
    fnr = npy.nan if (fn + tp) == 0 else fn / (fn + tp)
    tnr = npy.nan if (tn + fp) == 0 else tn / (tn + fp)
    df_mcm.append(['Positive Likelihood Ratio (LR+)', npy.nan if fpr == 0 else tpr / fpr])
    df_mcm.append(['Negative Likelihood Ratio (LR-)', npy.nan if tnr == 0 else fnr / tnr])
    return pd.DataFrame(df_mcm, columns=['Metric', 'Value'])
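

# A minimal usage sketch: feed mcm() the counts implied by the docstring
# example (TN=13, FP=5, FN=10, TP=37) and print the resulting metrics table.
if __name__ == "__main__":
    print(mcm(tn=13, fp=5, fn=10, tp=37).to_string(index=False))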