mcm.py
"""mcm"""
import pandas as pd
def mcm(tn, fp, fn, tp):
    """Let a confusion matrix be laid out like this:

               N    P
            +----+----+
            |    |    |
            | TN | FP |
            |    |    |
            +----+----+
            |    |    |
            | FN | TP |
            |    |    |
            +----+----+

    The observed (predicted) values run along the columns and the expected
    (actual) values along the rows, with the positive class in the right
    column. With these definitions, the cells are read as TN, FP, FN and TP,
    in that order.

    Parameters
    ----------
    tn : int
        True Negatives (TN): the total number of outcomes where the model
        correctly predicts the negative class.
    fp : int
        False Positives (FP): the total number of outcomes where the model
        incorrectly predicts the positive class.
    fn : int
        False Negatives (FN): the total number of outcomes where the model
        incorrectly predicts the negative class.
    tp : int
        True Positives (TP): the total number of outcomes where the model
        correctly predicts the positive class.

    Returns
    -------
    pandas.DataFrame
        DataFrame with one row per metric and the columns 'Metric' and 'Value'.

    Notes
    -----
    https://en.wikipedia.org/wiki/Confusion_matrix
    https://developer.lsst.io/python/numpydoc.html
    https://www.mathworks.com/help/risk/explore-fairness-metrics-for-credit-scoring-model.html

    Examples
    --------
    from sklearn.metrics import confusion_matrix
    data = pd.DataFrame({
        'y_true': ['Positive']*47 + ['Negative']*18,
        'y_pred': ['Positive']*37 + ['Negative']*10 + ['Positive']*5 + ['Negative']*13})
    tn, fp, fn, tp = confusion_matrix(y_true=data.y_true,
                                      y_pred=data.y_pred,
                                      labels=['Negative', 'Positive']).ravel()
    mcm(tn, fp, fn, tp)
    """
    df_mcm = []
    df_mcm.append(['Sensitivity', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['Recall', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['True Positive Rate (TPR)', npy.nan if (tp + fn) == 0 else tp / (tp + fn)])
    df_mcm.append(['Specificity', npy.nan if (tn + fp) == 0 else tn / (tn + fp)])
    df_mcm.append(['True Negative Rate (TNR)', npy.nan if (tn + fp) == 0 else tn / (tn + fp)])
    df_mcm.append(['Precision', npy.nan if (tp + fp) == 0 else tp / (tp + fp)])
    df_mcm.append(['Positive Predictive Value (PPV)', npy.nan if (tp + fp) == 0 else tp / (tp + fp)])
    df_mcm.append(['Negative Predictive Value (NPV)', npy.nan if (tn + fn) == 0 else tn / (tn + fn)])
    df_mcm.append(['False Negative Rate (FNR)', npy.nan if (fn + tp) == 0 else fn / (fn + tp)])
    df_mcm.append(['False Positive Rate (FPR)', npy.nan if (fp + tn) == 0 else fp / (fp + tn)])
    df_mcm.append(['False Discovery Rate (FDR)', npy.nan if (fp + tp) == 0 else fp / (fp + tp)])
    df_mcm.append(['Rate of Positive Predictions (RPP)', npy.nan if (tn + tp + fn + fp) == 0 else (fp + tp) / (tn + tp + fn + fp)])
    df_mcm.append(['Rate of Negative Predictions (RNP)', npy.nan if (tn + tp + fn + fp) == 0 else (fn + tn) / (tn + tp + fn + fp)])
    df_mcm.append(['Accuracy', npy.nan if (tn + tp + fn + fp) == 0 else (tp + tn) / (tp + tn + fp + fn)])
    df_mcm.append(['F1 Score', npy.nan if (2*tp + fp + fn) == 0 else 2*tp / (2*tp + fp + fn)])
    # Guard the likelihood ratios against undefined rates and zero denominators.
    tpr = npy.nan if (tp + fn) == 0 else tp / (tp + fn)
    fpr = npy.nan if (fp + tn) == 0 else fp / (fp + tn)
    fnr = npy.nan if (fn + tp) == 0 else fn / (fn + tp)
    tnr = npy.nan if (tn + fp) == 0 else tn / (tn + fp)
    df_mcm.append(['Positive Likelihood Ratio (LR+)', npy.nan if fpr == 0 else tpr / fpr])
    df_mcm.append(['Negative Likelihood Ratio (LR-)', npy.nan if tnr == 0 else fnr / tnr])
    return pd.DataFrame(df_mcm, columns=['Metric', 'Value'])
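

# A minimal usage sketch: feed mcm() the counts implied by the docstring
# example (TN=13, FP=5, FN=10, TP=37) and print the resulting metrics table.
if __name__ == "__main__":
    print(mcm(tn=13, fp=5, fn=10, tp=37).to_string(index=False))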