Commit

Find errors in DL
alezanga committed Sep 26, 2023
1 parent 0f8c276 commit f4f3d9d
Showing 4 changed files with 66 additions and 7 deletions.
55 changes: 55 additions & 0 deletions src/nlp/deep_learning/find_negatives.py
@@ -0,0 +1,55 @@
from pathlib import Path

import numpy as np
import pandas as pd

from src.cv.classifiers.deep_learning.functional.yaml_manager import load_yaml
from src.nlp.dataset import train_val_test, wrong_predictions, compute_metrics
from src.nlp.deep_learning.pipeline import create_hf_pipeline

if __name__ == "__main__":
out = Path("dumps") / "nlp_models" / "error_reports" / "DL"

config: dict = load_yaml("src/nlp/params/deep_learning.yml")
bs: int = config["training"]["test_batch_size"]
target_label: str = config["testing"]["target_label"]
use_gpu: bool = config["use_gpu"]
add_synthetic: bool = True # config["add_synthetic"]

print("*** Predicting misogyny ")
pipe_m = create_hf_pipeline(config["testing"]["task_m_model_name"], device=0 if use_gpu else "cpu", batch_size=bs, top_k=None)
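# top_k=None makes the HF text-classification pipeline return the scores for all labels, not just the best one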
dataset_m = train_val_test(target="M", add_synthetic_train=add_synthetic)
x_data = dataset_m["test"]["x"] + dataset_m["test_synt"]["x"]
y_data = dataset_m["test"]["y"] + dataset_m["test_synt"]["y"]

raw_results = pipe_m(x_data)
# Rework the results into a list of dicts mapping {label: score}
r_dict: list[dict[str, float]] = [dict([tuple(a.values()) for a in row]) for row in raw_results]
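# e.g. [{"label": "1", "score": 0.91}, {"label": "0", "score": 0.09}] -> {"1": 0.91, "0": 0.09}
# (illustrative label names; the real ones come from the model's config)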

other_label: str = [k for k in r_dict[0].keys() if k != target_label][0]
results = [1 if e[target_label] > e[other_label] else 0 for e in r_dict]
print("Metrics on RAW and SYNTHETIC datasets combined")
compute_metrics(y_pred=results, y_true=y_data, sk_classifier_name=pipe_m.model.__class__.__name__)

# Predict scores with the model on test data
m_scores = [e[target_label] for e in r_dict]
assert [1 if e > .5 else 0 for e in m_scores] == results, "Results and scores do not match"

# Tokenize the dataset, then extract the non-zero entries from the vectorizer to get the features (words) it actually considers
# TODO (a hypothetical sketch follows this file's diff)

# Find out which predictions are wrong
error_df: pd.DataFrame = wrong_predictions(y_pred=np.asarray(m_scores, dtype=float), y_true=np.asarray(y_data, dtype=int), threshold=.5)
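# wrong_predictions is expected to return a DataFrame with at least an "indices" column
# (positions of the misclassified samples) and a "type" column ("fp"/"fn"), as used below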

# Attach the original input text to each misclassified sample
input_df = pd.DataFrame({"original_text": x_data}) # "features": x_cleaned})
input_df = input_df.iloc[error_df["indices"], :].reset_index(drop=True)
error_df = pd.concat([error_df, input_df], axis=1) # concat columns (same number of rows)

# Split the errors into FPs and FNs and write the reports to file
error_df_fp = error_df[error_df["type"] == "fp"]
error_df_fn = error_df[error_df["type"] == "fn"]

out.mkdir(parents=True, exist_ok=True)
error_df_fp.to_csv(out / "errors_fp.csv", index=False)
error_df_fn.to_csv(out / "errors_fn.csv", index=False)
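
A possible way to fill in the TODO above, mirroring the vectorizer-based feature extraction done in src/nlp/simple_model/find_negatives.py. This is only a sketch: it assumes the standard Hugging Face tokenizer exposed by the pipeline, and the variable names are illustrative, not part of the commit.

# Hypothetical sketch: recover the tokens the DL model actually sees
tokenizer = pipe_m.tokenizer
x_token_ids = tokenizer(x_data)["input_ids"]
x_tokens = [tokenizer.convert_ids_to_tokens(ids) for ids in x_token_ids]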
4 changes: 3 additions & 1 deletion src/nlp/deep_learning/run_classification.py
@@ -1,3 +1,5 @@
+from pprint import pprint
+
import pandas as pd
import torch.cuda

@@ -19,7 +21,7 @@
results = pipe_m(dataset_m["test"]["x"])
results = [1 if e[0]["label"] == target_label else 0 for e in results]
metrics = compute_metrics(y_pred=results, y_true=dataset_m["test"]["y"])
-print(metrics)
+pprint(metrics)
m_f1 = metrics["f1"]

match task:
13 changes: 8 additions & 5 deletions src/nlp/simple_model/find_negatives.py
@@ -5,7 +5,7 @@
from sklearn.linear_model import RidgeClassifier

from src.cv.classifiers.deep_learning.functional.yaml_manager import load_yaml
-from src.nlp.dataset import train_val_test, wrong_predictions
+from src.nlp.dataset import train_val_test, wrong_predictions, compute_metrics
from src.nlp.simple_model.pipeline import naive_classifier, predict_scores

classifier_type = RidgeClassifier
@@ -20,12 +20,14 @@

# Create dataset
data = train_val_test(target="M", add_synthetic_train=synthetic_add)
-
-# Train model
-_, pipe_m = naive_classifier(classifier_type(**clf_params), data, return_pipe=True, predict=False)
-
x_data = data["test"]["x"] + data["test_synt"]["x"]
y_data = data["test"]["y"] + data["test_synt"]["y"]
+# Add the synthetic test samples to the test set
+data["test"]["x"] = x_data
+# Train model
+predictions_, pipe_m = naive_classifier(classifier_type(**clf_params), data, return_pipe=True, predict=True)
+print("Metrics on RAW and SYNTHETIC datasets combined")
+compute_metrics(predictions_, y_data, classifier_type.__name__)

# Tokenize the dataset, then extract the non-zero entries from the vectorizer to get the features (words) it actually considers
x_tokenized = pipe_m["vectorizer"].transform(x_data)
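# e.g., assuming a scikit-learn vectorizer, the features used in row i would be:
# pipe_m["vectorizer"].get_feature_names_out()[x_tokenized[i].nonzero()[1]]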
@@ -34,6 +36,7 @@

# Predict scores with the model on test data
m_scores = predict_scores(pipe_m, x_data)
+assert np.array_equal(np.where(m_scores > .0, 1, 0), predictions_), "Results and scores do not match"

# Find out which predictions are wrong
error_df: pd.DataFrame = wrong_predictions(y_pred=m_scores, y_true=np.asarray(y_data, dtype=int), threshold=.0)
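# threshold=.0 because RidgeClassifier scores are presumably decision_function margins:
# the sign of the score determines the predicted class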
1 change: 0 additions & 1 deletion src/nlp/simple_model/run_classification.py
@@ -1,4 +1,3 @@
-# Read data
from pathlib import Path

import pandas as pd