Commit

Find errors in DL
alezanga committed Sep 26, 2023
1 parent 0f8c276 commit f4f3d9d
Showing 4 changed files with 66 additions and 7 deletions.
55 changes: 55 additions & 0 deletions src/nlp/deep_learning/find_negatives.py
@@ -0,0 +1,55 @@
from pathlib import Path

import numpy as np
import pandas as pd

from src.cv.classifiers.deep_learning.functional.yaml_manager import load_yaml
from src.nlp.dataset import train_val_test, wrong_predictions, compute_metrics
from src.nlp.deep_learning.pipeline import create_hf_pipeline

if __name__ == "__main__":
out = Path("dumps") / "nlp_models" / "error_reports" / "DL"

config: dict = load_yaml("src/nlp/params/deep_learning.yml")
bs: int = config["training"]["test_batch_size"]
target_label: str = config["testing"]["target_label"]
use_gpu: bool = config["use_gpu"]
add_synthetic: bool = True # config["add_synthetic"]

print("*** Predicting misogyny ")
pipe_m = create_hf_pipeline(config["testing"]["task_m_model_name"], device=0 if use_gpu else "cpu", batch_size=bs, top_k=None)
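# top_k=None makes the HF text-classification pipeline return the scores for all labels, not just the best one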
dataset_m = train_val_test(target="M", add_synthetic_train=add_synthetic)
x_data = dataset_m["test"]["x"] + dataset_m["test_synt"]["x"]
y_data = dataset_m["test"]["y"] + dataset_m["test_synt"]["y"]

raw_results = pipe_m(x_data)
# Rework the results into a list of dicts mapping {label: score}
r_dict: list[dict[str, float]] = [dict([tuple(a.values()) for a in row]) for row in raw_results]
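# e.g. [{"label": "1", "score": 0.91}, {"label": "0", "score": 0.09}] -> {"1": 0.91, "0": 0.09}
# (illustrative label names; the real ones come from the model's config)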

other_label: str = [k for k in r_dict[0].keys() if k != target_label][0]
results = [1 if e[target_label] > e[other_label] else 0 for e in r_dict]
print("Metrics on RAW and SYNTHETIC datasets combined")
compute_metrics(y_pred=results, y_true=y_data, sk_classifier_name=pipe_m.model.__class__.__name__)

# Predict scores with the model on test data
m_scores = [e[target_label] for e in r_dict]
assert [1 if e > .5 else 0 for e in m_scores] == results, "Results and scores do not match"

# Tokenize the dataset, then extract the non-zero entries from the vectorizer to get the features (words) it actually considers
# TODO (a hypothetical sketch follows this file's diff)

# Find out which predictions are wrong
error_df: pd.DataFrame = wrong_predictions(y_pred=np.asarray(m_scores, dtype=float), y_true=np.asarray(y_data, dtype=int), threshold=.5)
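# wrong_predictions is expected to return a DataFrame with at least an "indices" column
# (positions of the misclassified samples) and a "type" column ("fp"/"fn"), as used below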

# Attach the original input text to each misclassified sample
input_df = pd.DataFrame({"original_text": x_data}) # "features": x_cleaned})
input_df = input_df.iloc[error_df["indices"], :].reset_index(drop=True)
error_df = pd.concat([error_df, input_df], axis=1) # concat columns (same number of rows)

# Split the errors into FPs and FNs and write the reports to file
error_df_fp = error_df[error_df["type"] == "fp"]
error_df_fn = error_df[error_df["type"] == "fn"]

out.mkdir(parents=True, exist_ok=True)
error_df_fp.to_csv(out / "errors_fp.csv", index=False)
error_df_fn.to_csv(out / "errors_fn.csv", index=False)
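
A possible way to fill in the TODO above, mirroring the vectorizer-based feature extraction done in src/nlp/simple_model/find_negatives.py. This is only a sketch: it assumes the standard Hugging Face tokenizer exposed by the pipeline, and the variable names are illustrative, not part of the commit.

# Hypothetical sketch: recover the tokens the DL model actually sees
tokenizer = pipe_m.tokenizer
x_token_ids = tokenizer(x_data)["input_ids"]
x_tokens = [tokenizer.convert_ids_to_tokens(ids) for ids in x_token_ids]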
4 changes: 3 additions & 1 deletion src/nlp/deep_learning/run_classification.py
@@ -1,3 +1,5 @@
+from pprint import pprint
+
import pandas as pd
import torch.cuda

@@ -19,7 +21,7 @@
results = pipe_m(dataset_m["test"]["x"])
results = [1 if e[0]["label"] == target_label else 0 for e in results]
metrics = compute_metrics(y_pred=results, y_true=dataset_m["test"]["y"])
-print(metrics)
+pprint(metrics)
m_f1 = metrics["f1"]

match task:
13 changes: 8 additions & 5 deletions src/nlp/simple_model/find_negatives.py
@@ -5,7 +5,7 @@
from sklearn.linear_model import RidgeClassifier

from src.cv.classifiers.deep_learning.functional.yaml_manager import load_yaml
-from src.nlp.dataset import train_val_test, wrong_predictions
+from src.nlp.dataset import train_val_test, wrong_predictions, compute_metrics
from src.nlp.simple_model.pipeline import naive_classifier, predict_scores

classifier_type = RidgeClassifier
@@ -20,12 +20,14 @@

# Create dataset
data = train_val_test(target="M", add_synthetic_train=synthetic_add)
-
-# Train model
-_, pipe_m = naive_classifier(classifier_type(**clf_params), data, return_pipe=True, predict=False)
-
x_data = data["test"]["x"] + data["test_synt"]["x"]
y_data = data["test"]["y"] + data["test_synt"]["y"]
+# Add the synthetic test samples to the test set
+data["test"]["x"] = x_data
+# Train model
+predictions_, pipe_m = naive_classifier(classifier_type(**clf_params), data, return_pipe=True, predict=True)
+print("Metrics on RAW and SYNTHETIC datasets combined")
+compute_metrics(predictions_, y_data, classifier_type.__name__)

# Tokenize the dataset, then extract the non-zero entries from the vectorizer to get the features (words) it actually considers
x_tokenized = pipe_m["vectorizer"].transform(x_data)
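# e.g., assuming a scikit-learn vectorizer, the features used in row i would be:
# pipe_m["vectorizer"].get_feature_names_out()[x_tokenized[i].nonzero()[1]]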
@@ -34,6 +36,7 @@

# Predict scores with the model on test data
m_scores = predict_scores(pipe_m, x_data)
+assert np.array_equal(np.where(m_scores > .0, 1, 0), predictions_), "Results and scores do not match"

# Find out which predictions are wrong
error_df: pd.DataFrame = wrong_predictions(y_pred=m_scores, y_true=np.asarray(y_data, dtype=int), threshold=.0)
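# threshold=.0 because RidgeClassifier scores are presumably decision_function margins:
# the sign of the score determines the predicted class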
1 change: 0 additions & 1 deletion src/nlp/simple_model/run_classification.py
@@ -1,4 +1,3 @@
-# Read data
from pathlib import Path

import pandas as pd