Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Detectron model health #664

Merged
merged 10 commits into from
Jul 17, 2024
269 changes: 178 additions & 91 deletions cyclops/monitor/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@
import pandas as pd
import sklearn
from datasets import Dataset, DatasetDict, concatenate_datasets
from datasets.utils.logging import disable_progress_bar
from scipy.special import expit as sigmoid
from scipy.special import softmax
from sklearn.base import BaseEstimator

from cyclops.data.transforms import Lambdad
from cyclops.data.utils import apply_transforms
from cyclops.models.catalog import wrap_model
from cyclops.models.utils import is_pytorch_model, is_sklearn_model
from cyclops.models.wrappers import PTModel, SKModel
from cyclops.monitor.utils import DetectronModule, DummyCriterion, get_args
from cyclops.utils.optional import import_optional_module


disable_progress_bar()


if TYPE_CHECKING:
import torch
from alibi_detect.cd import (
Expand Down Expand Up @@ -705,33 +711,55 @@
self.model = base_model
else:
self.model = model
if isinstance(base_model, nn.Module):
if is_pytorch_model(base_model):
self.base_model = wrap_model(
base_model,
batch_size=batch_size,
)
self.base_model.initialize()
else:
self.base_model.save_model(
"saved_models/DetectronModule/pretrained_model.pt", log=False
)
if transforms:
self.transforms = partial(apply_transforms, transforms=transforms)
model_transforms = transforms
model_transforms.transforms = model_transforms.transforms + (

Check warning on line 726 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L724-L726

Added lines #L724 - L726 were not covered by tests
Lambdad(
keys=("mask", "labels"),
func=lambda x: np.array(x),
allow_missing_keys=True,
),
)
self.model_transforms = partial(

Check warning on line 733 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L733

Added line #L733 was not covered by tests
apply_transforms,
transforms=model_transforms,
)
else:
self.transforms = None
self.model_transforms = None
elif is_sklearn_model(base_model):

Check warning on line 740 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L740

Added line #L740 was not covered by tests
self.base_model = wrap_model(base_model)
self.base_model.initialize()
self.feature_column = feature_column
if transforms:
self.transforms = partial(apply_transforms, transforms=transforms)
model_transforms = transforms
model_transforms.transforms = model_transforms.transforms + (
Lambdad(
keys=("mask", "labels"),
func=lambda x: np.array(x),
allow_missing_keys=True,
),
self.base_model.save_model(

Check warning on line 742 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L742

Added line #L742 was not covered by tests
"saved_models/DetectronModule/pretrained_model.pkl", log=False
)
self.model_transforms = partial(
apply_transforms,
transforms=model_transforms,
self.transforms = transforms
self.model_transforms = transforms
elif isinstance(base_model, SKModel):
self.base_model = base_model
self.base_model.save_model(

Check warning on line 749 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L745-L749

Added lines #L745 - L749 were not covered by tests
"saved_models/DetectronModule/pretrained_model.pkl", log=False
)
else:
self.transforms = transforms
self.model_transforms = transforms
elif isinstance(base_model, PTModel):
self.base_model = base_model
self.base_model.save_model(

Check warning on line 756 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L754-L756

Added lines #L754 - L756 were not covered by tests
"saved_models/DetectronModule/pretrained_model.pt", log=False
)
else:
raise ValueError("base_model must be a PyTorch or sklearn model.")

Check warning on line 760 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L760

Added line #L760 was not covered by tests

self.feature_column = feature_column
self.splits_mapping = splits_mapping
self.num_runs = num_runs
self.sample_size = sample_size
Expand All @@ -741,8 +769,7 @@
self.lr = lr
self.num_workers = num_workers
self.task = task
if save_dir is None:
self.save_dir = "detectron"
self.save_dir = "detectron" if save_dir is None else save_dir

self.fit(X_s)

Expand All @@ -759,24 +786,35 @@
for seed in range(self.num_runs):
# train ensemble for split 'p*'
for e in range(1, self.ensemble_size + 1):
if is_pytorch_model(self.base_model.model):
self.base_model.load_model(
"saved_models/DetectronModule/pretrained_model.pt", log=False
)
elif is_sklearn_model(self.base_model.model):
self.base_model.load_model(

Check warning on line 794 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L793-L794

Added lines #L793 - L794 were not covered by tests
"saved_models/DetectronModule/pretrained_model.pkl", log=False
)
alpha = 1 / (len(X_s) * self.sample_size + 1)
model = wrap_model(
DetectronModule(
self.model,
feature_column=self.feature_column,
alpha=alpha,
),
batch_size=self.batch_size,
criterion=DummyCriterion,
max_epochs=self.max_epochs_per_model,
lr=self.lr,
num_workers=self.num_workers,
save_dir=self.save_dir,
concatenate_features=False,
)
if is_pytorch_model(self.base_model.model):
model = wrap_model(
DetectronModule(
self.model,
feature_column=self.feature_column,
alpha=alpha,
),
batch_size=self.batch_size,
criterion=DummyCriterion,
max_epochs=self.max_epochs_per_model,
lr=self.lr,
num_workers=self.num_workers,
save_dir=self.save_dir,
concatenate_features=False,
)
model.initialize()
elif is_sklearn_model(self.base_model.model):
model = self.base_model

Check warning on line 815 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L814-L815

Added lines #L814 - L815 were not covered by tests
if isinstance(X_s, (Dataset, DatasetDict)):
# create p/p* splits

p = (
X_s[self.splits_mapping["train"]]
.shuffle()
Expand Down Expand Up @@ -808,26 +846,39 @@
np.array(pstar_pseudolabels),
)
pstar = pstar.add_column("labels", pstar_pseudolabels.tolist())
if is_sklearn_model(self.base_model.model):
pstar = pstar.map(

Check warning on line 850 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L850

Added line #L850 was not covered by tests
lambda x: x.update({"labels": int(1 - x["labels"])})
)

p_pstar = concatenate_datasets([p, pstar], axis=0)
p_pstar = p_pstar.train_test_split(test_size=0.5, shuffle=True)

train_features = [self.feature_column]
train_features.extend(["labels", "mask"])
model.fit(
X=p_pstar,
feature_columns=train_features,
target_columns="mask", # placeholder, not used in dummycriterion
transforms=self.model_transforms,
splits_mapping={"train": "train", "validation": "test"},
)
if is_pytorch_model(self.base_model.model):
train_features = [self.feature_column]
train_features.extend(["labels", "mask"])
model.fit(
X=p_pstar,
feature_columns=train_features,
target_columns="mask", # placeholder, not used in dummycriterion
transforms=self.model_transforms,
splits_mapping={"train": "train", "validation": "test"},
)
model.load_model(
os.path.join(
self.save_dir,
"saved_models/DetectronModule/best_model.pt",
),
log=False,
)
elif is_sklearn_model(self.base_model.model):
model.fit(

Check warning on line 875 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L874-L875

Added lines #L874 - L875 were not covered by tests
X=p_pstar,
feature_columns=self.feature_column,
target_columns="labels",
transforms=self.model_transforms,
)

model.load_model(
os.path.join(
self.save_dir,
"saved_models/DetectronModule/best_model.pt",
),
)
pstar_logits = model.predict(
X=pstar,
feature_columns=self.feature_column,
Expand Down Expand Up @@ -862,22 +913,33 @@
for seed in range(self.num_runs):
# train ensemble for split 'q'
for e in range(1, self.ensemble_size + 1):
if is_pytorch_model(self.base_model.model):
self.base_model.load_model(
"saved_models/DetectronModule/pretrained_model.pt", log=False
)
elif is_sklearn_model(self.base_model.model):
self.base_model.load_model(

Check warning on line 921 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L920-L921

Added lines #L920 - L921 were not covered by tests
"saved_models/DetectronModule/pretrained_model.pkl", log=False
)
alpha = 1 / (len(X_t) * self.sample_size + 1)
model = wrap_model(
DetectronModule(
self.model,
feature_column=self.feature_column,
alpha=alpha,
),
batch_size=self.batch_size,
criterion=DummyCriterion,
max_epochs=self.max_epochs_per_model,
lr=self.lr,
num_workers=self.num_workers,
save_dir=self.save_dir,
concatenate_features=False,
)
model.initialize()
if is_pytorch_model(self.base_model.model):
model = wrap_model(
DetectronModule(
self.model,
feature_column=self.feature_column,
alpha=alpha,
),
batch_size=self.batch_size,
criterion=DummyCriterion,
max_epochs=self.max_epochs_per_model,
lr=self.lr,
num_workers=self.num_workers,
save_dir=self.save_dir,
concatenate_features=False,
)
model.initialize()
elif is_sklearn_model(self.base_model.model):
model = self.base_model

Check warning on line 942 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L941-L942

Added lines #L941 - L942 were not covered by tests
if isinstance(X_t, (Dataset, DatasetDict)):
# create p/q splits
p = (
Expand Down Expand Up @@ -908,24 +970,36 @@
)
q_pseudolabels = self.format_pseudolabels(np.array(q_pseudolabels))
q = q.add_column("labels", q_pseudolabels.tolist())
if is_sklearn_model(self.base_model.model):
q = q.map(lambda x: x.update({"labels": int(1 - x["labels"])}))

Check warning on line 974 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L974

Added line #L974 was not covered by tests
p_q = concatenate_datasets([p, q], axis=0)
p_q = p_q.train_test_split(test_size=0.5, shuffle=True)
train_features = [self.feature_column]
train_features.extend(["labels", "mask"])
model.fit(
X=p_q,
feature_columns=train_features,
target_columns="mask", # placeholder, not used in dummycriterion
transforms=self.model_transforms,
splits_mapping={"train": "train", "validation": "test"},
)

model.load_model(
os.path.join(
self.save_dir,
"saved_models/DetectronModule/best_model.pt",
),
)
if is_pytorch_model(self.base_model.model):
train_features = [self.feature_column]
train_features.extend(["labels", "mask"])
model.fit(
X=p_q,
feature_columns=train_features,
target_columns="mask", # placeholder, not used in dummycriterion
transforms=self.model_transforms,
splits_mapping={"train": "train", "validation": "test"},
)

model.load_model(
os.path.join(
self.save_dir,
"saved_models/DetectronModule/best_model.pt",
),
log=False,
)
elif is_sklearn_model(self.base_model.model):
model.fit(

Check warning on line 997 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L996-L997

Added lines #L996 - L997 were not covered by tests
X=p_q,
feature_columns=self.feature_column,
target_columns="labels",
transforms=self.model_transforms,
)
q_logits = model.predict(
X=q,
feature_columns=self.feature_column,
Expand All @@ -950,18 +1024,21 @@

def format_pseudolabels(self, labels):
"""Format pseudolabels."""
if self.task in ("binary", "multilabel"):
labels = (
(labels > 0.5).astype("float32")
if ((labels <= 1).all() and (labels >= 0).all())
else (sigmoid(labels) > 0.5).astype("float32")
)
elif self.task == "multiclass":
labels = (
labels.argmax(dim=-1)
if np.isclose(labels.sum(axis=-1), 1).all()
else softmax(labels, axis=-1).argmax(axis=-1)
)
if is_pytorch_model(self.base_model.model):
if self.task in ("binary", "multilabel"):
labels = (

Check warning on line 1029 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L1029

Added line #L1029 was not covered by tests
(labels > 0.5).astype("float32")
if ((labels <= 1).all() and (labels >= 0).all())
else (sigmoid(labels) > 0.5).astype("float32")
)
elif self.task == "multiclass":
labels = (
labels.argmax(dim=-1)
if np.isclose(labels.sum(axis=-1), 1).all()
else softmax(labels, axis=-1).argmax(axis=-1)
)
elif is_sklearn_model(self.base_model.model):
return labels

Check warning on line 1041 in cyclops/monitor/tester.py

View check run for this annotation

Codecov / codecov/patch

cyclops/monitor/tester.py#L1040-L1041

Added lines #L1040 - L1041 were not covered by tests
else:
raise ValueError(
f"Task must be either 'binary', 'multiclass' or 'multilabel', got {self.task} instead.",
Expand Down Expand Up @@ -1015,15 +1092,25 @@
test_count = self.counts("test", max_ensemble_size)[0]
cdf = self.ecdf(cal_counts)
p_value = cdf(test_count)
self.model_health = self.get_model_health(max_ensemble_size)
return {
"data": {
"model_health": self.model_health,
"p_val": p_value,
"distance": test_count,
"cal_record": self.cal_record,
"test_record": self.test_record,
},
}

def get_model_health(self, max_ensemble_size=None) -> float:
    """Get model health.

    Divides the test count by the mean of the calibration counts and caps
    the ratio at 1.

    Parameters
    ----------
    max_ensemble_size : optional
        Forwarded unchanged to ``self.counts`` for both the calibration
        and the test records.

    Returns
    -------
    float
        ``test_count / baseline`` capped at ``1.0``. The same value is
        stored on ``self.model_health`` so the attribute and the return
        value always agree.

    Notes
    -----
    Also refreshes ``self.cal_counts``, ``self.test_count`` and
    ``self.baseline`` as a side effect.

    """
    # NOTE(review): ``counts`` is assumed to return an array-like exposing
    # ``.mean()`` and integer indexing (e.g. a NumPy array) -- confirm.
    self.cal_counts = self.counts("calibration", max_ensemble_size)
    self.test_count = self.counts("test", max_ensemble_size)[0]
    self.baseline = self.cal_counts.mean()
    if self.baseline == 0:
        # The ratio is undefined for a zero baseline. With NumPy inputs the
        # unguarded division evaluates to inf/nan, which the cap then turns
        # into 1; keep that outcome without the divide-by-zero warning.
        ratio = 1.0
    else:
        ratio = self.test_count / self.baseline
    # Coerce to a built-in float so the capped branch does not return the
    # int ``1`` and the result matches the annotated return type.
    self.model_health = float(min(1.0, ratio))
    return self.model_health

@staticmethod
def split_dataset(X: Union[Dataset, DatasetDict]) -> DatasetDict:
"""Split dataset into train and test splits."""
Expand Down
Loading