Port over miscellaneous changes from SIGMOD branch #582

Merged · 16 commits · Jul 30, 2024
1 change: 1 addition & 0 deletions .coveragerc
@@ -4,3 +4,4 @@ omit =
**/*/__init__.py
**/generated/*
**/modyn/tests/*
**/extra_local_eval/*.py
1 change: 1 addition & 0 deletions modyn/models/__init__.py
@@ -12,6 +12,7 @@
from .resnet50.resnet50 import ResNet50 # noqa: F401
from .resnet152.resnet152 import ResNet152 # noqa: F401
from .rho_loss_twin_model.rho_loss_twin_model import RHOLOSSTwinModel # noqa: F401
from .smallyearbooknet.smallyearbooknet import SmallYearbookNet # noqa: F401
from .yearbooknet.yearbooknet import YearbookNet # noqa: F401

files = os.listdir(os.path.dirname(__file__))
2 changes: 2 additions & 0 deletions modyn/models/resnet152/resnet152.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(Bottleneck, [3, 8, 36, 3], **model_configuration)  # type: ignore
2 changes: 2 additions & 0 deletions modyn/models/resnet18/resnet18.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(BasicBlock, [2, 2, 2, 2], **model_configuration)  # type: ignore
2 changes: 2 additions & 0 deletions modyn/models/resnet50/resnet50.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(Bottleneck, [3, 4, 6, 3], **model_configuration)  # type: ignore
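The same guard appears in all three ResNet wrappers: `use_pretrained` is a Modyn-level flag, and torchvision's `ResNet.__init__` rejects unknown keyword arguments. A minimal sketch of the failure mode the guard avoids (the config values here are made up):

```python
from torchvision.models.resnet import Bottleneck, ResNet

config = {"num_classes": 1000, "use_pretrained": True}  # hypothetical pipeline config

# Forwarding the raw config would fail:
#   TypeError: __init__() got an unexpected keyword argument 'use_pretrained'
# ResNet(Bottleneck, [3, 8, 36, 3], **config)

config.pop("use_pretrained", None)  # drop the Modyn-only flag, whether True or False
model = ResNet(Bottleneck, [3, 8, 36, 3], **config)  # only torchvision kwargs remain
```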
9 changes: 9 additions & 0 deletions modyn/models/smallyearbooknet/__init__.py
@@ -0,0 +1,9 @@
"""
Small CNN for Yearbook dataset
"""

import os

files = os.listdir(os.path.dirname(__file__))
files.remove("__init__.py")
__all__ = [f[:-3] for f in files if f.endswith(".py")]
45 changes: 45 additions & 0 deletions modyn/models/smallyearbooknet/smallyearbooknet.py
@@ -0,0 +1,45 @@
from typing import Any

import torch
from modyn.models.coreset_methods_support import CoresetSupportingModule
from torch import nn


class SmallYearbookNet:
    """
    Adapted from WildTime.
    Here you can find the original implementation:
    https://github.com/huaxiuyao/Wild-Time/blob/main/wildtime/networks/yearbook.py
    Can be used for experiments on RHO-LOSS as the IL model.
    """

    # pylint: disable-next=unused-argument
    def __init__(self, model_configuration: dict[str, Any], device: str, amp: bool) -> None:
        self.model = SmallYearbookNetModel(**model_configuration)
        self.model.to(device)


class SmallYearbookNetModel(CoresetSupportingModule):
    def __init__(self, num_input_channels: int, num_classes: int) -> None:
        super().__init__()
        self.enc = nn.Sequential(
            self.conv_block(num_input_channels, 16),
            self.conv_block(16, 16),
            self.conv_block(16, 16),
        )
        self.hid_dim = 16
        self.classifier = nn.Linear(16, num_classes)

    def conv_block(self, in_channels: int, out_channels: int) -> nn.Module:
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1), nn.BatchNorm2d(out_channels), nn.ReLU(), nn.MaxPool2d(2)
        )

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        data = self.enc(data)
        data = torch.mean(data, dim=(2, 3))
        data = self.embedding_recorder(data)
        return self.classifier(data)

    def get_last_layer(self) -> nn.Module:
        return self.classifier
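For context, a usage sketch of the new model. The input shape is an assumption: WildTime's yearbook portraits are 32x32, and the channel count depends on preprocessing.

```python
import torch

from modyn.models.smallyearbooknet.smallyearbooknet import SmallYearbookNetModel

model = SmallYearbookNetModel(num_input_channels=1, num_classes=2)
batch = torch.randn(8, 1, 32, 32)  # assumed 32x32 grayscale yearbook crops
logits = model(batch)
print(logits.shape)  # torch.Size([8, 2]); the mean over H and W (global average
# pooling) makes the classifier input 16-dimensional at any input resolution
```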
@@ -103,14 +103,14 @@ def init_from_path(cls, pipeline_logdir: Path) -> "EvaluationExecutor":
        context = pickle.loads((snapshot_dir / "context.pcl").read_bytes())

        grpc_handler = GRPCHandler(eval_state_config.config.model_dump(by_alias=True))
        grpc_handler.init_cluster_connection()
        executor = EvaluationExecutor(
            eval_state_config.pipeline_id,
            eval_state_config.eval_dir,
            eval_state_config.config,
            eval_state_config.pipeline,
            grpc_handler,
        )
        executor.grpc.init_cluster_connection()
        executor.context = context
        return executor

@@ -488,6 +488,8 @@ def _handle_triggers(
            s.triggers.append(trigger_index)
            s.current_sample_index += len(trigger_data)

            self.logs.materialize(s.log_directory, mode="increment")  # materialize after every trigger

            if s.maximum_triggers is not None and len(s.triggers) >= s.maximum_triggers:
                break
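The point of materializing after every trigger is durability: if the pipeline crashes mid-run, at most one trigger's worth of logs is lost. A hypothetical sketch of what incremental materialization could look like; Modyn's actual `Logs` class may differ.

```python
import json
from pathlib import Path


class Logs:  # simplified stand-in, not Modyn's implementation
    def __init__(self) -> None:
        self.entries: list[dict] = []
        self._flushed = 0  # number of entries already written to disk

    def materialize(self, log_directory: Path, mode: str = "increment") -> None:
        log_directory.mkdir(parents=True, exist_ok=True)
        path = log_directory / "pipeline.log.jsonl"
        if mode == "increment":
            with path.open("a") as f:  # append only entries added since the last flush
                for entry in self.entries[self._flushed :]:
                    f.write(json.dumps(entry) + "\n")
        else:
            path.write_text("".join(json.dumps(e) + "\n" for e in self.entries))
        self._flushed = len(self.entries)
```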
8 changes: 6 additions & 2 deletions modyn/supervisor/internal/triggers/drift/alibi_detector.py
@@ -41,8 +41,12 @@ def detect_drift(
    ) -> dict[str, MetricResult]:
        assert isinstance(embeddings_ref, (np.ndarray, torch.Tensor))
        assert isinstance(embeddings_cur, (np.ndarray, torch.Tensor))
        embeddings_ref = embeddings_ref.numpy() if isinstance(embeddings_ref, torch.Tensor) else embeddings_ref
        embeddings_cur = embeddings_cur.numpy() if isinstance(embeddings_cur, torch.Tensor) else embeddings_cur
        embeddings_ref = (
            embeddings_ref.detach().cpu().numpy() if isinstance(embeddings_ref, torch.Tensor) else embeddings_ref
        )
        embeddings_cur = (
            embeddings_cur.detach().cpu().numpy() if isinstance(embeddings_cur, torch.Tensor) else embeddings_cur
        )

        results: dict[str, MetricResult] = {}

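The old one-liners break for tensors that live on the GPU or are still attached to an autograd graph; `detach().cpu()` handles both cases. A standalone illustration of the conversion:

```python
import numpy as np
import torch


def to_numpy(x: "np.ndarray | torch.Tensor") -> np.ndarray:
    # Mirrors the conversion above: detach() drops the autograd graph,
    # cpu() moves the data off the GPU, and only then is numpy() safe.
    return x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x


t = torch.ones(4, requires_grad=True)
# t.numpy() would raise:
#   RuntimeError: Can't call numpy() on Tensor that requires grad.
arr = to_numpy(t)  # works for CPU, CUDA, and grad-tracking tensors alike
```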
@@ -4,6 +4,7 @@

import grpc
from modyn.common.ftp import download_trained_model
from modyn.config.schema.system.config import ModynConfig
from modyn.metadata_database.metadata_database_connection import MetadataDatabaseConnection
from modyn.metadata_database.models import TrainedModel

@@ -24,12 +25,13 @@

    def __init__(
        self,
        modyn_config: dict,
        modyn_config: ModynConfig,
        pipeline_id: int,
        base_dir: pathlib.Path,
        model_storage_address: str,
    ):
        self.modyn_config = modyn_config
        # TODO(MaxiBoether): Update this class to use the model
        self.modyn_config = modyn_config.model_dump(by_alias=True)
        self.pipeline_id = pipeline_id
        self.base_dir = base_dir
        assert self.base_dir.exists(), f"Temporary Directory {self.base_dir} should have been created."
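`model_dump(by_alias=True)` is the Pydantic v2 way to turn the typed config object back into the plain dict the rest of this class still expects. A minimal sketch with an invented config model (not the real `ModynConfig` fields):

```python
from pydantic import BaseModel, Field


class TinyConfig(BaseModel):  # hypothetical stand-in for ModynConfig
    model_storage_address: str = Field(alias="model_storage")


cfg = TinyConfig(model_storage="localhost:50059")
print(cfg.model_dump())               # {'model_storage_address': 'localhost:50059'}
print(cfg.model_dump(by_alias=True))  # {'model_storage': 'localhost:50059'}
```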
@@ -1,11 +1,12 @@
# pylint: skip-file
# pragma: no cover

"""Binary file wrapper."""
import os
from typing import Literal


class BinaryFileWrapper:
class BinaryFileWrapper:  # pragma: no cover
    """Binary file wrapper.

    Binary files store raw sample data in a row-oriented format. One file can contain multiple samples.
@@ -15,7 +16,9 @@ class BinaryFileWrapper:
    offsetting the required number of bytes.
    """

    def __init__(self, file_path: str, byteorder: Literal["little", "big"], record_size: int, label_size: int):
    def __init__(
        self, file_path: str, byteorder: Literal["little", "big"], record_size: int, label_size: int
    ):  # pragma: no cover
        """Init binary file wrapper.

        Args:
@@ -40,15 +43,16 @@ def __init__(self, file_path: str, byteorder: Literal["little", "big"], record_s
        if self.file_size % self.record_size != 0:
            raise ValueError("File does not contain exact number of records of size " + str(self.record_size))

    def get_number_of_samples(self) -> int:
    def get_number_of_samples(self) -> int:  # pragma: no cover
        """Get number of samples in file.

        Returns:
            int: Number of samples in file
        """
        return int(self.file_size / self.record_size)

    def get_all_labels(self) -> list[int]:
    def get_all_labels(self) -> list[int]:  # pragma: no cover

        with open(self.file_path, "rb") as file:
            data = file.read()

@@ -61,7 +65,7 @@ def get_all_labels(self) -> list[int]:
        ]
        return labels

    def get_sample(self, index: int) -> bytes:
    def get_sample(self, index: int) -> bytes:  # pragma: no cover
        """Get the sample at the given index.
        The indices are zero based.

@@ -76,10 +80,12 @@ def get_sample(self, index: int) -> bytes:
        """
        return self.get_samples_from_indices([index])[0]

    def get_samples(self, start: int, end: int) -> list[bytes]:
    def get_samples(self, start: int, end: int) -> list[bytes]:  # pragma: no cover

        return self.get_samples_from_indices(list(range(start, end)))

    def get_samples_from_indices(self, indices: list) -> list[bytes]:
    def get_samples_from_indices(self, indices: list) -> list[bytes]:  # pragma: no cover

        with open(self.file_path, "rb") as file:
            data = file.read()
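A usage sketch of the wrapper's row-oriented layout, assuming the label occupies the first `label_size` bytes of each record (the exact offset arithmetic sits in the collapsed part of the diff, so treat the layout as an assumption):

```python
import tempfile

# Two records of 8 bytes each: a 4-byte big-endian label followed by a 4-byte payload.
data = (1).to_bytes(4, "big") + b"AAAA" + (2).to_bytes(4, "big") + b"BBBB"
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
    f.write(data)

wrapper = BinaryFileWrapper(f.name, byteorder="big", record_size=8, label_size=4)
print(wrapper.get_number_of_samples())  # 2 (16-byte file / 8-byte records)
print(wrapper.get_all_labels())         # [1, 2] under the assumed layout
```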