Port over miscellaneous changes from SIGMOD branch #582

Merged · 16 commits · Jul 30, 2024
1 change: 1 addition & 0 deletions .coveragerc
@@ -4,3 +4,4 @@ omit =
**/*/__init__.py
**/generated/*
**/modyn/tests/*
**/extra_local_eval/*.py
1 change: 1 addition & 0 deletions modyn/models/__init__.py
@@ -12,6 +12,7 @@
from .resnet50.resnet50 import ResNet50 # noqa: F401
from .resnet152.resnet152 import ResNet152 # noqa: F401
from .rho_loss_twin_model.rho_loss_twin_model import RHOLOSSTwinModel # noqa: F401
from .smallyearbooknet.smallyearbooknet import SmallYearbookNet # noqa: F401
from .yearbooknet.yearbooknet import YearbookNet # noqa: F401

files = os.listdir(os.path.dirname(__file__))
2 changes: 2 additions & 0 deletions modyn/models/resnet152/resnet152.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(Bottleneck, [3, 8, 36, 3], **model_configuration)  # type: ignore
2 changes: 2 additions & 0 deletions modyn/models/resnet18/resnet18.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(BasicBlock, [2, 2, 2, 2], **model_configuration)  # type: ignore
2 changes: 2 additions & 0 deletions modyn/models/resnet50/resnet50.py
@@ -26,6 +26,8 @@ def __init__(self, model_configuration: dict[str, Any]) -> None:
        # We need to initialize the model with the number of classes
        # in the pretrained weights
        model_configuration["num_classes"] = len(weights.meta["categories"])

        if "use_pretrained" in model_configuration:  # no matter if True or False
            del model_configuration["use_pretrained"]  # don't want to forward this to torchvision

        super().__init__(Bottleneck, [3, 4, 6, 3], **model_configuration)  # type: ignore
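The same guard appears in all three ResNet wrappers: `use_pretrained` is a Modyn-level flag, and torchvision's `ResNet.__init__` rejects unknown keyword arguments. A minimal sketch of the failure mode the guard avoids (the config values here are made up):

```python
from torchvision.models.resnet import Bottleneck, ResNet

config = {"num_classes": 1000, "use_pretrained": True}  # hypothetical pipeline config

# Forwarding the raw config would fail:
#   TypeError: __init__() got an unexpected keyword argument 'use_pretrained'
# ResNet(Bottleneck, [3, 8, 36, 3], **config)

config.pop("use_pretrained", None)  # drop the Modyn-only flag, whether True or False
model = ResNet(Bottleneck, [3, 8, 36, 3], **config)  # only torchvision kwargs remain
```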
9 changes: 9 additions & 0 deletions modyn/models/smallyearbooknet/__init__.py
@@ -0,0 +1,9 @@
"""
Small CNN for Yearbook dataset
"""

import os

files = os.listdir(os.path.dirname(__file__))
files.remove("__init__.py")
__all__ = [f[:-3] for f in files if f.endswith(".py")]
45 changes: 45 additions & 0 deletions modyn/models/smallyearbooknet/smallyearbooknet.py
@@ -0,0 +1,45 @@
from typing import Any

import torch
from modyn.models.coreset_methods_support import CoresetSupportingModule
from torch import nn


class SmallYearbookNet:
    """
    Adapted from WildTime.
    Here you can find the original implementation:
    https://github.com/huaxiuyao/Wild-Time/blob/main/wildtime/networks/yearbook.py
    Can be used for experiments on RHO-LOSS as the IL model.
    """

    # pylint: disable-next=unused-argument
    def __init__(self, model_configuration: dict[str, Any], device: str, amp: bool) -> None:
        self.model = SmallYearbookNetModel(**model_configuration)
        self.model.to(device)


class SmallYearbookNetModel(CoresetSupportingModule):
    def __init__(self, num_input_channels: int, num_classes: int) -> None:
        super().__init__()
        self.enc = nn.Sequential(
            self.conv_block(num_input_channels, 16),
            self.conv_block(16, 16),
            self.conv_block(16, 16),
        )
        self.hid_dim = 16
        self.classifier = nn.Linear(16, num_classes)

    def conv_block(self, in_channels: int, out_channels: int) -> nn.Module:
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1), nn.BatchNorm2d(out_channels), nn.ReLU(), nn.MaxPool2d(2)
        )

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        data = self.enc(data)
        data = torch.mean(data, dim=(2, 3))
        data = self.embedding_recorder(data)
        return self.classifier(data)

    def get_last_layer(self) -> nn.Module:
        return self.classifier
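For context, a usage sketch of the new model. The input shape is an assumption: WildTime's yearbook portraits are 32x32, and the channel count depends on preprocessing.

```python
import torch

from modyn.models.smallyearbooknet.smallyearbooknet import SmallYearbookNetModel

model = SmallYearbookNetModel(num_input_channels=1, num_classes=2)
batch = torch.randn(8, 1, 32, 32)  # assumed 32x32 grayscale yearbook crops
logits = model(batch)
print(logits.shape)  # torch.Size([8, 2]); the mean over H and W (global average
# pooling) makes the classifier input 16-dimensional at any input resolution
```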
@@ -103,14 +103,14 @@ def init_from_path(cls, pipeline_logdir: Path) -> "EvaluationExecutor":
        context = pickle.loads((snapshot_dir / "context.pcl").read_bytes())

        grpc_handler = GRPCHandler(eval_state_config.config.model_dump(by_alias=True))
        grpc_handler.init_cluster_connection()
        executor = EvaluationExecutor(
            eval_state_config.pipeline_id,
            eval_state_config.eval_dir,
            eval_state_config.config,
            eval_state_config.pipeline,
            grpc_handler,
        )
        executor.grpc.init_cluster_connection()
        executor.context = context
        return executor

@@ -488,6 +488,8 @@ def _handle_triggers(
            s.triggers.append(trigger_index)
            s.current_sample_index += len(trigger_data)

            self.logs.materialize(s.log_directory, mode="increment")  # materialize after every trigger

            if s.maximum_triggers is not None and len(s.triggers) >= s.maximum_triggers:
                break
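The point of materializing after every trigger is durability: if the pipeline crashes mid-run, at most one trigger's worth of logs is lost. A hypothetical sketch of what incremental materialization could look like; Modyn's actual `Logs` class may differ.

```python
import json
from pathlib import Path


class Logs:  # simplified stand-in, not Modyn's implementation
    def __init__(self) -> None:
        self.entries: list[dict] = []
        self._flushed = 0  # number of entries already written to disk

    def materialize(self, log_directory: Path, mode: str = "increment") -> None:
        log_directory.mkdir(parents=True, exist_ok=True)
        path = log_directory / "pipeline.log.jsonl"
        if mode == "increment":
            with path.open("a") as f:  # append only entries added since the last flush
                for entry in self.entries[self._flushed :]:
                    f.write(json.dumps(entry) + "\n")
        else:
            path.write_text("".join(json.dumps(e) + "\n" for e in self.entries))
        self._flushed = len(self.entries)
```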
8 changes: 6 additions & 2 deletions modyn/supervisor/internal/triggers/drift/alibi_detector.py
@@ -41,8 +41,12 @@ def detect_drift(
    ) -> dict[str, MetricResult]:
        assert isinstance(embeddings_ref, (np.ndarray, torch.Tensor))
        assert isinstance(embeddings_cur, (np.ndarray, torch.Tensor))
        embeddings_ref = embeddings_ref.numpy() if isinstance(embeddings_ref, torch.Tensor) else embeddings_ref
        embeddings_cur = embeddings_cur.numpy() if isinstance(embeddings_cur, torch.Tensor) else embeddings_cur
        embeddings_ref = (
            embeddings_ref.detach().cpu().numpy() if isinstance(embeddings_ref, torch.Tensor) else embeddings_ref
        )
        embeddings_cur = (
            embeddings_cur.detach().cpu().numpy() if isinstance(embeddings_cur, torch.Tensor) else embeddings_cur
        )

        results: dict[str, MetricResult] = {}

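The old one-liners break for tensors that live on the GPU or are still attached to an autograd graph; `detach().cpu()` handles both cases. A standalone illustration of the conversion:

```python
import numpy as np
import torch


def to_numpy(x: "np.ndarray | torch.Tensor") -> np.ndarray:
    # Mirrors the conversion above: detach() drops the autograd graph,
    # cpu() moves the data off the GPU, and only then is numpy() safe.
    return x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x


t = torch.ones(4, requires_grad=True)
# t.numpy() would raise:
#   RuntimeError: Can't call numpy() on Tensor that requires grad.
arr = to_numpy(t)  # works for CPU, CUDA, and grad-tracking tensors alike
```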
@@ -4,6 +4,7 @@

import grpc
from modyn.common.ftp import download_trained_model
from modyn.config.schema.system.config import ModynConfig
from modyn.metadata_database.metadata_database_connection import MetadataDatabaseConnection
from modyn.metadata_database.models import TrainedModel

@@ -24,12 +25,13 @@

    def __init__(
        self,
        modyn_config: dict,
        modyn_config: ModynConfig,
        pipeline_id: int,
        base_dir: pathlib.Path,
        model_storage_address: str,
    ):
        self.modyn_config = modyn_config
        # TODO(MaxiBoether): Update this class to use the model
        self.modyn_config = modyn_config.model_dump(by_alias=True)
        self.pipeline_id = pipeline_id
        self.base_dir = base_dir
        assert self.base_dir.exists(), f"Temporary Directory {self.base_dir} should have been created."
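`model_dump(by_alias=True)` is the Pydantic v2 way to turn the typed config object back into the plain dict the rest of this class still expects. A minimal sketch with an invented config model (not the real `ModynConfig` fields):

```python
from pydantic import BaseModel, Field


class TinyConfig(BaseModel):  # hypothetical stand-in for ModynConfig
    model_storage_address: str = Field(alias="model_storage")


cfg = TinyConfig(model_storage="localhost:50059")
print(cfg.model_dump())               # {'model_storage_address': 'localhost:50059'}
print(cfg.model_dump(by_alias=True))  # {'model_storage': 'localhost:50059'}
```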
@@ -1,11 +1,12 @@
# pylint: skip-file
# pragma: no cover

"""Binary file wrapper."""
import os
from typing import Literal


class BinaryFileWrapper:
class BinaryFileWrapper:  # pragma: no cover
    """Binary file wrapper.

    Binary files store raw sample data in a row-oriented format. One file can contain multiple samples.
@@ -15,7 +16,9 @@ class BinaryFileWrapper:
    offsetting the required number of bytes.
    """

    def __init__(self, file_path: str, byteorder: Literal["little", "big"], record_size: int, label_size: int):
    def __init__(
        self, file_path: str, byteorder: Literal["little", "big"], record_size: int, label_size: int
    ):  # pragma: no cover
        """Init binary file wrapper.

        Args:
@@ -40,15 +43,16 @@ def __init__(self, file_path: str, byteorder: Literal["little", "big"], record_s
        if self.file_size % self.record_size != 0:
            raise ValueError("File does not contain exact number of records of size " + str(self.record_size))

    def get_number_of_samples(self) -> int:
    def get_number_of_samples(self) -> int:  # pragma: no cover
        """Get number of samples in file.

        Returns:
            int: Number of samples in file
        """
        return int(self.file_size / self.record_size)

    def get_all_labels(self) -> list[int]:
    def get_all_labels(self) -> list[int]:  # pragma: no cover

        with open(self.file_path, "rb") as file:
            data = file.read()

@@ -61,7 +65,7 @@ def get_all_labels(self) -> list[int]:
        ]
        return labels

    def get_sample(self, index: int) -> bytes:
    def get_sample(self, index: int) -> bytes:  # pragma: no cover
        """Get the sample at the given index.
        The indices are zero based.

@@ -76,10 +80,12 @@ def get_sample(self, index: int) -> bytes:
        """
        return self.get_samples_from_indices([index])[0]

    def get_samples(self, start: int, end: int) -> list[bytes]:
    def get_samples(self, start: int, end: int) -> list[bytes]:  # pragma: no cover

        return self.get_samples_from_indices(list(range(start, end)))

    def get_samples_from_indices(self, indices: list) -> list[bytes]:
    def get_samples_from_indices(self, indices: list) -> list[bytes]:  # pragma: no cover

        with open(self.file_path, "rb") as file:
            data = file.read()
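A usage sketch of the wrapper's row-oriented layout, assuming the label occupies the first `label_size` bytes of each record (the exact offset arithmetic sits in the collapsed part of the diff, so treat the layout as an assumption):

```python
import tempfile

# Two records of 8 bytes each: a 4-byte big-endian label followed by a 4-byte payload.
data = (1).to_bytes(4, "big") + b"AAAA" + (2).to_bytes(4, "big") + b"BBBB"
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
    f.write(data)

wrapper = BinaryFileWrapper(f.name, byteorder="big", record_size=8, label_size=4)
print(wrapper.get_number_of_samples())  # 2 (16-byte file / 8-byte records)
print(wrapper.get_all_labels())         # [1, 2] under the assumed layout
```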