chore: Adjustments for triggering experiments #595

Merged

Changes from 3 commits
2 changes: 1 addition & 1 deletion modyn/common/ftp/ftp_utils.py
@@ -103,7 +103,7 @@ def delete_file(hostname: str, port: int, user: str, password: str, remote_file_
     Returns:
     """
     ftp = FTP()
-    ftp.connect(hostname, port, timeout=3)
+    ftp.connect(hostname, port, timeout=5 * 60)
     ftp.login(user, password)
     ftp.delete(str(remote_file_path))
     ftp.close()
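
The timeout is raised from 3 seconds to 5 minutes so slow FTP control connections no longer abort the operation. A minimal usage sketch of the helper touched above; hostname, port, credentials, and the path are placeholders, and the path is assumed to be path-like since the body calls str(remote_file_path):

    from pathlib import Path

    from modyn.common.ftp.ftp_utils import delete_file

    # placeholder host, credentials, and remote path for illustration only
    delete_file("storage.example.com", 21, "modyn", "modyn", Path("models/model_0.zip"))
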
5 changes: 5 additions & 0 deletions modyn/evaluator/internal/metrics/f1_score.py
@@ -70,4 +70,9 @@ def get_evaluation_result(self) -> float:
         return float(np.average(f1_scores, weights=total_labels_per_class / total_samples))
 
     def get_name(self) -> str:
+        if self.config.average == "binary":
+            # "F1-binary" alone does not uniquely identify the metric:
+            # different binary F1 metrics can use different positive labels,
+            # so the metric name has to include the positive label.
+            return f"F1-{self.config.average}-{self.config.pos_label}"
         return f"F1-{self.config.average}"
5 changes: 4 additions & 1 deletion modyn/evaluator/internal/metrics/roc_auc.py
@@ -33,7 +33,10 @@ def _dataset_evaluated_callback(self, y_true: torch.Tensor, y_pred: torch.Tensor
         desc_score_indices = torch.argsort(y_pred, descending=True)
         y_score = y_pred[desc_score_indices]
         y_true = y_true[desc_score_indices]
-        distinct_value_indices = torch.nonzero(y_score[1:] - y_score[:-1], as_tuple=False).squeeze()
+        # squeeze only the second dimension; otherwise, if (y_score[1:] - y_score[:-1])
+        # contains exactly one non-zero element, a full squeeze() turns the result into
+        # a 0-d scalar and the torch.cat below fails.
+        distinct_value_indices = torch.nonzero(y_score[1:] - y_score[:-1], as_tuple=False).squeeze(dim=1)
         threshold_idxs = torch.cat([distinct_value_indices, torch.tensor([y_true.numel() - 1])])
         tps = torch.cumsum(y_true, dim=0)[threshold_idxs]
         fps = 1 + threshold_idxs - tps
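
Why squeeze(dim=1) matters: for a 1-D input, torch.nonzero(..., as_tuple=False) returns a tensor of shape (k, 1); when k == 1, a full squeeze() yields a 0-d scalar that torch.cat rejects. A standalone sketch, independent of the Modyn classes, demonstrating the difference:

    import torch

    # two scores in descending order, as in the two-entry test below
    y_score = torch.tensor([0.6, 0.1])
    diffs = y_score[1:] - y_score[:-1]          # tensor([-0.5]): one non-zero element
    idx = torch.nonzero(diffs, as_tuple=False)  # shape (1, 1)

    print(idx.squeeze().shape)       # torch.Size([])  -> 0-d scalar
    print(idx.squeeze(dim=1).shape)  # torch.Size([1]) -> stays 1-d

    # with dim=1 this works; a full squeeze() would raise
    # "zero-dimensional tensor ... cannot be concatenated"
    threshold_idxs = torch.cat([idx.squeeze(dim=1), torch.tensor([y_score.numel() - 1])])
    print(threshold_idxs)            # tensor([0, 1])
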
8 changes: 8 additions & 0 deletions modyn/tests/evaluator/internal/metrics/test_roc_auc.py
@@ -39,6 +39,14 @@ def test_roc_auc_metric():
     assert get_evaluation_result(y_true, y_score) == pytest.approx(2 / 3)
 
 
+def test_roc_auc_with_two_entries():
+    y_true = np.array([0, 1])
+    y_score = np.array([0.1, 0.6])
+    # regression test for the squeeze(dim=1) fix in _dataset_evaluated_callback():
+    # this input yields a single distinct-score index and must not raise an exception
+    get_evaluation_result(y_true, y_score)
+
+
 def test_roc_auc_invalid():
     with pytest.raises(TypeError):
         get_evaluation_result(np.array([1, 1, 1]), np.array([0.2, 0.3]))
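
For this input the single positive sample is ranked above the single negative one, so a standard ROC AUC would be 1.0; assuming get_evaluation_result returns that value, the test could additionally assert it:

    assert get_evaluation_result(y_true, y_score) == pytest.approx(1.0)  # assumes standard ROC AUC semantics
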