chore: Adjustments for triggering experiments #595

Merged

Changes from 3 commits
2 changes: 1 addition & 1 deletion modyn/common/ftp/ftp_utils.py
@@ -103,7 +103,7 @@ def delete_file(hostname: str, port: int, user: str, password: str, remote_file_
     Returns:
     """
     ftp = FTP()
-    ftp.connect(hostname, port, timeout=3)
+    ftp.connect(hostname, port, timeout=5 * 60)
     ftp.login(user, password)
     ftp.delete(str(remote_file_path))
     ftp.close()
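
The timeout is raised from 3 seconds to 5 minutes so slow FTP control connections no longer abort the operation. A minimal usage sketch of the helper touched above; hostname, port, credentials, and the path are placeholders, and the path is assumed to be path-like since the body calls str(remote_file_path):

    from pathlib import Path

    from modyn.common.ftp.ftp_utils import delete_file

    # placeholder host, credentials, and remote path for illustration only
    delete_file("storage.example.com", 21, "modyn", "modyn", Path("models/model_0.zip"))
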
5 changes: 5 additions & 0 deletions modyn/evaluator/internal/metrics/f1_score.py
@@ -70,4 +70,9 @@ def get_evaluation_result(self) -> float:
         return float(np.average(f1_scores, weights=total_labels_per_class / total_samples))
 
     def get_name(self) -> str:
+        if self.config.average == "binary":
+            # "F1-binary" alone does not uniquely identify the metric:
+            # different binary F1 metrics can use different positive labels,
+            # so the metric name has to include the positive label.
+            return f"F1-{self.config.average}-{self.config.pos_label}"
         return f"F1-{self.config.average}"
5 changes: 4 additions & 1 deletion modyn/evaluator/internal/metrics/roc_auc.py
@@ -33,7 +33,10 @@ def _dataset_evaluated_callback(self, y_true: torch.Tensor, y_pred: torch.Tensor
         desc_score_indices = torch.argsort(y_pred, descending=True)
         y_score = y_pred[desc_score_indices]
         y_true = y_true[desc_score_indices]
-        distinct_value_indices = torch.nonzero(y_score[1:] - y_score[:-1], as_tuple=False).squeeze()
+        # squeeze only the second dimension; otherwise, if (y_score[1:] - y_score[:-1])
+        # contains exactly one non-zero element, a full squeeze() turns the result into
+        # a 0-d scalar and the torch.cat below fails.
+        distinct_value_indices = torch.nonzero(y_score[1:] - y_score[:-1], as_tuple=False).squeeze(dim=1)
         threshold_idxs = torch.cat([distinct_value_indices, torch.tensor([y_true.numel() - 1])])
         tps = torch.cumsum(y_true, dim=0)[threshold_idxs]
         fps = 1 + threshold_idxs - tps
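
Why squeeze(dim=1) matters: for a 1-D input, torch.nonzero(..., as_tuple=False) returns a tensor of shape (k, 1); when k == 1, a full squeeze() yields a 0-d scalar that torch.cat rejects. A standalone sketch, independent of the Modyn classes, demonstrating the difference:

    import torch

    # two scores in descending order, as in the two-entry test below
    y_score = torch.tensor([0.6, 0.1])
    diffs = y_score[1:] - y_score[:-1]          # tensor([-0.5]): one non-zero element
    idx = torch.nonzero(diffs, as_tuple=False)  # shape (1, 1)

    print(idx.squeeze().shape)       # torch.Size([])  -> 0-d scalar
    print(idx.squeeze(dim=1).shape)  # torch.Size([1]) -> stays 1-d

    # with dim=1 this works; a full squeeze() would raise
    # "zero-dimensional tensor ... cannot be concatenated"
    threshold_idxs = torch.cat([idx.squeeze(dim=1), torch.tensor([y_score.numel() - 1])])
    print(threshold_idxs)            # tensor([0, 1])
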
8 changes: 8 additions & 0 deletions modyn/tests/evaluator/internal/metrics/test_roc_auc.py
@@ -39,6 +39,14 @@ def test_roc_auc_metric():
     assert get_evaluation_result(y_true, y_score) == pytest.approx(2 / 3)
 
 
+def test_roc_auc_with_two_entries():
+    y_true = np.array([0, 1])
+    y_score = np.array([0.1, 0.6])
+    # regression test for the squeeze(dim=1) fix in _dataset_evaluated_callback():
+    # this input yields a single distinct-score index and must not raise an exception
+    get_evaluation_result(y_true, y_score)
+
+
 def test_roc_auc_invalid():
     with pytest.raises(TypeError):
         get_evaluation_result(np.array([1, 1, 1]), np.array([0.2, 0.3]))
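
For this input the single positive sample is ranked above the single negative one, so a standard ROC AUC would be 1.0; assuming get_evaluation_result returns that value, the test could additionally assert it:

    assert get_evaluation_result(y_true, y_score) == pytest.approx(1.0)  # assumes standard ROC AUC semantics
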