Support multiple scoring methods of UnifiedMetric #4

Merged · 2 commits · Jul 8, 2024
Changes from 1 commit:
Add unit tests for XCOMET
de9uch1 committed Jul 8, 2024
commit 930991936be0af4fd2ffccea1e924f032609b4cf
11 changes: 10 additions & 1 deletion mbrs/conftest.py
@@ -1,6 +1,7 @@
import pytest
import torch

-from mbrs.metrics import MetricCOMET, MetricCOMETQE
+from mbrs.metrics import MetricCOMET, MetricCOMETQE, MetricXCOMET


@pytest.fixture(scope="session")
@@ -11,3 +12,11 @@ def metric_comet():
@pytest.fixture(scope="session")
def metric_cometqe():
return MetricCOMETQE(MetricCOMETQE.Config())


@pytest.mark.skipif(
not torch.cuda.is_available(), reason="CUDA is not available on this machine."
)
@pytest.fixture(scope="session")
def metric_xcomet():
return MetricXCOMET(MetricXCOMET.Config())
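
Note: pytest marks applied to fixture functions do not themselves gate the tests that request the fixture; in this diff the CUDA gate takes effect through the class-level skipif in xcomet_test.py below. A minimal alternative sketch (hypothetical, not part of this commit) would skip from inside the fixture body instead:

import pytest
import torch

from mbrs.metrics import MetricXCOMET


@pytest.fixture(scope="session")
def metric_xcomet():
    # Skipping during fixture setup skips every test that requests this fixture.
    if not torch.cuda.is_available():
        pytest.skip("CUDA is not available on this machine.")
    return MetricXCOMET(MetricXCOMET.Config())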
73 changes: 72 additions & 1 deletion mbrs/metrics/xcomet_test.py
@@ -1 +1,72 @@
-# TODO(deguchi): Add unit tests for XCOMET
import pytest
import torch

from .xcomet import MetricXCOMET

SOURCE = "これはテストです"
HYPOTHESES = [
"this is a test",
"another test",
"this is a fest",
"Producția de zahăr primă va fi exprimată în ceea ce privește zahărul alb;",
]
REFERENCES = [
"ref",
"this is a test",
"producţia de zahăr brut se exprimă în zahăr alb;",
]
SCORES = torch.Tensor(
[
[0.97671, 1.00000, 0.49054],
[0.94399, 0.99120, 0.43007],
[0.71786, 0.71210, 0.30775],
[0.21788, 0.22079, 0.61004],
]
)


@pytest.mark.skipif(
not torch.cuda.is_available(), reason="CUDA is not available on this machine."
)
class TestMetricXCOMET:
def test_score(self, metric_xcomet: MetricXCOMET):
for i, hyp in enumerate(HYPOTHESES):
for j, ref in enumerate(REFERENCES):
assert torch.isclose(
SCORES[i, j],
torch.tensor(metric_xcomet.score(hyp, ref, SOURCE)),
atol=0.0005 / 100,
)

def test_scores(self, metric_xcomet: MetricXCOMET):
hyps = ["another test", "this is a test", "this is an test"]
refs = ["another test", "this is a fest", "this is a test"]
src = SOURCE

torch.testing.assert_close(
metric_xcomet.scores(hyps, refs, src).cpu().float(),
torch.FloatTensor([1.00000, 0.90545, 1.00000]),
atol=0.0005 / 100,
rtol=1e-6,
)
torch.testing.assert_close(
metric_xcomet.scores(hyps, source=src).cpu().float(),
torch.FloatTensor([0.99120, 0.99120, 0.99120]),
atol=0.0005 / 100,
rtol=1e-6,
)
torch.testing.assert_close(
metric_xcomet.scores(hyps, references=refs).cpu().float(),
torch.FloatTensor([1.00000, 0.77420, 1.00000]),
atol=0.0005 / 100,
rtol=1e-6,
)

def test_expected_scores(self, metric_xcomet: MetricXCOMET):
expected_scores = metric_xcomet.expected_scores(HYPOTHESES, REFERENCES, SOURCE)
torch.testing.assert_close(
expected_scores,
SCORES.mean(dim=1).to(metric_xcomet.device),
atol=0.0005 / 100,
rtol=1e-6,
)
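
For context, the three calling patterns exercised by these tests correspond to the scoring modes named in the PR title: source + references, source only (QE-style), and references only. A rough usage sketch, assuming a CUDA-capable machine and the default config used in conftest.py; the calls mirror those in the tests above:

from mbrs.metrics import MetricXCOMET

# Rough sketch of the scoring modes exercised by the tests above.
metric = MetricXCOMET(MetricXCOMET.Config())

hyp, ref, src = "this is a test", "this is a test", "これはテストです"

# Single-sentence scoring with both source and reference.
single = metric.score(hyp, ref, src)

# Batched scoring: source + references, source only (QE-style),
# or references only, mirroring the three assertions in test_scores().
both = metric.scores([hyp], [ref], src)
qe_only = metric.scores([hyp], source=src)
ref_only = metric.scores([hyp], references=[ref])

# Expected utility of each hypothesis over the reference set,
# as in test_expected_scores().
expected = metric.expected_scores([hyp], [ref], src)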