metrics (PaddlePaddle#5265)

sijunhe · web-flow · commit 74c2a4eb19c7 · 2023-03-16T19:18:04.000+08:00
diff --git a/tests/metrics/test_bleu.py b/tests/metrics/test_bleu.py
@@ -20,6 +20,7 @@
 class TestBLEU(unittest.TestCase):
     def test_metrics(self):
         bleu = BLEU()
+        bleu.reset()
         cand = ["The", "cat", "The", "cat", "on", "the", "mat"]
         ref_list = [["The", "cat", "is", "on", "the", "mat"], ["There", "is", "a", "cat", "on", "the", "mat"]]
         bleu.add_inst(cand, ref_list)
diff --git a/tests/metrics/test_chunk.py b/tests/metrics/test_chunk.py
@@ -14,19 +14,28 @@
 
 import unittest
 
+import paddle
+
 from paddlenlp.metrics import ChunkEvaluator
 
 
 class TestChunk(unittest.TestCase):
     def test_metrics(self):
-        num_infer_chunks = 10
-        num_label_chunks = 9
-        num_correct_chunks = 8
-
-        label_list = [1, 1, 0, 0, 1, 0, 1]
+        """Score one five-token sequence and check precision, recall and F1."""
+        label_list = ["O", "B-Person", "I-Person"]
         evaluator = ChunkEvaluator(label_list)
-        evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
+        evaluator.reset()
+        lengths = paddle.to_tensor([5])
+        # Tag ids index label_list. Presumably decoded as IOB: the prediction then holds
+        # two Person chunks and the gold labels three, with only the first one shared.
+        predictions = paddle.to_tensor([[0, 1, 2, 1, 2]])
+        labels = paddle.to_tensor([[0, 1, 2, 1, 1]])
+        num_infer_chunks, num_label_chunks, num_correct_chunks = evaluator.compute(
+            lengths=lengths, predictions=predictions, labels=labels
+        )
+        evaluator.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy())
         precision, recall, f1 = evaluator.accumulate()
-        self.assertEqual(precision, 0.8)
-        self.assertEqual(recall, 0.8888888888888888)
-        self.assertEqual(f1, 0.8421052631578948)
+        # The scores are computed floats, so compare with a tolerance instead of bit-for-bit.
+        self.assertAlmostEqual(precision, 0.5)
+        self.assertAlmostEqual(recall, 0.3333333333333333)
+        self.assertAlmostEqual(f1, 0.4)
diff --git a/tests/metrics/test_rouge.py b/tests/metrics/test_rouge.py
@@ -20,12 +20,14 @@
 class TestRouge(unittest.TestCase):
     def test_rogue1(self):
         rouge1 = Rouge1()
+        rouge1.reset()  # NOTE(review): fresh instance; presumably called only to exercise reset()
         cand = ["The", "cat", "The", "cat", "on", "the", "mat"]
         ref_list = [["The", "cat", "is", "on", "the", "mat"], ["There", "is", "a", "cat", "on", "the", "mat"]]
         self.assertEqual(rouge1.score(cand, ref_list), 0.07692307692307693)
 
     def test_roguel(self):
         rougel = RougeL()
+        rougel.reset()
         cand = ["The", "cat", "The", "cat", "on", "the", "mat"]
         ref_list = [["The", "cat", "is", "on", "the", "mat"], ["There", "is", "a", "cat", "on", "the", "mat"]]
         rougel.add_inst(cand, ref_list)
diff --git a/tests/metrics/test_span.py b/tests/metrics/test_span.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle
+
+from paddlenlp.metrics import SpanEvaluator
+
+
+class TestSpanEvaluator(unittest.TestCase):
+    def test_metrics(self):
+        """Score a two-sample batch and check precision, recall and F1."""
+        metric = SpanEvaluator()
+        metric.reset()
+        # Sample 0 peaks at start=2/end=3, matching its gold ids; sample 1 peaks at index 1
+        # while its gold position is index 2. Presumably scored as 1 of 2 spans correct.
+        start_prob = paddle.to_tensor([[0.1, 0.1, 0.6, 0.2], [0.0, 0.9, 0.1, 0.0]])
+        end_prob = paddle.to_tensor([[0.1, 0.1, 0.2, 0.6], [0.0, 0.9, 0.1, 0.0]])
+        start_ids = paddle.to_tensor([[0, 0, 1, 0], [0, 0, 1, 0]])
+        end_ids = paddle.to_tensor([[0, 0, 0, 1], [0, 0, 1, 0]])
+        num_correct, num_infer, num_label = metric.compute(start_prob, end_prob, start_ids, end_ids)
+        metric.update(num_correct, num_infer, num_label)
+        precision, recall, f1 = metric.accumulate()
+        # The scores are computed floats, so compare with a tolerance instead of bit-for-bit.
+        self.assertAlmostEqual(precision, 0.5)
+        self.assertAlmostEqual(recall, 0.5)
+        self.assertAlmostEqual(f1, 0.5)