Fix extractive match to accept parenthesized choice indices such as (A) (#746)

hynky1999 · web-flow · commit 74df0b1c04ab · 2025-05-19T17:57:29.000+02:00
diff --git a/src/lighteval/metrics/utils/extractive_match_utils.py b/src/lighteval/metrics/utils/extractive_match_utils.py
@@ -286,18 +286,22 @@ def lazy_indices_regex(
     translation_literal = TRANSLATION_LITERALS[language]
     # First get indices to predict
     indices = get_prefix(indices_config.prefix_for_extraction, translation_literal)[:len_choices]
-    indice_str_re = f"(?P<indices>{'|'.join([re.escape(i) for i in indices])})"
+    indices_escaped = [re.escape(i) for i in indices]
+    # We allow both (A) and A
+    indices_wrapped = [rf"(?:{i}|\({i}\))" for i in indices_escaped]
+    indice_str_re = f"(?P<indices>{'|'.join(indices_wrapped)})"
 
     # The answer keys are either surrounded with <space>**answer**., or '<space>answer.' or the same without the dot
     full_stop_re = rf"[{re.escape(translation_literal.full_stop)}\.]"
     comma_re = rf"[{re.escape(translation_literal.comma)}\,]"
     colon_re = rf"[{re.escape(translation_literal.colon)}\:]"
     space_re = re.escape(translation_literal.sentence_space)
 
-    answer_prefix_re = rf"(^|{space_re})(?:\*\*)?"
+    answer_prefix_re = rf"(?:^|{space_re})(?:\*\*)?"
     answer_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|{colon_re}|{space_re}|$)"
     answer_re = f"{answer_prefix_re}{indice_str_re}{answer_suffix_re}"
     answer_re_start = rf"^(?:\*\*)?{indice_str_re}{answer_suffix_re}"
+    answer_re_line_start = rf"\n(?:\*\*)?{indice_str_re}{answer_suffix_re}"
 
     answer_word = f"(?i:{translation_literal.answer})"
 
@@ -320,8 +324,10 @@ def lazy_indices_regex(
             (f"{answer_word}{colon_re}.{{0,50}}?{answer_re}", 100),
             # Answer word patterns
             (f"{answer_word}.{{0,50}}?{answer_re}", 150),
-            # Start of line patterns
+            # Start of the string
             (answer_re_start, 200),
+            # Start of the line
+            (answer_re_line_start, 210),
         ]
     )
 
@@ -490,6 +496,15 @@ def extract_latex(
     return latex_exprs[0], latex_strs[0]
 
 
+def extract_indices(
+    match: re.Match, target_type: IndicesExtractionConfig, timeout_seconds: int
+) -> tuple[str | None, str]:
+    """Return the matched ``indices`` group as a (cleaned, cleaned) pair: parentheses removed, whitespace stripped."""
+    # target_type and timeout_seconds are accepted but not read by this body.
+    cleaned = match.group("indices").replace("(", "").replace(")", "").strip()
+    return cleaned, cleaned
+
+
 def extract_match(
     match: re.Match, target_type: ExtractionTarget, timeout_seconds: int
 ) -> tuple[Basic | MatrixBase | str | None, str]:
@@ -510,7 +525,7 @@ def extract_match(
     elif isinstance(target_type, ExprExtractionConfig):
         return extract_expr(match, timeout_seconds=timeout_seconds)
     elif isinstance(target_type, IndicesExtractionConfig):
-        return match.group("indices"), match.group("indices")
+        return extract_indices(match, target_type, timeout_seconds=timeout_seconds)
 
 
 def extract_target_from_pred(
diff --git a/tests/metrics/test_extractive_match.py b/tests/metrics/test_extractive_match.py
@@ -83,6 +83,12 @@ def compare_strings(
         ("D", "The answer: D, doesn't makese nsense for answer to be A or B", 1),
         # Test minimal answer format
         ("D", "D. it can't be A or B", 1),
+        ("(D) Alina", "D", 1),
+        ("(A) Cecile", "C", 0),
+        ("C Cecile", "C", 1),
+        ("Alina and the answer is\n(C) Cecile", "C", 1),
+        ("Alina and the answer is\nC Cecile", "C", 1),
+        ("A Peter\nCelina bum", "A", 1),
     ],
 )
 def test_extraction_abc(gold, pred, expected):