Skip to content

Commit 74df0b1

Browse files
authored
fix extractive match to accept (A) (#746)
1 parent c7f4ae9 commit 74df0b1

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

src/lighteval/metrics/utils/extractive_match_utils.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -286,18 +286,22 @@ def lazy_indices_regex(
286286
translation_literal = TRANSLATION_LITERALS[language]
287287
# First get indices to predict
288288
indices = get_prefix(indices_config.prefix_for_extraction, translation_literal)[:len_choices]
289-
indice_str_re = f"(?P<indices>{'|'.join([re.escape(i) for i in indices])})"
289+
indices_escaped = [re.escape(i) for i in indices]
290+
# We allow both (A) and A
291+
indices_wrapped = [rf"(?:{i}|\({i}\))" for i in indices_escaped]
292+
indice_str_re = f"(?P<indices>{'|'.join(indices_wrapped)})"
290293

291294
# The answer keys are either surrounded with <space>**answer**., or '<space>answer.' or the same without the dot
292295
full_stop_re = rf"[{re.escape(translation_literal.full_stop)}\.]"
293296
comma_re = rf"[{re.escape(translation_literal.comma)}\,]"
294297
colon_re = rf"[{re.escape(translation_literal.colon)}\:]"
295298
space_re = re.escape(translation_literal.sentence_space)
296299

297-
answer_prefix_re = rf"(^|{space_re})(?:\*\*)?"
300+
answer_prefix_re = rf"(?:^|{space_re})(?:\*\*)?"
298301
answer_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|{colon_re}|{space_re}|$)"
299302
answer_re = f"{answer_prefix_re}{indice_str_re}{answer_suffix_re}"
300303
answer_re_start = rf"^(?:\*\*)?{indice_str_re}{answer_suffix_re}"
304+
answer_re_line_start = rf"\n(?:\*\*)?{indice_str_re}{answer_suffix_re}"
301305

302306
answer_word = f"(?i:{translation_literal.answer})"
303307

@@ -320,8 +324,10 @@ def lazy_indices_regex(
320324
(f"{answer_word}{colon_re}.{{0,50}}?{answer_re}", 100),
321325
# Answer word patterns
322326
(f"{answer_word}.{{0,50}}?{answer_re}", 150),
323-
# Start of line patterns
327+
# Start of the string
324328
(answer_re_start, 200),
329+
# Start of the line
330+
(answer_re_line_start, 210),
325331
]
326332
)
327333

@@ -490,6 +496,15 @@ def extract_latex(
490496
return latex_exprs[0], latex_strs[0]
491497

492498

499+
def extract_indices(
    match: re.Match, target_type: IndicesExtractionConfig, timeout_seconds: int
) -> tuple[str | None, str]:
    """Return the normalized choice index captured by ``match``.

    The text of the named group ``indices`` has every ``(`` and ``)``
    character removed and is then stripped of surrounding whitespace, so
    that ``"(A)"`` and ``"A"`` both yield ``"A"``.

    Args:
        match: Regex match exposing a named group ``indices``.
        target_type: Not used by this function.
        timeout_seconds: Not used by this function.

    Returns:
        A 2-tuple holding the normalized index twice.
    """
    # One translate pass drops both parenthesis characters.
    drop_parens = str.maketrans("", "", "()")
    cleaned = match.group("indices").translate(drop_parens).strip()
    return cleaned, cleaned
506+
507+
493508
def extract_match(
494509
match: re.Match, target_type: ExtractionTarget, timeout_seconds: int
495510
) -> tuple[Basic | MatrixBase | str | None, str]:
@@ -510,7 +525,7 @@ def extract_match(
510525
elif isinstance(target_type, ExprExtractionConfig):
511526
return extract_expr(match, timeout_seconds=timeout_seconds)
512527
elif isinstance(target_type, IndicesExtractionConfig):
513-
return match.group("indices"), match.group("indices")
528+
return extract_indices(match, target_type, timeout_seconds=timeout_seconds)
514529

515530

516531
def extract_target_from_pred(

tests/metrics/test_extractive_match.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ def compare_strings(
8383
("D", "The answer: D, doesn't makese nsense for answer to be A or B", 1),
8484
# Test minimal answer format
8585
("D", "D. it can't be A or B", 1),
86+
("(D) Alina", "D", 1),
87+
("(A) Cecile", "C", 0),
88+
("C Cecile", "C", 1),
89+
("Alina and the answer is\n(C) Cecile", "C", 1),
90+
("Alina and the answer is\nC Cecile", "C", 1),
91+
("A Peter\nCelina bum", "A", 1),
8692
],
8793
)
8894
def test_extraction_abc(gold, pred, expected):

0 commit comments

Comments
 (0)