Skip to content

Commit c9bd88c

Browse files
authored
Generalizing tests for smarter LLMs 2 (#1154)
1 parent b62e286 commit c9bd88c

File tree

4 files changed

+36
-17
lines changed

4 files changed

+36
-17
lines changed

packages/paper-qa-docling/tests/test_paperqa_docling.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from lmi.utils import bytes_to_string
1111
from paperqa import Doc, Docs
1212
from paperqa.readers import PDFParserFn, chunk_pdf
13-
from paperqa.utils import ImpossibleParsingError
13+
from paperqa.utils import ImpossibleParsingError, get_citation_ids
1414

1515
from paperqa_docling import parse_pdf_to_pages
1616

@@ -87,8 +87,8 @@ async def test_parse_pdf_to_pages() -> None:
8787
("What actions can the Crawler take?", [(("search", "expand", "stop"), 2)]),
8888
("What actions can the Selector take?", [(("select", "drop"), 2)]),
8989
(
90-
"How many User Query are there, and what do they do?",
91-
[(("two", "2"), 2), (("crawler", "selector"), 2)],
90+
"How many User Query blue boxes are there, and what are they connected to?",
91+
[(("two", "2"), 1), (("crawler", "selector"), 2)],
9292
),
9393
):
9494
session = await docs.aquery(query=query)
@@ -97,12 +97,17 @@ async def test_parse_pdf_to_pages() -> None:
9797
c.text.text == fig_1_text.text and c.text.media == fig_1_text.media
9898
for c in session.contexts
9999
), "Expected context to reuse Figure 1's text and media"
100+
# Remove citations so numeric assertions don't have false positives
101+
raw_answer_no_citations = session.raw_answer
102+
for key in get_citation_ids(session.raw_answer):
103+
raw_answer_no_citations = raw_answer_no_citations.replace(f"({key})", "")
100104
for substrings, min_count in cast(
101105
list[tuple[tuple[str, ...], int]], substrings_min_counts
102106
):
103107
assert (
104-
sum(x in session.answer.lower() for x in substrings) >= min_count
105-
), f"Expected {session.answer=} to have at {substrings} present"
108+
sum(x in raw_answer_no_citations.lower() for x in substrings)
109+
>= min_count
110+
), f"Expected {raw_answer_no_citations=} to have {substrings} present"
106111

107112
# Check the no-media behavior
108113
parsed_text_no_media = parse_pdf_to_pages(filepath, parse_media=False)

packages/paper-qa-pymupdf/tests/test_paperqa_pymupdf.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from lmi.utils import bytes_to_string
1010
from paperqa import Doc, Docs, Settings
1111
from paperqa.readers import PDFParserFn, chunk_pdf
12-
from paperqa.utils import ImpossibleParsingError
12+
from paperqa.utils import ImpossibleParsingError, get_citation_ids
1313

1414
from paperqa_pymupdf import parse_pdf_to_pages
1515

@@ -82,8 +82,8 @@ async def test_parse_pdf_to_pages() -> None:
8282
("What actions can the Crawler take?", [(("search", "expand", "stop"), 2)]),
8383
("What actions can the Selector take?", [(("select", "drop"), 2)]),
8484
(
85-
"How many User Query are there, and what do they do?",
86-
[(("two", "2"), 2), (("crawler", "selector"), 2)],
85+
"How many User Query blue boxes are there, and what are they connected to?",
86+
[(("two", "2"), 1), (("crawler", "selector"), 2)],
8787
),
8888
):
8989
session = await docs.aquery(query=query)
@@ -92,12 +92,17 @@ async def test_parse_pdf_to_pages() -> None:
9292
c.text.text == fig_1_text.text and c.text.media == fig_1_text.media
9393
for c in session.contexts
9494
), "Expected context to reuse Figure 1's text and media"
95+
# Remove citations so numeric assertions don't have false positives
96+
raw_answer_no_citations = session.raw_answer
97+
for key in get_citation_ids(session.raw_answer):
98+
raw_answer_no_citations = raw_answer_no_citations.replace(f"({key})", "")
9599
for substrings, min_count in cast(
96100
list[tuple[tuple[str, ...], int]], substrings_min_counts
97101
):
98102
assert (
99-
sum(x in session.answer.lower() for x in substrings) >= min_count
100-
), f"Expected {session.answer=} to have at {substrings} present"
103+
sum(x in raw_answer_no_citations.lower() for x in substrings)
104+
>= min_count
105+
), f"Expected {raw_answer_no_citations=} to have {substrings} present"
101106

102107
# Let's check the full page parsing behavior
103108
parsed_text_full_page = parse_pdf_to_pages(filepath, full_page=True)

packages/paper-qa-pypdf/tests/test_paperqa_pypdf.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from lmi.utils import bytes_to_string
1010
from paperqa import Doc, Docs
1111
from paperqa.readers import PDFParserFn, chunk_pdf
12-
from paperqa.utils import ImpossibleParsingError
12+
from paperqa.utils import ImpossibleParsingError, get_citation_ids
1313

1414
from paperqa_pypdf import parse_pdf_to_pages
1515

@@ -85,8 +85,8 @@ async def test_parse_pdf_to_pages() -> None:
8585
("What actions can the Crawler take?", [(("search", "expand", "stop"), 2)]),
8686
("What actions can the Selector take?", [(("select", "drop"), 2)]),
8787
(
88-
"How many User Query are there, and what do they do?",
89-
[(("two", "2"), 2), (("crawler", "selector"), 2)],
88+
"How many User Query blue boxes are there, and what are they connected to?",
89+
[(("two", "2"), 1), (("crawler", "selector"), 2)],
9090
),
9191
):
9292
session = await docs.aquery(query=query)
@@ -95,12 +95,17 @@ async def test_parse_pdf_to_pages() -> None:
9595
c.text.text == fig_1_text.text and c.text.media == fig_1_text.media
9696
for c in session.contexts
9797
), "Expected context to reuse Figure 1's text and media"
98+
# Remove citations so numeric assertions don't have false positives
99+
raw_answer_no_citations = session.raw_answer
100+
for key in get_citation_ids(session.raw_answer):
101+
raw_answer_no_citations = raw_answer_no_citations.replace(f"({key})", "")
98102
for substrings, min_count in cast(
99103
list[tuple[tuple[str, ...], int]], substrings_min_counts
100104
):
101105
assert (
102-
sum(x in session.answer.lower() for x in substrings) >= min_count
103-
), f"Expected {session.answer=} to have at {substrings} present"
106+
sum(x in raw_answer_no_citations.lower() for x in substrings)
107+
>= min_count
108+
), f"Expected {raw_answer_no_citations=} to have {substrings} present"
104109

105110
# Check the no-media behavior
106111
parsed_text_no_media = parse_pdf_to_pages(filepath, parse_media=False)

tests/test_paperqa.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1732,8 +1732,12 @@ async def test_images_corrupt(stub_data_dir: Path, caplog) -> None:
17321732
"What districts neighbor the Western Addition?", settings=settings
17331733
)
17341734
assert not session.contexts, "Expected no contexts to be made from a bad image."
1735-
assert (
1736-
"unsupported image" in caplog.text
1735+
assert any(
1736+
x in caplog.text.lower()
1737+
for x in (
1738+
"unsupported image", # OpenAI
1739+
"could not process image", # Anthropic
1740+
)
17371741
), "Expected a caught exception about an unsupported image."
17381742

17391743
# By suppressing the use of images, we can actually gather evidence now

0 commit comments

Comments
 (0)