From 2e69836e7795b1ed8c5772fe9bcf248f9b9e0671 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:19:41 +0200 Subject: [PATCH] BUG: Cope with unbalanced delimiters in dictionary object (#2878) Closes #2877. --- pypdf/generic/_data_structures.py | 2 ++ tests/test_reader.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 2a004c15b..58a3477fa 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -570,6 +570,8 @@ def read_unsized_from_stream( try: try: key = read_object(stream, pdf) + if isinstance(key, NullObject): + break if not isinstance(key, NameObject): raise PdfReadError( f"Expecting a NameObject for key but found {key!r}" diff --git a/tests/test_reader.py b/tests/test_reader.py index 9fb898ab0..30da20adb 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1703,3 +1703,12 @@ def test_space_in_names_to_continue_processing(caplog): reader = PdfReader(BytesIO(b), strict=True) with pytest.raises(PdfReadError): obj = reader.get_object(70) + + +@pytest.mark.enable_socket() +def test_unbalanced_brackets_in_dictionary_object(caplog): + """Cf #2877""" + url = "https://github.com/user-attachments/files/17162634/7f40cb209fb97d1782bffcefc5e7be40.pdf" + name = "iss2877.pdf" # reused + reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) + assert len(reader.pages) == 43 # note: /Count = 46 but 3 kids are None