From 5ddf4cb32505cb034496ac4be13747a61fb6ce46 Mon Sep 17 00:00:00 2001
From: Martin Thoma
Date: Sun, 17 Jul 2022 09:36:49 +0200
Subject: [PATCH] TST: Add MCVE showing outline title issue (#1123)

See #1121
---
Notes (commentary placed after the three-dash separator; not part of the commit):
  * sample-files: moves the submodule pointer from 6da0fbb53 to 31763905b.
  * tests/test_reader.py: imports Destination from PyPDF2.generic and adds
    EXTERNAL_ROOT, a Path to the "sample-files" directory under PROJECT_ROOT.
  * tests/test_reader.py: adds test_outline_title_issue_1121, marked xfail
    with reason "#1121". Its local helper get_titles_only() walks
    reader.outlines, collecting the title of every Destination and recursing
    into every other item, so nested outline lists become nested title lists;
    a non-list argument raises ValueError. The result is compared with the
    expected "First" .. "Twenty-seventh" hierarchy.

 sample-files         |  2 +-
 tests/test_reader.py | 62 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/sample-files b/sample-files
index 6da0fbb53..31763905b 160000
--- a/sample-files
+++ b/sample-files
@@ -1 +1 @@
-Subproject commit 6da0fbb53f11bd5b8a4acf06e4d26e5e2bf5bf57
+Subproject commit 31763905b4a06014cbd23d2e03b7b5616661fed5
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 56bc2a70b..06d1cfdfb 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -17,6 +17,7 @@
     PdfReadWarning,
 )
 from PyPDF2.filters import _xobj_to_image
+from PyPDF2.generic import Destination
 
 from . import get_pdf_from_url
 
@@ -30,6 +31,7 @@
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
 PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
 RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
+EXTERNAL_ROOT = Path(PROJECT_ROOT) / "sample-files"
 
 
 @pytest.mark.parametrize(
@@ -872,3 +874,63 @@ def test_outline_font_format():
     name = "tika-924546.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     assert reader.outlines[0].font_format == 2
+
+
+@pytest.mark.xfail(reason="#1121")
+def test_outline_title_issue_1121():
+    reader = PdfReader(EXTERNAL_ROOT / "014-outlines/mistitled_outlines_example.pdf")
+
+    def get_titles_only(outlines, results=None):
+        if results is None:
+            results = []
+        if isinstance(outlines, list):
+            for outline in outlines:
+                if isinstance(outline, Destination):
+                    results.append(outline.title)
+                else:
+                    results.append(get_titles_only(outline))
+        else:
+            raise ValueError(f"got {type(outlines)}")
+        return results
+
+    assert get_titles_only(reader.outlines) == [
+        "First",
+        [
+            "Second",
+            "Third",
+            "Fourth",
+            [
+                "Fifth",
+                "Sixth",
+            ],
+            "Seventh",
+            [
+                "Eighth",
+                "Ninth",
+            ],
+        ],
+        "Tenth",
+        [
+            "Eleventh",
+            "Twelfth",
+            "Thirteenth",
+            "Fourteenth",
+        ],
+        "Fifteenth",
+        [
+            "Sixteenth",
+            "Seventeenth",
+        ],
+        "Eighteenth",
+        "Nineteenth",
+        [
+            "Twentieth",
+            "Twenty-first",
+            "Twenty-second",
+            "Twenty-third",
+            "Twenty-fourth",
+            "Twenty-fifth",
+            "Twenty-sixth",
+            "Twenty-seventh",
+        ],
+    ]