Skip to content

Commit

Permalink
ROB: Handle missing destinations in reader (#840)
Browse files Browse the repository at this point in the history
If a destination is missing, `getDestinationPageNumber` now returns -1.
If a destination cannot be built and `strict=False`, a warning is issued and the first page is used as a fallback.

The code triggering the exception was

```python
from PyPDF2 import PdfFileReader

# https://github.com/mstamy2/PyPDF2/files/6045010/thyroid.pdf
with open("thyroid.pdf", "rb") as f:
   reader = PdfFileReader(f)
   bookmarks = reader.getOutlines()
   for b in bookmarks:
       print(reader.getDestinationPageNumber(b) + 1)  # page count starts from 0
```

The error message was:
    PyPDF2.utils.PdfReadError: Unknown Destination Type: 0

Closes #604 
Closes #821
  • Loading branch information
pubpub-zz authored Apr 30, 2022
1 parent 5e86977 commit 3fe9e6e
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
14 changes: 13 additions & 1 deletion PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,8 @@ def _getPageNumberByIndirect(self, indirectRef):
id2num[x.indirectRef.idnum] = i
self._pageId2Num = id2num

if isinstance(indirectRef, NullObject):
return -1
if isinstance(indirectRef, int):
idnum = indirectRef
else:
Expand Down Expand Up @@ -595,7 +597,17 @@ def getDestinationPageNumber(self, destination):
def _buildDestination(self, title, array):
    """
    Build a ``Destination`` from a raw destination array.

    :param title: title of the destination; also reported in the warning
        emitted when the destination cannot be built.
    :param array: raw destination array. The first two items are the
        target page and the destination type; the remaining items are
        forwarded as the type-specific arguments.
    :return: the ``Destination`` built from ``array``. If building fails
        with ``PdfReadError`` and the reader is not strict, a ``/Fit``
        destination pointing at the first page is returned instead.
    :raises PdfReadError: re-raised when building fails and the reader
        is strict.
    """
    page, typ = array[0:2]
    array = array[2:]
    try:
        return Destination(title, page, typ, *array)
    except PdfReadError:
        # %-formatting instead of "+" concatenation: a non-str title must
        # not turn this warning into a TypeError inside the handler.
        warnings.warn("Unknown destination : %s %s" % (title, array))
        if self.strict:
            raise
        # Non-strict mode: create a link to the first page as a fallback.
        return Destination(
            title, self.getPage(0).indirectRef, TextStringObject("/Fit")
        )


def _buildOutline(self, node):
dest, title, outline = None, None, None
Expand Down
Binary file added Resources/issue-604.pdf
Binary file not shown.
36 changes: 36 additions & 0 deletions Tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,42 @@ def test_reader_properties():
assert reader.pageMode is None
assert reader.isEncrypted is False

@pytest.mark.parametrize("strict", [True, False])
def test_issue604(strict):
    """
    Test reading outlines from a PDF with invalid destinations.

    In strict mode, reading the outlines must raise ``PdfReadError`` with
    an "Unknown Destination" message. In non-strict mode, the outlines
    must load and every bookmark must resolve to a page number without
    raising (smoke test: the resolved numbers are not compared).
    """
    with open(os.path.join(RESOURCE_ROOT, "issue-604.pdf"), "rb") as f:
        if strict:
            with pytest.raises(PdfReadError) as exc:
                pdf = PdfFileReader(f, strict=strict)
                pdf.getOutlines()
            # Checked after the ``raises`` block: statements placed after
            # the raising call inside the block would never execute.
            assert "Unknown Destination" in exc.value.args[0]
            return  # bookmarks not correct

        pdf = PdfFileReader(f, strict=strict)
        bookmarks = pdf.getOutlines()

        def get_dest_pages(item):
            # An outline entry is either a destination or a nested list
            # of entries; mirror that nesting in the result.
            if isinstance(item, list):
                return [get_dest_pages(sub) for sub in item]
            return pdf.getDestinationPageNumber(item) + 1

        # Resolving every bookmark exercises the missing-destination path.
        out = [get_dest_pages(b) for b in bookmarks]
        assert len(out) == len(bookmarks)

def test_decode_permissions():
reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
Expand Down

0 comments on commit 3fe9e6e

Please sign in to comment.