Skip to content

Commit

Permalink
ENH : Process XRefStm
Browse files Browse the repository at this point in the history
fixes py-pdf#1295
includes test file adjustment
  • Loading branch information
pubpub-zz committed Aug 28, 2022
1 parent 3b74312 commit 2dc76c0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
19 changes: 18 additions & 1 deletion PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,14 @@ def _read_standard_xref_table(self, stream: StreamType) -> None:
pass
else:
self.xref[generation][num] = offset
self.xref_free_entry[generation][num] = entry_type_b == b"f"
try:
self.xref_free_entry[generation][num] = entry_type_b == b"f"
except Exception:
pass
try:
self.xref_free_entry[65535][num] = entry_type_b == b"f"
except Exception:
pass
cnt += 1
num += 1
read_non_whitespace(stream)
Expand Down Expand Up @@ -1438,6 +1445,11 @@ def _read_xref_tables_and_trailers(
for key in trailer_keys:
if key in xrefstream and key not in self.trailer:
self.trailer[NameObject(key)] = xrefstream.raw_get(key)
if "/XRefStm" in xrefstream:
p = stream.tell()
stream.seek(int(xrefstream["/XRefStm"]) + 1, 0)
self._read_pdf15_xref_stream(stream)
stream.seek(p, 0)
if "/Prev" in xrefstream:
startxref = cast(int, xrefstream["/Prev"])
else:
Expand All @@ -1453,6 +1465,11 @@ def _read_xref(self, stream: StreamType) -> Optional[int]:
for key, value in new_trailer.items():
if key not in self.trailer:
self.trailer[key] = value
if "/XRefStm" in new_trailer:
p = stream.tell()
stream.seek(int(new_trailer["/XRefStm"]) + 1, 0)
self._read_pdf15_xref_stream(stream)
stream.seek(p, 0)
if "/Prev" in new_trailer:
startxref = new_trailer["/Prev"]
return startxref
Expand Down
2 changes: 1 addition & 1 deletion tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def test_sweep_indirect_list_newobj_is_None(caplog):
merger.append(reader)
merger.write("tmp-merger-do-not-commit.pdf")
merger.close()
assert "Object 21 0 not defined." in caplog.text
# used to be: assert "Object 21 0 not defined." in caplog.text

reader2 = PdfReader("tmp-merger-do-not-commit.pdf")
reader2.pages
Expand Down
4 changes: 2 additions & 2 deletions tests/test_xmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ def test_dc_subject():
def test_issue585():
url = "https://github.com/mstamy2/PyPDF2/files/5536984/test.pdf"
name = "mstamy2-5536984.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
with pytest.raises(PdfReadError) as exc:
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.xmp_metadata
assert exc.value.args[0].startswith("XML in XmpInformation was invalid")
assert exc.value.args[0].startswith("Stream length not defined")


# def test_getter_bag():
Expand Down

0 comments on commit 2dc76c0

Please sign in to comment.