From dd2d69a8d89a1370753f1418b3e0df9a7908d928 Mon Sep 17 00:00:00 2001 From: Harry Karvonen Date: Sat, 16 Jul 2022 07:53:39 +0300 Subject: [PATCH] BUG: Prevent deduplication of PageObject (#1105) Make sure that PageObject is not deduplicated if it is not exactly the same page object. Adobe Reader/Acrobat doesn't like it if the same page is referenced more than once. Closes #1102 Co-authored-by: Harry Karvonen --- PyPDF2/_page.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py index 54ca9982d..340ee8d15 100644 --- a/PyPDF2/_page.py +++ b/PyPDF2/_page.py @@ -244,6 +244,11 @@ def __init__( self.pdf: Optional[PdfReader] = pdf self.indirect_ref = indirect_ref + def hash_value_data(self) -> bytes: + data = super().hash_value_data() + data += b"%d" % id(self) + return data + @staticmethod def create_blank_page( pdf: Optional[Any] = None, # PdfReader @@ -1287,7 +1292,9 @@ def process_operation(operator: bytes, operands: List) -> None: ) if isinstance(cmap[0], str): try: - t = tt.decode(cmap[0], "surrogatepass") # apply str encoding + t = tt.decode( + cmap[0], "surrogatepass" + ) # apply str encoding except Exception: # the data does not match the expectation, we use the alternative ; text extraction may not be good t = tt.decode( "utf-16-be" if cmap[0] == "charmap" else "charmap",