BUG: Prevent deduplication of PageObject (#1105)

Make sure that a PageObject is not deduplicated unless it is exactly the same page object. Adobe Reader/Acrobat doesn't like it if the same page is referenced more than once. Closes #1102. Co-authored-by: Harry Karvonen <harry.karvonen@onebyte.fi>
py-pdf · Jul 16, 2022 · dd2d69a · dd2d69a
1 parent 9bbe827
commit dd2d69a
Showing 1 changed file with 8 additions and 1 deletion.
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -244,6 +244,11 @@ def __init__(
         self.pdf: Optional[PdfReader] = pdf
         self.indirect_ref = indirect_ref
 
+    def hash_value_data(self) -> bytes:
+        data = super().hash_value_data()  # start from the parent class's hash input bytes
+        data += b"%d" % id(self)  # append this instance's id() so two distinct page objects never produce the same data
+        return data
+
     @staticmethod
     def create_blank_page(
         pdf: Optional[Any] = None,  # PdfReader
@@ -1287,7 +1292,9 @@ def process_operation(operator: bytes, operands: List) -> None:
                     )
                     if isinstance(cmap[0], str):
                         try:
-                            t = tt.decode(cmap[0], "surrogatepass")  # apply str encoding
+                            t = tt.decode(
+                                cmap[0], "surrogatepass"
+                            )  # apply str encoding
                         except Exception:  # the data does not match the expectation, we use the alternative ; text extraction may not be good
                             t = tt.decode(
                                 "utf-16-be" if cmap[0] == "charmap" else "charmap",