py-pdf · MartinThoma · Jul 27, 2022 · Jul 26, 2022 · Jul 26, 2022
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -1133,9 +1133,11 @@ def _extract_text(
         if "/Font" in resources_dict:
             for f in cast(DictionaryObject, resources_dict["/Font"]):
                 cmaps[f] = build_char_map(f, space_width, obj)
-        cmap: Tuple[
-            Union[str, Dict[int, str]], Dict[str, str], str
-        ]  # (encoding,CMAP,font_name)
+        cmap: Tuple[Union[str, Dict[int, str]], Dict[str, str], str] = (
+            "charmap",
+            {},
+            "NotInitialized",
+        )  # (encoding,CMAP,font_name)
         try:
             content = (
                 obj[content_key].get_object() if isinstance(content_key, str) else obj
@@ -1211,10 +1213,28 @@ def process_operation(operator: bytes, operands: List) -> None:
             # table 4.7, page 219
             # cm_matrix calculation is a reserved for the moment
             elif operator == b"q":
-                cm_stack.append(cm_matrix)
+                cm_stack.append(
+                    (
+                        cm_matrix,
+                        cmap,
+                        font_size,
+                        char_scale,
+                        space_scale,
+                        _space_width,
+                        TL,
+                    )
+                )
             elif operator == b"Q":
                 try:
-                    cm_matrix = cm_stack.pop()
+                    (
+                        cm_matrix,
+                        cmap,
+                        font_size,
+                        char_scale,
+                        space_scale,
+                        _space_width,
+                        TL,
+                    ) = cm_stack.pop()
                 except Exception:
                     cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
             elif operator == b"cm":

diff --git a/tests/test_page.py b/tests/test_page.py
@@ -238,6 +238,15 @@ def test_extract_text_single_quote_op():
         page.extract_text()
 
 
+def test_iss_1142():
+    # Regression check: text must still be extracted after a q/Q save/restore.
+    url = "https://github.com/py-pdf/PyPDF2/files/9150656/ST.2019.PDF"
+    name = "st2019.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    txt = reader.pages[3].extract_text()
+    assert "有限公司郑州分公司" in txt  # `find() > 0` would reject a match at index 0
+
+
 @pytest.mark.parametrize(
     ("url", "name"),
     [