py-pdf
diff --git a/‎PyPDF2/_cmap.py‎
Lines changed: 3 additions & 2 deletions b/‎PyPDF2/_cmap.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎PyPDF2/_page.py‎
Lines changed: 4 additions & 3 deletions b/‎PyPDF2/_page.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎PyPDF2/_reader.py‎
Lines changed: 21 additions & 20 deletions b/‎PyPDF2/_reader.py‎
Lines changed: 21 additions & 20 deletions
diff --git a/‎PyPDF2/_writer.py‎
Lines changed: 12 additions & 10 deletions b/‎PyPDF2/_writer.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎PyPDF2/generic.py‎
Lines changed: 2 additions & 8 deletions b/‎PyPDF2/generic.py‎
Lines changed: 2 additions & 8 deletions
diff --git a/‎docs/user/suppress-warnings.md‎
Lines changed: 14 additions & 6 deletions b/‎docs/user/suppress-warnings.md‎
Lines changed: 14 additions & 6 deletions
diff --git a/‎tests/__init__.py‎
Lines changed: 20 additions & 0 deletions b/‎tests/__init__.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎tests/bench.py‎
Lines changed: 0 additions & 3 deletions b/‎tests/bench.py‎
Lines changed: 0 additions & 3 deletions
@@ -3,6 +3,7 @@
 from typing import Any, Dict, List, Tuple, Union, cast
 
 from ._codecs import adobe_glyphs, charset_encoding
+from ._utils import logger_warning
 from .errors import PdfReadWarning
 from .generic import DecodedStreamObject, DictionaryObject
 
@@ -330,9 +331,9 @@ def compute_space_width(
                     st += 1
                 w = w[2:]
             else:
-                warnings.warn(
+                logger_warning(
                     "unknown widths : \n" + (ft1["/W"]).__repr__(),
-                    PdfReadWarning,
+                    __name__,
                 )
                 break
         try:
 
@@ -51,11 +51,12 @@
     TransformationMatrixType,
     deprecate_no_replacement,
     deprecate_with_replacement,
+    logger_warning,
     matrix_multiply,
 )
 from .constants import PageAttributes as PG
 from .constants import Ressources as RES
-from .errors import PageSizeNotDefinedError, PdfReadWarning
+from .errors import PageSizeNotDefinedError
 from .generic import (
     ArrayObject,
     ContentStream,
@@ -1430,9 +1431,9 @@ def process_operation(operator: bytes, operands: List) -> None:
                         text = self.extract_xform_text(xobj[operands[0]], orientations, space_width)  # type: ignore
                         output += text
                 except Exception:
-                    warnings.warn(
+                    logger_warning(
                         f" impossible to decode XFormObject {operands[0]}",
-                        PdfReadWarning,
+                        __name__,
                     )
                 finally:
                     text = ""
 
@@ -30,7 +30,6 @@
 import os
 import re
 import struct
-import warnings
 import zlib
 from io import BytesIO
 from pathlib import Path
@@ -54,6 +53,7 @@
     b_,
     deprecate_no_replacement,
     deprecate_with_replacement,
+    logger_warning,
     read_non_whitespace,
     read_previous_line,
     read_until_whitespace,
@@ -70,7 +70,7 @@
 from .constants import PageAttributes as PG
 from .constants import PagesAttributes as PA
 from .constants import TrailerKeys as TK
-from .errors import PdfReadError, PdfReadWarning, PdfStreamError
+from .errors import PdfReadError, PdfStreamError
 from .generic import (
     ArrayObject,
     ContentStream,
@@ -258,10 +258,10 @@ def __init__(
             Dict[Any, Any]
         ] = None  # map page indirect_ref number to Page Number
         if hasattr(stream, "mode") and "b" not in stream.mode:  # type: ignore
-            warnings.warn(
+            logger_warning(
                 "PdfReader stream/file object is not in binary mode. "
                 "It may not be read correctly.",
-                PdfReadWarning,
+                __name__,
             )
         if isinstance(stream, (str, Path)):
             with open(stream, "rb") as fh:
@@ -836,7 +836,7 @@ def _build_destination(
             try:
                 return Destination(title, page, typ, *array)  # type: ignore
             except PdfReadError:
-                warnings.warn(f"Unknown destination: {title} {array}", PdfReadWarning)
+                logger_warning(f"Unknown destination: {title} {array}", __name__)
                 if self.strict:
                     raise
                 # create a link to first Page
@@ -1091,11 +1091,11 @@ def _get_object_from_stream(
             except PdfStreamError as exc:
                 # Stream object cannot be read. Normally, a critical error, but
                 # Adobe Reader doesn't complain, so continue (in strict mode?)
-                warnings.warn(
+                logger_warning(
                     f"Invalid stream (index {i}) within object "
                     f"{indirect_reference.idnum} {indirect_reference.generation}: "
                     f"{exc}",
-                    PdfReadWarning,
+                    __name__,
                 )
 
                 if self.strict:
@@ -1162,10 +1162,10 @@ def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
                     retval, indirect_reference.idnum, indirect_reference.generation
                 )
         else:
-            warnings.warn(
+            logger_warning(
                 f"Object {indirect_reference.idnum} {indirect_reference.generation} "
                 "not defined.",
-                PdfReadWarning,
+                __name__,
             )
             if self.strict:
                 raise PdfReadError("Could not find object.")
@@ -1207,9 +1207,9 @@ def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
         read_non_whitespace(stream)
         stream.seek(-1, 1)
         if extra and self.strict:
-            warnings.warn(
+            logger_warning(
                 f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
-                PdfReadWarning,
+                __name__,
             )
         return int(idnum), int(generation)
 
@@ -1250,7 +1250,7 @@ def cache_indirect_object(
             if self.strict:
                 raise PdfReadError(msg)
             else:
-                warnings.warn(msg)
+                logger_warning(msg, __name__)
         self.resolved_objects[(generation, idnum)] = obj
         return obj
 
@@ -1276,8 +1276,8 @@ def read(self, stream: StreamType) -> None:
             if self.strict and xref_issue_nr:
                 raise PdfReadError("Broken xref table")
             else:
-                warnings.warn(
-                    f"incorrect startxref pointer({xref_issue_nr})", PdfReadWarning
+                logger_warning(
+                    f"incorrect startxref pointer({xref_issue_nr})", __name__
                 )
 
         # read all cross reference tables and their trailers
@@ -1335,7 +1335,7 @@ def _find_startxref_pos(self, stream: StreamType) -> int:
             if not line.startswith(b"startxref"):
                 raise PdfReadError("startxref not found")
             startxref = int(line[9:].strip())
-            warnings.warn("startxref on same line as offset", PdfReadWarning)
+            logger_warning("startxref on same line as offset", __name__)
         else:
             line = read_previous_line(stream)
             if line[:9] != b"startxref":
@@ -1355,9 +1355,9 @@ def _read_standard_xref_table(self, stream: StreamType) -> None:
             if firsttime and num != 0:
                 self.xref_index = num
                 if self.strict:
-                    warnings.warn(
+                    logger_warning(
                         "Xref table not zero-indexed. ID numbers for objects will be corrected.",
-                        PdfReadWarning,
+                        __name__,
                     )
                     # if table not zero indexed, could be due to error from when PDF was created
                     # which will lead to mismatched indices later on, only warned and corrected if self.strict==True
@@ -1474,9 +1474,10 @@ def _read_xref_other_error(
                     "/Prev=0 in the trailer (try opening with strict=False)"
                 )
             else:
-                warnings.warn(
+                logger_warning(
                     "/Prev=0 in the trailer - assuming there"
-                    " is no previous xref table"
+                    " is no previous xref table",
+                    __name__,
                 )
                 return None
         # bad xref character at startxref.  Let's see if we can find
@@ -1502,7 +1503,7 @@ def _read_xref_other_error(
         # no xref table found at specified location
         if "/Root" in self.trailer and not self.strict:
             # if Root has been already found, just raise warning
-            warnings.warn("Invalid parent xref., rebuild xref", PdfReadWarning)
+            logger_warning("Invalid parent xref., rebuild xref", __name__)
             try:
                 self._rebuild_xref_table(stream)
                 return None
 
@@ -35,7 +35,6 @@
 import struct
 import time
 import uuid
-import warnings
 from hashlib import md5
 from typing import (
     Any,
@@ -49,8 +48,6 @@
     cast,
 )
 
-from PyPDF2.errors import PdfReadWarning
-
 from ._page import PageObject, _VirtualList
 from ._reader import PdfReader
 from ._security import _alg33, _alg34, _alg35
@@ -60,6 +57,7 @@
     b_,
     deprecate_bookmark,
     deprecate_with_replacement,
+    logger_warning,
 )
 from .constants import AnnotationDictionaryAttributes
 from .constants import CatalogAttributes as CA
@@ -780,9 +778,10 @@ def write(self, stream: StreamType) -> None:
             the write method and the tell method, similar to a file object.
         """
         if hasattr(stream, "mode") and "b" not in stream.mode:
-            warnings.warn(
+            logger_warning(
                 f"File <{stream.name}> to write to is not in binary mode. "  # type: ignore
-                "It may not be written to correctly."
+                "It may not be written to correctly.",
+                __name__,
             )
 
         if not self._root:
@@ -966,10 +965,10 @@ def _resolve_indirect_object(self, data: IndirectObject) -> IndirectObject:
         real_obj = data.pdf.get_object(data)
 
         if real_obj is None:
-            warnings.warn(
+            logger_warning(
                 f"Unable to resolve [{data.__class__.__name__}: {data}], "
                 "returning NullObject instead",
-                PdfReadWarning,
+                __name__,
             )
             real_obj = NullObject()
 
@@ -1703,8 +1702,9 @@ def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
         """
         if not isinstance(layout, NameObject):
             if layout not in self._valid_layouts:
-                warnings.warn(
-                    f"Layout should be one of: {'', ''.join(self._valid_layouts)}"
+                logger_warning(  # names joined with ", " like set_page_mode
+                    f"Layout should be one of: {', '.join(self._valid_layouts)}",
+                    __name__,
+                )
             layout = NameObject(layout)
         self._root_object.update({NameObject("/PageLayout"): layout})
@@ -1803,7 +1803,9 @@ def set_page_mode(self, mode: PagemodeType) -> None:
             mode_name: NameObject = mode
         else:
             if mode not in self._valid_modes:
-                warnings.warn(f"Mode should be one of: {', '.join(self._valid_modes)}")
+                logger_warning(
+                    f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
+                )
             mode_name = NameObject(mode)
         self._root_object.update({NameObject("/PageMode"): mode_name})
 
 
@@ -35,7 +35,6 @@
 import hashlib
 import logging
 import re
-import warnings
 from enum import IntFlag
 from io import BytesIO
 from typing import (
@@ -74,12 +73,7 @@
 from .constants import StreamAttributes as SA
 from .constants import TypArguments as TA
 from .constants import TypFitArguments as TF
-from .errors import (
-    STREAM_TRUNCATED_PREMATURELY,
-    PdfReadError,
-    PdfReadWarning,
-    PdfStreamError,
-)
+from .errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
 
 logger = logging.getLogger(__name__)
 ObjectPrefix = b"/<[tf(n%"
@@ -813,7 +807,7 @@ def read_unsized_from_steam(stream: StreamType, pdf: Any) -> bytes:  # PdfReader
                 if pdf is not None and pdf.strict:
                     raise PdfReadError(msg)
                 else:
-                    warnings.warn(msg, PdfReadWarning)
+                    logger_warning(msg, __name__)
 
         pos = stream.tell()
         s = read_non_whitespace(stream)
 
@@ -1,12 +1,20 @@
-# Suppress Warnings and Log messages
+# Exceptions, Warnings, and Log messages
 
 PyPDF2 makes use of 3 mechanisms to show that something went wrong:
 
-* **Exceptions**: Error-cases the client should explicitly handle. In the
-   `strict=True` mode, most log messages will become exceptions. This can be
-   useful in applications where you can force to user to fix the broken PDF.
-* **Warnings**: Avoidable issues, such as using deprecated classes / functions / parameters
-* **Log messages**: Nothing the client can do, but they should know it happened.
+* **Log messages** are informative messages that can be used for post-mortem
+  analysis. Most of the time, users can ignore them. They come in different
+  *levels*, such as info / warning / error, indicating the severity.
+  Examples are PDF files that are not standard-compliant but which PyPDF2 can deal with.
+* **Warnings** are avoidable issues, such as using deprecated classes /
+  functions / parameters. Another example is missing capabilities of PyPDF2.
+  In those cases, PyPDF2 users should adjust their code. Warnings
+  are issued by the `warnings` module - those are different from the log-level
+  "warning".
+* **Exceptions** are error-cases that PyPDF2 users should explicitly handle.
+  In the `strict=True` mode, most log messages with the warning level will
+  become exceptions. This can be useful in applications where you can force the
+  user to fix the broken PDF.
 
 
 ## Exceptions
 
@@ -1,6 +1,7 @@
 import os
 import ssl
 import urllib.request
+from typing import List
 
 
 def get_pdf_from_url(url: str, name: str) -> bytes:
@@ -30,3 +31,22 @@ def get_pdf_from_url(url: str, name: str) -> bytes:
     with open(cache_path, "rb") as fp:
         data = fp.read()
     return data
+
+
+def _strip_position(line: str) -> str:
+    """
+    Return the bare log message, dropping level, logger name and location.
+
+    ``WARNING  PyPDF2._reader:_utils.py:364 Xref table not zero-indexed.``
+    becomes ``Xref table not zero-indexed.``; a line without ``.py:`` gives "".
+    Only the first ``.py:`` is a separator, so one in the message survives.
+    """
+    # Everything up to and including the first ".py:" is level/logger/file.
+    _, _, tail = line.partition(".py:")
+    # What remains starts with the line number; the message follows one space.
+    _, _, message = tail.partition(" ")
+    return message
+
+
+def normalize_warnings(caplog_text: str) -> List[str]:
+    return list(map(_strip_position, caplog_text.strip().split("\n")))  # one entry per log line
@@ -1,7 +1,5 @@
 import os
 
-import pytest
-
 import PyPDF2
 from PyPDF2 import PdfReader, Transformation
 from PyPDF2.generic import Destination
@@ -127,7 +125,6 @@ def text_extraction(pdf_path):
     return text
 
 
-@pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_text_extraction(benchmark):
     file_path = os.path.join(SAMPLE_ROOT, "009-pdflatex-geotopo/GeoTopo.pdf")
     benchmark(text_extraction, file_path)