py-pdf · stefan6419846 · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -19,16 +19,16 @@ repos:
         args: ['--maxkb=1000']
 
 -   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.12.0
+    rev: v0.12.7
     hooks:
-    -   id: ruff
+    -   id: ruff-check
         args: ['--fix']
 
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.19.1
+    rev: v3.20.0
     hooks:
     -   id: pyupgrade
-        args: [--py38-plus]
+        args: [--py39-plus]
 
 -   repo: https://github.com/pre-commit/mirrors-mypy
     rev: 'v1.17.0'

diff --git a/make_release.py b/make_release.py
@@ -5,7 +5,6 @@
 import urllib.request
 from dataclasses import dataclass
 from datetime import datetime, timezone
-from typing import Dict, List, Tuple
 
 GH_ORG = "py-pdf"
 GH_PROJECT = "pypdf"
@@ -185,7 +184,7 @@ def write_changelog(new_changelog: str, changelog_path: str) -> None:
         fh.write(new_changelog)
 
 
-def get_formatted_changes(git_tag: str) -> Tuple[str, str]:
+def get_formatted_changes(git_tag: str) -> tuple[str, str]:
     """
     Format the changes done since the last tag.
 
@@ -277,7 +276,7 @@ def get_most_recent_git_tag() -> str:
     ).strip()
 
 
-def get_author_mapping(line_count: int) -> Dict[str, str]:
+def get_author_mapping(line_count: int) -> dict[str, str]:
     """
     Get the authors for each commit.
 
@@ -291,7 +290,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
     """
     per_page = min(line_count, 100)
     page = 1
-    mapping: Dict[str, str] = {}
+    mapping: dict[str, str] = {}
     for _ in range(0, line_count, per_page):
         with urllib.request.urlopen(
             f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}"
@@ -303,7 +302,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
     return mapping
 
 
-def get_git_commits_since_tag(git_tag: str) -> List[Change]:
+def get_git_commits_since_tag(git_tag: str) -> list[Change]:
     """
     Get all commits since the last tag.
 
@@ -334,7 +333,7 @@ def get_git_commits_since_tag(git_tag: str) -> List[Change]:
     return [parse_commit_line(line, authors) for line in lines if line != ""]
 
 
-def parse_commit_line(line: str, authors: Dict[str, str]) -> Change:
+def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
     """
     Parse the first line of a git commit message.
 

diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py
@@ -2,7 +2,7 @@
 from binascii import Error as BinasciiError
 from binascii import unhexlify
 from math import ceil
-from typing import Any, Dict, List, Tuple, Union, cast
+from typing import Any, Union, cast
 
 from ._codecs import adobe_glyphs, charset_encoding
 from ._utils import logger_error, logger_warning
@@ -19,7 +19,7 @@
 # code freely inspired from @twiggy ; see #711
 def build_char_map(
     font_name: str, space_width: float, obj: DictionaryObject
-) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any], DictionaryObject]:
+) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any], DictionaryObject]:
     """
     Determine information about a font.
 
@@ -42,7 +42,7 @@ def build_char_map(
 
 def build_char_map_from_dict(
     space_width: float, ft: DictionaryObject
-) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]]:
+) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]]:
     """
     Determine information about a font.
 
@@ -73,15 +73,15 @@ def build_char_map_from_dict(
 
 
 # used when missing data, e.g. font def missing
-unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
+unknown_char_map: tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]] = (
     "Unknown",
     9999,
     dict.fromkeys(range(256), "�"),
     {},
 )
 
 
-_predefined_cmap: Dict[str, str] = {
+_predefined_cmap: dict[str, str] = {
     "/Identity-H": "utf-16-be",
     "/Identity-V": "utf-16-be",
     "/GB-EUC-H": "gbk",
@@ -104,7 +104,7 @@ def build_char_map_from_dict(
 }
 
 # manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
-_default_fonts_space_width: Dict[str, int] = {
+_default_fonts_space_width: dict[str, int] = {
     "/Courier": 600,
     "/Courier-Bold": 600,
     "/Courier-BoldOblique": 600,
@@ -128,7 +128,7 @@ def build_char_map_from_dict(
 
 def get_encoding(
     ft: DictionaryObject
-) -> Tuple[Union[str, Dict[int, str]], Dict[Any, Any]]:
+) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
     encoding = _parse_encoding(ft)
     map_dict, int_entry = _parse_to_unicode(ft)
 
@@ -146,8 +146,8 @@ def get_encoding(
 
 def _parse_encoding(
     ft: DictionaryObject
-) -> Union[str, Dict[int, str]]:
-    encoding: Union[str, List[str], Dict[int, str]] = []
+) -> Union[str, dict[int, str]]:
+    encoding: Union[str, list[str], dict[int, str]] = []
     if "/Encoding" not in ft:
         if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
             encoding = dict(
@@ -205,13 +205,13 @@ def _parse_encoding(
 
 def _parse_to_unicode(
     ft: DictionaryObject
-) -> Tuple[Dict[Any, Any], List[int]]:
+) -> tuple[dict[Any, Any], list[int]]:
     # will store all translation code
     # and map_dict[-1] we will have the number of bytes to convert
-    map_dict: Dict[Any, Any] = {}
+    map_dict: dict[Any, Any] = {}
 
     # will provide the list of cmap keys as int to correct encoding
-    int_entry: List[int] = []
+    int_entry: list[int] = []
 
     if "/ToUnicode" not in ft:
         if ft.get("/Subtype", "") == "/Type1":
@@ -220,7 +220,7 @@ def _parse_to_unicode(
     process_rg: bool = False
     process_char: bool = False
     multiline_rg: Union[
-        None, Tuple[int, int]
+        None, tuple[int, int]
     ] = None  # tuple = (current_char, remaining size) ; cf #1285 for example of file
     cm = prepare_cm(ft)
     for line in cm.split(b"\n"):
@@ -237,7 +237,7 @@ def _parse_to_unicode(
 
 
 def get_actual_str_key(
-    value_char: str, encoding: Union[str, Dict[int, str]], map_dict: Dict[Any, Any]
+    value_char: str, encoding: Union[str, dict[int, str]], map_dict: dict[Any, Any]
 ) -> str:
     key_dict = {}
     if isinstance(encoding, dict):
@@ -292,10 +292,10 @@ def process_cm_line(
     line: bytes,
     process_rg: bool,
     process_char: bool,
-    multiline_rg: Union[None, Tuple[int, int]],
-    map_dict: Dict[Any, Any],
-    int_entry: List[int],
-) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
+    multiline_rg: Union[None, tuple[int, int]],
+    map_dict: dict[Any, Any],
+    int_entry: list[int],
+) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
     if line == b"" or line[0] == 37:  # 37 = %
         return process_rg, process_char, multiline_rg
     line = line.replace(b"\t", b" ")
@@ -319,10 +319,10 @@ def process_cm_line(
 
 def parse_bfrange(
     line: bytes,
-    map_dict: Dict[Any, Any],
-    int_entry: List[int],
-    multiline_rg: Union[None, Tuple[int, int]],
-) -> Union[None, Tuple[int, int]]:
+    map_dict: dict[Any, Any],
+    int_entry: list[int],
+    multiline_rg: Union[None, tuple[int, int]],
+) -> Union[None, tuple[int, int]]:
     lst = [x for x in line.split(b" ") if x]
     closure_found = False
     if multiline_rg is not None:
@@ -377,7 +377,7 @@ def parse_bfrange(
     return None if closure_found else (a, b)
 
 
-def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
+def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
     lst = [x for x in line.split(b" ") if x]
     map_dict[-1] = len(lst[0]) // 2
     while len(lst) > 1:
@@ -401,8 +401,8 @@ def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) ->
 
 def build_font_width_map(
     ft: DictionaryObject, default_font_width: float
-) -> Dict[Any, float]:
-    font_width_map: Dict[Any, float] = {}
+) -> dict[Any, float]:
+    font_width_map: dict[Any, float] = {}
     st: int = 0
     en: int = 0
     try:
@@ -482,7 +482,7 @@ def build_font_width_map(
 
 
 def compute_space_width(
-    font_width_map: Dict[Any, float], space_char: str
+    font_width_map: dict[Any, float], space_char: str
 ) -> float:
     try:
         sp_width = font_width_map[space_char]
@@ -497,7 +497,7 @@ def compute_space_width(
 
 
 def compute_font_width(
-    font_width_map: Dict[Any, float],
+    font_width_map: dict[Any, float],
     char: str
 ) -> float:
     char_width: float = 0.0
@@ -513,9 +513,9 @@ def compute_font_width(
 
 def _type1_alternative(
     ft: DictionaryObject,
-    map_dict: Dict[Any, Any],
-    int_entry: List[int],
-) -> Tuple[Dict[Any, Any], List[int]]:
+    map_dict: dict[Any, Any],
+    int_entry: list[int],
+) -> tuple[dict[Any, Any], list[int]]:
     if "/FontDescriptor" not in ft:
         return map_dict, int_entry
     ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")

diff --git a/pypdf/_codecs/__init__.py b/pypdf/_codecs/__init__.py
@@ -1,14 +1,12 @@
-from typing import Dict, List
-
 from .adobe_glyphs import adobe_glyphs
 from .pdfdoc import _pdfdoc_encoding
 from .std import _std_encoding
 from .symbol import _symbol_encoding
 from .zapfding import _zapfding_encoding
 
 
-def fill_from_encoding(enc: str) -> List[str]:
-    lst: List[str] = []
+def fill_from_encoding(enc: str) -> list[str]:
+    lst: list[str] = []
     for x in range(256):
         try:
             lst += (bytes((x,)).decode(enc),)
@@ -17,8 +15,8 @@ def fill_from_encoding(enc: str) -> List[str]:
     return lst
 
 
-def rev_encoding(enc: List[str]) -> Dict[str, int]:
-    rev: Dict[str, int] = {}
+def rev_encoding(enc: list[str]) -> dict[str, int]:
+    rev: dict[str, int] = {}
     for i in range(256):
         char = enc[i]
         if char == "\u0000":
@@ -32,14 +30,14 @@ def rev_encoding(enc: List[str]) -> Dict[str, int]:
 _mac_encoding = fill_from_encoding("mac_roman")
 
 
-_win_encoding_rev: Dict[str, int] = rev_encoding(_win_encoding)
-_mac_encoding_rev: Dict[str, int] = rev_encoding(_mac_encoding)
-_symbol_encoding_rev: Dict[str, int] = rev_encoding(_symbol_encoding)
-_zapfding_encoding_rev: Dict[str, int] = rev_encoding(_zapfding_encoding)
-_pdfdoc_encoding_rev: Dict[str, int] = rev_encoding(_pdfdoc_encoding)
+_win_encoding_rev: dict[str, int] = rev_encoding(_win_encoding)
+_mac_encoding_rev: dict[str, int] = rev_encoding(_mac_encoding)
+_symbol_encoding_rev: dict[str, int] = rev_encoding(_symbol_encoding)
+_zapfding_encoding_rev: dict[str, int] = rev_encoding(_zapfding_encoding)
+_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)
 
 
-charset_encoding: Dict[str, List[str]] = {
+charset_encoding: dict[str, list[str]] = {
     "/StandardEncoding": _std_encoding,
     "/WinAnsiEncoding": _win_encoding,
     "/MacRomanEncoding": _mac_encoding,

diff --git a/pypdf/_codecs/_codecs.py b/pypdf/_codecs/_codecs.py
@@ -7,7 +7,6 @@
 
 import io
 from abc import ABC, abstractmethod
-from typing import Dict, List
 
 from pypdf._utils import logger_warning
 
@@ -52,7 +51,7 @@ class LzwCodec(Codec):
 
     def _initialize_encoding_table(self) -> None:
         """Initialize the encoding table and state to initial conditions."""
-        self.encoding_table: Dict[bytes, int] = {bytes([i]): i for i in range(256)}
+        self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
         self.next_code = self.EOD_MARKER + 1
         self.bits_per_code = self.INITIAL_BITS_PER_CODE
         self.max_code_value = (1 << self.bits_per_code) - 1
@@ -73,7 +72,7 @@ def encode(self, data: bytes) -> bytes:
 
         Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
         """
-        result_codes: List[int] = []
+        result_codes: list[int] = []
 
         # The encoder shall begin by issuing a clear-table code
         result_codes.append(self.CLEAR_TABLE_MARKER)
@@ -109,7 +108,7 @@ def encode(self, data: bytes) -> bytes:
 
         return self._pack_codes_into_bytes(result_codes)
 
-    def _pack_codes_into_bytes(self, codes: List[int]) -> bytes:
+    def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
         """
         Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
         The bit-width starts at 9 bits and expands as needed.