Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ repos:
args: ['--maxkb=1000']

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.12.0
rev: v0.12.7
hooks:
- id: ruff
- id: ruff-check
args: ['--fix']

- repo: https://github.com/asottile/pyupgrade
rev: v3.19.1
rev: v3.20.0
hooks:
- id: pyupgrade
args: [--py38-plus]
args: [--py39-plus]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.17.0'
Expand Down
11 changes: 5 additions & 6 deletions make_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Dict, List, Tuple

GH_ORG = "py-pdf"
GH_PROJECT = "pypdf"
Expand Down Expand Up @@ -185,7 +184,7 @@ def write_changelog(new_changelog: str, changelog_path: str) -> None:
fh.write(new_changelog)


def get_formatted_changes(git_tag: str) -> Tuple[str, str]:
def get_formatted_changes(git_tag: str) -> tuple[str, str]:
"""
Format the changes done since the last tag.

Expand Down Expand Up @@ -277,7 +276,7 @@ def get_most_recent_git_tag() -> str:
).strip()


def get_author_mapping(line_count: int) -> Dict[str, str]:
def get_author_mapping(line_count: int) -> dict[str, str]:
"""
Get the authors for each commit.

Expand All @@ -291,7 +290,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
"""
per_page = min(line_count, 100)
page = 1
mapping: Dict[str, str] = {}
mapping: dict[str, str] = {}
for _ in range(0, line_count, per_page):
with urllib.request.urlopen(
f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}"
Expand All @@ -303,7 +302,7 @@ def get_author_mapping(line_count: int) -> Dict[str, str]:
return mapping


def get_git_commits_since_tag(git_tag: str) -> List[Change]:
def get_git_commits_since_tag(git_tag: str) -> list[Change]:
"""
Get all commits since the last tag.

Expand Down Expand Up @@ -334,7 +333,7 @@ def get_git_commits_since_tag(git_tag: str) -> List[Change]:
return [parse_commit_line(line, authors) for line in lines if line != ""]


def parse_commit_line(line: str, authors: Dict[str, str]) -> Change:
def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
"""
Parse the first line of a git commit message.

Expand Down
60 changes: 30 additions & 30 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from binascii import Error as BinasciiError
from binascii import unhexlify
from math import ceil
from typing import Any, Dict, List, Tuple, Union, cast
from typing import Any, Union, cast

from ._codecs import adobe_glyphs, charset_encoding
from ._utils import logger_error, logger_warning
Expand All @@ -19,7 +19,7 @@
# code freely inspired from @twiggy ; see #711
def build_char_map(
font_name: str, space_width: float, obj: DictionaryObject
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any], DictionaryObject]:
) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any], DictionaryObject]:
"""
Determine information about a font.

Expand All @@ -42,7 +42,7 @@ def build_char_map(

def build_char_map_from_dict(
space_width: float, ft: DictionaryObject
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]]:
) -> tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]]:
"""
Determine information about a font.

Expand Down Expand Up @@ -73,15 +73,15 @@ def build_char_map_from_dict(


# used when missing data, e.g. font def missing
unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
unknown_char_map: tuple[str, float, Union[str, dict[int, str]], dict[Any, Any]] = (
"Unknown",
9999,
dict.fromkeys(range(256), "�"),
{},
)


_predefined_cmap: Dict[str, str] = {
_predefined_cmap: dict[str, str] = {
"/Identity-H": "utf-16-be",
"/Identity-V": "utf-16-be",
"/GB-EUC-H": "gbk",
Expand All @@ -104,7 +104,7 @@ def build_char_map_from_dict(
}

# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
_default_fonts_space_width: Dict[str, int] = {
_default_fonts_space_width: dict[str, int] = {
"/Courier": 600,
"/Courier-Bold": 600,
"/Courier-BoldOblique": 600,
Expand All @@ -128,7 +128,7 @@ def build_char_map_from_dict(

def get_encoding(
ft: DictionaryObject
) -> Tuple[Union[str, Dict[int, str]], Dict[Any, Any]]:
) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
encoding = _parse_encoding(ft)
map_dict, int_entry = _parse_to_unicode(ft)

Expand All @@ -146,8 +146,8 @@ def get_encoding(

def _parse_encoding(
ft: DictionaryObject
) -> Union[str, Dict[int, str]]:
encoding: Union[str, List[str], Dict[int, str]] = []
) -> Union[str, dict[int, str]]:
encoding: Union[str, list[str], dict[int, str]] = []
if "/Encoding" not in ft:
if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
encoding = dict(
Expand Down Expand Up @@ -205,13 +205,13 @@ def _parse_encoding(

def _parse_to_unicode(
ft: DictionaryObject
) -> Tuple[Dict[Any, Any], List[int]]:
) -> tuple[dict[Any, Any], list[int]]:
# will store all translation code
# and map_dict[-1] we will have the number of bytes to convert
map_dict: Dict[Any, Any] = {}
map_dict: dict[Any, Any] = {}

# will provide the list of cmap keys as int to correct encoding
int_entry: List[int] = []
int_entry: list[int] = []

if "/ToUnicode" not in ft:
if ft.get("/Subtype", "") == "/Type1":
Expand All @@ -220,7 +220,7 @@ def _parse_to_unicode(
process_rg: bool = False
process_char: bool = False
multiline_rg: Union[
None, Tuple[int, int]
None, tuple[int, int]
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
cm = prepare_cm(ft)
for line in cm.split(b"\n"):
Expand All @@ -237,7 +237,7 @@ def _parse_to_unicode(


def get_actual_str_key(
value_char: str, encoding: Union[str, Dict[int, str]], map_dict: Dict[Any, Any]
value_char: str, encoding: Union[str, dict[int, str]], map_dict: dict[Any, Any]
) -> str:
key_dict = {}
if isinstance(encoding, dict):
Expand Down Expand Up @@ -292,10 +292,10 @@ def process_cm_line(
line: bytes,
process_rg: bool,
process_char: bool,
multiline_rg: Union[None, Tuple[int, int]],
map_dict: Dict[Any, Any],
int_entry: List[int],
) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
multiline_rg: Union[None, tuple[int, int]],
map_dict: dict[Any, Any],
int_entry: list[int],
) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
if line == b"" or line[0] == 37: # 37 = %
return process_rg, process_char, multiline_rg
line = line.replace(b"\t", b" ")
Expand All @@ -319,10 +319,10 @@ def process_cm_line(

def parse_bfrange(
line: bytes,
map_dict: Dict[Any, Any],
int_entry: List[int],
multiline_rg: Union[None, Tuple[int, int]],
) -> Union[None, Tuple[int, int]]:
map_dict: dict[Any, Any],
int_entry: list[int],
multiline_rg: Union[None, tuple[int, int]],
) -> Union[None, tuple[int, int]]:
lst = [x for x in line.split(b" ") if x]
closure_found = False
if multiline_rg is not None:
Expand Down Expand Up @@ -377,7 +377,7 @@ def parse_bfrange(
return None if closure_found else (a, b)


def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
lst = [x for x in line.split(b" ") if x]
map_dict[-1] = len(lst[0]) // 2
while len(lst) > 1:
Expand All @@ -401,8 +401,8 @@ def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) ->

def build_font_width_map(
ft: DictionaryObject, default_font_width: float
) -> Dict[Any, float]:
font_width_map: Dict[Any, float] = {}
) -> dict[Any, float]:
font_width_map: dict[Any, float] = {}
st: int = 0
en: int = 0
try:
Expand Down Expand Up @@ -482,7 +482,7 @@ def build_font_width_map(


def compute_space_width(
font_width_map: Dict[Any, float], space_char: str
font_width_map: dict[Any, float], space_char: str
) -> float:
try:
sp_width = font_width_map[space_char]
Expand All @@ -497,7 +497,7 @@ def compute_space_width(


def compute_font_width(
font_width_map: Dict[Any, float],
font_width_map: dict[Any, float],
char: str
) -> float:
char_width: float = 0.0
Expand All @@ -513,9 +513,9 @@ def compute_font_width(

def _type1_alternative(
ft: DictionaryObject,
map_dict: Dict[Any, Any],
int_entry: List[int],
) -> Tuple[Dict[Any, Any], List[int]]:
map_dict: dict[Any, Any],
int_entry: list[int],
) -> tuple[dict[Any, Any], list[int]]:
if "/FontDescriptor" not in ft:
return map_dict, int_entry
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
Expand Down
22 changes: 10 additions & 12 deletions pypdf/_codecs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from typing import Dict, List

from .adobe_glyphs import adobe_glyphs
from .pdfdoc import _pdfdoc_encoding
from .std import _std_encoding
from .symbol import _symbol_encoding
from .zapfding import _zapfding_encoding


def fill_from_encoding(enc: str) -> List[str]:
lst: List[str] = []
def fill_from_encoding(enc: str) -> list[str]:
lst: list[str] = []
for x in range(256):
try:
lst += (bytes((x,)).decode(enc),)
Expand All @@ -17,8 +15,8 @@ def fill_from_encoding(enc: str) -> List[str]:
return lst


def rev_encoding(enc: List[str]) -> Dict[str, int]:
rev: Dict[str, int] = {}
def rev_encoding(enc: list[str]) -> dict[str, int]:
rev: dict[str, int] = {}
for i in range(256):
char = enc[i]
if char == "\u0000":
Expand All @@ -32,14 +30,14 @@ def rev_encoding(enc: List[str]) -> Dict[str, int]:
_mac_encoding = fill_from_encoding("mac_roman")


_win_encoding_rev: Dict[str, int] = rev_encoding(_win_encoding)
_mac_encoding_rev: Dict[str, int] = rev_encoding(_mac_encoding)
_symbol_encoding_rev: Dict[str, int] = rev_encoding(_symbol_encoding)
_zapfding_encoding_rev: Dict[str, int] = rev_encoding(_zapfding_encoding)
_pdfdoc_encoding_rev: Dict[str, int] = rev_encoding(_pdfdoc_encoding)
_win_encoding_rev: dict[str, int] = rev_encoding(_win_encoding)
_mac_encoding_rev: dict[str, int] = rev_encoding(_mac_encoding)
_symbol_encoding_rev: dict[str, int] = rev_encoding(_symbol_encoding)
_zapfding_encoding_rev: dict[str, int] = rev_encoding(_zapfding_encoding)
_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)


charset_encoding: Dict[str, List[str]] = {
charset_encoding: dict[str, list[str]] = {
"/StandardEncoding": _std_encoding,
"/WinAnsiEncoding": _win_encoding,
"/MacRomanEncoding": _mac_encoding,
Expand Down
7 changes: 3 additions & 4 deletions pypdf/_codecs/_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import io
from abc import ABC, abstractmethod
from typing import Dict, List

from pypdf._utils import logger_warning

Expand Down Expand Up @@ -52,7 +51,7 @@ class LzwCodec(Codec):

def _initialize_encoding_table(self) -> None:
"""Initialize the encoding table and state to initial conditions."""
self.encoding_table: Dict[bytes, int] = {bytes([i]): i for i in range(256)}
self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
self.next_code = self.EOD_MARKER + 1
self.bits_per_code = self.INITIAL_BITS_PER_CODE
self.max_code_value = (1 << self.bits_per_code) - 1
Expand All @@ -73,7 +72,7 @@ def encode(self, data: bytes) -> bytes:

Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
"""
result_codes: List[int] = []
result_codes: list[int] = []

# The encoder shall begin by issuing a clear-table code
result_codes.append(self.CLEAR_TABLE_MARKER)
Expand Down Expand Up @@ -109,7 +108,7 @@ def encode(self, data: bytes) -> bytes:

return self._pack_codes_into_bytes(result_codes)

def _pack_codes_into_bytes(self, codes: List[int]) -> bytes:
def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
"""
Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
The bit-width starts at 9 bits and expands as needed.
Expand Down
Loading
Loading