Skip to content

Commit 1383234

Browse files
nsw42 and stefan6419846 authored
BUG: Handle indirect objects in font width calculations (#2967)
Closes #2966.

---------

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
1 parent db460c0 commit 1383234

File tree

3 files changed, with 22 additions (+22) and 8 deletions (-8).

3 files changed

+22
-8
lines changed

pypdf/_cmap.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -396,17 +396,17 @@ def build_font_width_map(
396396
st: int = 0
397397
en: int = 0
398398
try:
399-
default_font_width = _default_fonts_space_width[cast(str, ft["/BaseFont"].get_object)] * 2.0
399+
default_font_width = _default_fonts_space_width[cast(str, ft["/BaseFont"].get_object())] * 2.0
400400
except KeyError:
401401
pass
402402
if "/DescendantFonts" in ft: # ft["/Subtype"].startswith("/CIDFontType"):
403403
# §9.7.4.3 of the 1.7 reference ("Glyph Metrics in CIDFonts")
404404
# Widths for a CIDFont are defined using the DW and W entries.
405405
# DW2 and W2 are for vertical use. Vertical type is not implemented.
406406
ft1 = ft["/DescendantFonts"][0].get_object() # type: ignore
407-
try:
408-
font_width_map["default"] = cast(float, ft1["/DW"])
409-
except Exception:
407+
if "/DW" in ft1:
408+
font_width_map["default"] = cast(float, ft1["/DW"].get_object())
409+
else:
410410
font_width_map["default"] = default_font_width
411411
if "/W" in ft1:
412412
w = ft1["/W"].get_object()
@@ -418,13 +418,15 @@ def build_font_width_map(
418418
if isinstance(second, int):
419419
# C_first C_last same_W
420420
en = second
421+
width = w[2].get_object()
421422
for c_code in range(st, en + 1):
422-
font_width_map[chr(c_code)] = w[2]
423+
font_width_map[chr(c_code)] = width
423424
w = w[3:]
424425
elif isinstance(second, list):
425426
# Starting_C [W1 W2 ... Wn]
426427
c_code = st
427-
for width in second:
428+
for ww in second:
429+
width = ww.get_object()
428430
font_width_map[chr(c_code)] = width
429431
c_code += 1
430432
w = w[2:]

pypdf/_page.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,7 +1750,7 @@ def _debug_for_extract(self) -> str: # pragma: no cover
17501750
out += "No Font\n"
17511751
return out
17521752

1753-
def _get_acutual_font_widths(
1753+
def _get_actual_font_widths(
17541754
self,
17551755
cmap: Tuple[
17561756
Union[str, Dict[int, str]], Dict[str, str], str, Optional[DictionaryObject]
@@ -1817,7 +1817,7 @@ def _handle_tj(
18171817
rtl_dir,
18181818
visitor_text)
18191819
font_widths, actual_str_size["space_width"], actual_str_size["str_height"] = (
1820-
self._get_acutual_font_widths(cmap, text_operands, font_size, space_width))
1820+
self._get_actual_font_widths(cmap, text_operands, font_size, space_width))
18211821
actual_str_size["str_widths"] += font_widths
18221822

18231823
return text, rtl_dir, actual_str_size

tests/test_cmap.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,15 @@ def test_iss2925():
269269
name = "iss2925.pdf"
270270
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
271271
assert "slicing on the PDG to extract the relevant contextual" in reader.pages[3].extract_text()
272+
273+
274+
@pytest.mark.enable_socket
275+
def test_iss2966():
276+
"""Regression test for issue #2966: indirect objects in fonts"""
277+
url = (
278+
"https://github.com/user-attachments/files/17904233/repro_out.pdf"
279+
)
280+
name = "iss2966.pdf"
281+
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
282+
assert "Lorem ipsum dolor sit amet" in reader.pages[0].extract_text()
283+

0 commit comments

Comments
 (0)