Skip to content

Commit 5b50f47

Browse files
BUG: Fix undefined variable for text extraction (regression) (#2934)
1 parent 98aa974 commit 5b50f47

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

pypdf/_cmap.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -527,6 +527,8 @@ def _type1_alternative(
527 527
v = chr(int(words[2][4:], 16))
528 528
except ValueError: # pragma: no cover
529 529
continue
530+
else:
531+
continue
530 532
map_dict[chr(i)] = v
531 533
int_entry.append(i)
532 534
return map_dict, int_entry

tests/test_cmap.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -259,3 +259,13 @@ def test_too_many_differences():
259 259
name = "iss2836.pdf"
260 260
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
261 261
assert reader.pages[0].extract_text() == ""
262+
263+
264+
@pytest.mark.enable_socket
265+
def test_iss2925():
266+
url = (
267+
"https://github.com/user-attachments/files/17621508/2305.09315.pdf"
268+
)
269+
name = "iss2925.pdf"
270+
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
271+
assert "slicing on the PDG to extract the relevant contextual" in reader.pages[3].extract_text()

0 commit comments

Comments
 (0)