Skip to content

Commit f9d77bb

Browse files
authored
Merge branch 'main' into Merger
2 parents 938fc4a + c4e95bd commit f9d77bb

File tree

6 files changed

+428
-17
lines changed

6 files changed

+428
-17
lines changed

pypdf/_reader.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
NullObject,
7878
NumberObject,
7979
PdfObject,
80+
StreamObject,
8081
TextStringObject,
8182
read_object,
8283
)
@@ -316,8 +317,6 @@ def _get_object_from_stream(
316317
obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object() # type: ignore
317318
# This is an xref to a stream, so its type better be a stream
318319
assert cast(str, obj_stm["/Type"]) == "/ObjStm"
319-
# /N is the number of indirect objects in the stream
320-
assert idx < obj_stm["/N"]
321320
stream_data = BytesIO(obj_stm.get_data())
322321
for i in range(obj_stm["/N"]): # type: ignore
323322
read_non_whitespace(stream_data)
@@ -999,6 +998,41 @@ def _rebuild_xref_table(self, stream: StreamType) -> None:
999998
if generation not in self.xref:
1000999
self.xref[generation] = {}
10011000
self.xref[generation][idnum] = m.start(1)
1001+
1002+
logger_warning("parsing for Object Streams", __name__)
1003+
for g in self.xref:
1004+
for i in self.xref[g]:
1005+
# manual equivalent of get_object
1006+
stream.seek(self.xref[g][i], 0)
1007+
try:
1008+
_ = self.read_object_header(stream)
1009+
o = cast(StreamObject, read_object(stream, self))
1010+
if o.get("/Type", "") != "/ObjStm":
1011+
continue
1012+
strm = BytesIO(o.get_data())
1013+
cpt = 0
1014+
while True:
1015+
s = read_until_whitespace(strm)
1016+
if not s.isdigit():
1017+
break
1018+
_i = int(s)
1019+
skip_over_whitespace(strm)
1020+
strm.seek(-1, 1)
1021+
s = read_until_whitespace(strm)
1022+
if not s.isdigit(): # pragma: no cover
1023+
break # pragma: no cover
1024+
_o = int(s)
1025+
self.xref_objStm[_i] = (i, _o)
1026+
cpt += 1
1027+
if cpt != o.get("/N"): # pragma: no cover
1028+
logger_warning( # pragma: no cover
1029+
f"found {cpt} objects within Object({i},{g})"
1030+
f" whereas {o.get('/N')} expected",
1031+
__name__,
1032+
)
1033+
except Exception: # could have many causes
1034+
pass
1035+
10021036
stream.seek(0, 0)
10031037
for m in re.finditer(rb"[\r\n \t][ \t]*trailer[\r\n \t]*(<<)", f_):
10041038
stream.seek(m.start(1), 0)

pypdf/annotations/_non_markup_annotations.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def __init__(
3636
if is_external and is_internal:
3737
raise ValueError(
3838
"Either 'url' or 'target_page_index' have to be provided. "
39-
f"url={url}, target_page_index={target_page_index}"
39+
f"{url=}, {target_page_index=}"
4040
)
4141

4242
border_arr: BorderArrayType

pypdf/generic/_rectangle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(
2626
ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr]) # type: ignore
2727

2828
def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
29-
if not isinstance(value, (NumberObject, FloatObject)):
29+
if not isinstance(value, (FloatObject, NumberObject)):
3030
value = FloatObject(value)
3131
return value
3232

tests/test_filters.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from io import BytesIO
66
from itertools import product as cartesian_product
77
from pathlib import Path
8-
from unittest.mock import patch
98

109
import pytest
1110
from PIL import Image
@@ -225,14 +224,11 @@ def test_ccitt_fax_decode():
225224

226225

227226
@pytest.mark.enable_socket()
228-
@patch("pypdf._reader.logger_warning")
229-
def test_decompress_zlib_error(mock_logger_warning):
227+
def test_decompress_zlib_error(caplog):
230228
reader = PdfReader(BytesIO(get_data_from_url(name="tika-952445.pdf")))
231229
for page in reader.pages:
232230
page.extract_text()
233-
mock_logger_warning.assert_called_with(
234-
"incorrect startxref pointer(3)", "pypdf._reader"
235-
)
231+
assert "incorrect startxref pointer(3)" in caplog.text
236232

237233

238234
@pytest.mark.enable_socket()

0 commit comments

Comments
 (0)