-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: Don't close stream passed to PdfWriter.write() #2909
base: main
Are you sure you want to change the base?
Changes from 4 commits
1d9a52c
5568450
a905bac
dafbafc
3d6b7dc
4a1189b
6ce190f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,9 +235,9 @@ def _get_clone_from( | |
or Path(str(fileobj)).stat().st_size == 0 | ||
): | ||
cloning = False | ||
if isinstance(fileobj, (IO, BytesIO)): | ||
if isinstance(fileobj, (IOBase, BytesIO)): | ||
t = fileobj.tell() | ||
fileobj.seek(-1, 2) | ||
fileobj.seek(0, 2) | ||
if fileobj.tell() == 0: | ||
cloning = False | ||
fileobj.seek(t, 0) | ||
|
@@ -249,7 +249,7 @@ def _get_clone_from( | |
# to prevent overwriting | ||
self.temp_fileobj = fileobj | ||
self.fileobj = "" | ||
self.with_as_usage = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not found of removing with_as_usage attribute. it may be usefull to know that the the object has been created for a context manager. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's never used. Keeping it around would mislead someone reading the code that it matters in some way. It's dead code but easy to revive if a need arises. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that having a dead property/attribute is not good, but it is a public interface which tends to need a proper deprecation process. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm happy to put There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we please make |
||
self.cloned = False | ||
# The root of our page tree node. | ||
pages = DictionaryObject() | ||
pages.update( | ||
|
@@ -267,6 +267,7 @@ def _get_clone_from( | |
if not isinstance(clone_from, PdfReader): | ||
clone_from = PdfReader(clone_from) | ||
self.clone_document_from_reader(clone_from) | ||
self.cloned = True | ||
else: | ||
self._pages = self._add_object(pages) | ||
# root object | ||
|
@@ -353,10 +354,11 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: | |
return self.root_object.xmp_metadata # type: ignore | ||
|
||
def __enter__(self) -> "PdfWriter": | ||
"""Store that writer is initialized by 'with'.""" | ||
"""Store how writer is initialized by 'with'.""" | ||
c: bool = self.cloned | ||
t = self.temp_fileobj | ||
self.__init__() # type: ignore | ||
self.with_as_usage = True | ||
self.cloned = c | ||
self.fileobj = t # type: ignore | ||
return self | ||
|
||
|
@@ -367,7 +369,7 @@ def __exit__( | |
traceback: Optional[TracebackType], | ||
) -> None: | ||
"""Write data to the fileobj.""" | ||
if self.fileobj: | ||
if self.fileobj and not self.cloned: | ||
self.write(self.fileobj) | ||
|
||
def _repr_mimebundle_( | ||
|
@@ -1388,13 +1390,14 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: | |
|
||
if isinstance(stream, (str, Path)): | ||
stream = FileIO(stream, "wb") | ||
self.with_as_usage = True # | ||
my_file = True | ||
|
||
self.write_stream(stream) | ||
|
||
if self.with_as_usage: | ||
if my_file: | ||
stream.close() | ||
else: | ||
stream.flush() | ||
|
||
return my_file, stream | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2480,3 +2480,32 @@ def test_append_pdf_with_dest_without_page(caplog): | |
writer.append(reader) | ||
assert "/__WKANCHOR_8" not in writer.named_destinations | ||
assert len(writer.named_destinations) == 3 | ||
|
||
|
||
def test_stream_not_closed(): | ||
"""Tests for #2905""" | ||
src = RESOURCE_ROOT / "pdflatex-outline.pdf" | ||
with NamedTemporaryFile(suffix=".pdf") as tmp: | ||
with PdfReader(src) as reader, PdfWriter() as writer: | ||
writer.add_page(reader.pages[0]) | ||
writer.write(tmp) | ||
assert not tmp.file.closed | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it might be great to also have a test for where the automatic write at the closure of the context will be done There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I managed to add a test for this. It was a bit confusing because of the double-construct that happens in |
||
|
||
with NamedTemporaryFile(suffix=".pdf") as target: | ||
with PdfWriter(target.file) as writer: | ||
writer.add_blank_page(100, 100) | ||
assert not target.file.closed | ||
|
||
with open(src, "rb") as fileobj: | ||
with PdfWriter(fileobj) as writer: | ||
pass | ||
assert not fileobj.closed | ||
|
||
|
||
def test_auto_write(): | ||
"""Another test for #2905""" | ||
target = Path(_get_write_target(str)) | ||
stefan6419846 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
with PdfWriter(target) as writer: | ||
writer.add_blank_page(100, 100) | ||
assert target.stat().st_size > 0 | ||
target.unlink() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two reasons. First, the original code was not correctly detecting an empty file; it was detecting a 1-byte file. Second, to avoid an exception when
fileobj
refers to an empty file. I can also tighten up the next line of code, asseek()
returns the resulting file offset.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation. Is this something we can actually test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the second stanza of
test_stream_not_closed
tests this case. I remember I had to make this fix in order to get tests working. As far as the behavior ofseek()
I don't intend to test that directly. Cpython callslseek()
which is well-documented and stable. The OS cannot seek to absolute offset -1 of a file, which is whatlseek(fd, -1, SEEK_END)
is asking iffd
refers to a zero-byte file.