Skip to content

pathlib ABCs: yield progress reports from WritablePath._copy_from() #131636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,7 +1105,7 @@ def copy(self, target, **kwargs):
if not hasattr(target, 'with_segments'):
target = self.with_segments(target)
ensure_distinct_paths(self, target)
target._copy_from(self, **kwargs)
list(target._copy_from(self, **kwargs)) # Consume generator.
Copy link
Contributor

@graingert graingert Mar 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per the recipes in the itertools docs, the best way to consume a generator is collections.deque with maxlen=0:

def consume(iterator, n=None):
    """Advance *iterator* by *n* items, or exhaust it when *n* is None.

    Nothing is returned; the iterator is advanced in place.
    """
    if n is not None:
        # The empty slice [n:n] makes islice skip up to n items and
        # yield nothing; the default of None absorbs the StopIteration.
        next(islice(iterator, n, n), None)
        return
    # A deque with maxlen=0 discards every item it is fed, so this drains
    # the iterator without storing anything.
    deque(iterator, maxlen=0)

It's a neat little shortcut that deque takes when it is created with maxlen=0:

if (maxlen == 0)
return consume_iterator(it);

Suggested change
list(target._copy_from(self, **kwargs)) # Consume generator.
collections.deque(target._copy_from(self, **kwargs), maxlen=0) # Consume generator.

return target.joinpath() # Empty join to ensure fresh metadata.

def copy_into(self, target_dir, **kwargs):
Expand All @@ -1123,26 +1123,30 @@ def copy_into(self, target_dir, **kwargs):

def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False):
"""
Recursively copy the given path to this path.
Recursively copy the given path to this path. This is a generator
function that yields (target, source, sent) tuples as the copying
operation progresses.
"""
yield self, source, 0
if not follow_symlinks and source.info.is_symlink():
self._copy_from_symlink(source, preserve_metadata)
elif source.info.is_dir():
children = source.iterdir()
os.mkdir(self)
for child in children:
self.joinpath(child.name)._copy_from(
yield from self.joinpath(child.name)._copy_from(
child, follow_symlinks, preserve_metadata)
if preserve_metadata:
copy_info(source.info, self)
else:
self._copy_from_file(source, preserve_metadata)
for sent in self._copy_from_file(source, preserve_metadata):
yield self, source, sent

def _copy_from_file(self, source, preserve_metadata=False):
ensure_different_files(source, self)
with magic_open(source, 'rb') as source_f:
with open(self, 'wb') as target_f:
copyfileobj(source_f, target_f)
yield from copyfileobj(source_f, target_f)
if preserve_metadata:
copy_info(source.info, self)

Expand All @@ -1157,7 +1161,7 @@ def _copy_from_file(self, source, preserve_metadata=False):
else:
copyfile2(source, str(self))
return
self._copy_from_file_fallback(source, preserve_metadata)
yield from self._copy_from_file_fallback(source, preserve_metadata)

if os.name == 'nt':
# If a directory-symlink is copied *before* its target, then
Expand Down
21 changes: 9 additions & 12 deletions Lib/pathlib/_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,11 @@ def _copy_file_range(source_fd, target_fd):
copy.
This should work on Linux >= 4.5 only.
"""
fn = os.copy_file_range
blocksize = _get_copy_blocksize(source_fd)
offset = 0
while True:
sent = os.copy_file_range(source_fd, target_fd, blocksize,
offset_dst=offset)
if sent == 0:
break # EOF
while sent := fn(source_fd, target_fd, blocksize, None, offset):
yield sent
offset += sent
else:
_copy_file_range = None
Expand All @@ -90,12 +88,11 @@ def _sendfile(source_fd, target_fd):
high-performance sendfile(2) syscall.
This should work on Linux >= 2.6.33 only.
"""
fn = os.sendfile
blocksize = _get_copy_blocksize(source_fd)
offset = 0
while True:
sent = os.sendfile(target_fd, source_fd, offset, blocksize)
if sent == 0:
break # EOF
while sent := fn(target_fd, source_fd, offset, blocksize):
yield sent
offset += sent
else:
_sendfile = None
Expand Down Expand Up @@ -141,14 +138,14 @@ def copyfileobj(source_f, target_f):
raise err
if _copy_file_range:
try:
_copy_file_range(source_fd, target_fd)
yield from _copy_file_range(source_fd, target_fd)
return
except OSError as err:
if err.errno not in (ETXTBSY, EXDEV):
raise err
if _sendfile:
try:
_sendfile(source_fd, target_fd)
yield from _sendfile(source_fd, target_fd)
return
except OSError as err:
if err.errno != ENOTSOCK:
Expand All @@ -163,7 +160,7 @@ def copyfileobj(source_f, target_f):
read_source = source_f.read
write_target = target_f.write
while buf := read_source(1024 * 1024):
write_target(buf)
yield write_target(buf)


def magic_open(path, mode='r', buffering=-1, encoding=None, errors=None,
Expand Down
16 changes: 10 additions & 6 deletions Lib/pathlib/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def copy(self, target, **kwargs):
Recursively copy this file or directory tree to the given destination.
"""
ensure_distinct_paths(self, target)
target._copy_from(self, **kwargs)
list(target._copy_from(self, **kwargs)) # Consume generator.
return target.joinpath() # Empty join to ensure fresh metadata.

def copy_into(self, target_dir, **kwargs):
Expand Down Expand Up @@ -399,23 +399,27 @@ def write_text(self, data, encoding=None, errors=None, newline=None):

def _copy_from(self, source, follow_symlinks=True):
"""
Recursively copy the given path to this path.
Recursively copy the given path to this path. This is a generator
function that yields (target, source, sent) tuples as the copying
operation progresses.
"""
stack = [(source, self)]
stack = [(self, source)]
while stack:
src, dst = stack.pop()
dst, src = stack.pop()
yield dst, src, 0
if not follow_symlinks and src.info.is_symlink():
dst.symlink_to(str(src.readlink()), src.info.is_dir())
elif src.info.is_dir():
children = src.iterdir()
dst.mkdir()
for child in children:
stack.append((child, dst.joinpath(child.name)))
stack.append((dst.joinpath(child.name), child))
else:
ensure_different_files(src, dst)
with magic_open(src, 'rb') as source_f:
with magic_open(dst, 'wb') as target_f:
copyfileobj(source_f, target_f)
for sent in copyfileobj(source_f, target_f):
yield dst, src, sent


_JoinablePath.register(PurePath)
Expand Down
Loading