Skip to content

Commit d4d79bc

Browse files
bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085)
This speeds it up by 20-40%.
1 parent 82cd3ce commit d4d79bc

File tree

4 files changed

+55
-31
lines changed

4 files changed

+55
-31
lines changed

Doc/whatsnew/3.7.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,10 @@ Optimizations
440440
using the :func:`os.scandir` function.
441441
(Contributed by Serhiy Storchaka in :issue:`25996`.)
442442

443+
* The :func:`shutil.rmtree` function has been sped up by 20--40%.
444+
This was done using the :func:`os.scandir` function.
445+
(Contributed by Serhiy Storchaka in :issue:`28564`.)
446+
443447
* Optimized case-insensitive matching and searching of :mod:`regular
444448
expressions <re>`. Searching some patterns can now be up to 20 times faster.
445449
(Contributed by Serhiy Storchaka in :issue:`30285`.)
@@ -656,6 +660,11 @@ Changes in the Python API
656660
* ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
657661
in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)
658662

663+
* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir`
664+
function, the user specified handler *onerror* is now called with the first
665+
argument ``os.scandir`` instead of ``os.listdir`` when listing the directory
666+
fails.
667+
659668

660669
Changes in the C API
661670
--------------------

Lib/shutil.py

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
362362
# version vulnerable to race conditions
363363
def _rmtree_unsafe(path, onerror):
364364
try:
365-
if os.path.islink(path):
366-
# symlinks to directories are forbidden, see bug #1669
367-
raise OSError("Cannot call rmtree on a symbolic link")
365+
with os.scandir(path) as scandir_it:
366+
entries = list(scandir_it)
368367
except OSError:
369-
onerror(os.path.islink, path, sys.exc_info())
370-
# can't continue even if onerror hook returns
371-
return
372-
names = []
373-
try:
374-
names = os.listdir(path)
375-
except OSError:
376-
onerror(os.listdir, path, sys.exc_info())
377-
for name in names:
378-
fullname = os.path.join(path, name)
368+
onerror(os.scandir, path, sys.exc_info())
369+
entries = []
370+
for entry in entries:
371+
fullname = entry.path
379372
try:
380-
mode = os.lstat(fullname).st_mode
373+
is_dir = entry.is_dir(follow_symlinks=False)
381374
except OSError:
382-
mode = 0
383-
if stat.S_ISDIR(mode):
375+
is_dir = False
376+
if is_dir:
377+
try:
378+
if entry.is_symlink():
379+
# This can only happen if someone replaces
380+
# a directory with a symlink after the call to
381+
# os.scandir or entry.is_dir above.
382+
raise OSError("Cannot call rmtree on a symbolic link")
383+
except OSError:
384+
onerror(os.path.islink, fullname, sys.exc_info())
385+
continue
384386
_rmtree_unsafe(fullname, onerror)
385387
else:
386388
try:
@@ -394,37 +396,40 @@ def _rmtree_unsafe(path, onerror):
394396

395397
# Version using fd-based APIs to protect against races
396398
def _rmtree_safe_fd(topfd, path, onerror):
397-
names = []
398399
try:
399-
names = os.listdir(topfd)
400+
with os.scandir(topfd) as scandir_it:
401+
entries = list(scandir_it)
400402
except OSError as err:
401403
err.filename = path
402-
onerror(os.listdir, path, sys.exc_info())
403-
for name in names:
404-
fullname = os.path.join(path, name)
404+
onerror(os.scandir, path, sys.exc_info())
405+
return
406+
for entry in entries:
407+
fullname = os.path.join(path, entry.name)
405408
try:
406-
orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
407-
mode = orig_st.st_mode
409+
is_dir = entry.is_dir(follow_symlinks=False)
410+
if is_dir:
411+
orig_st = entry.stat(follow_symlinks=False)
412+
is_dir = stat.S_ISDIR(orig_st.st_mode)
408413
except OSError:
409-
mode = 0
410-
if stat.S_ISDIR(mode):
414+
is_dir = False
415+
if is_dir:
411416
try:
412-
dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
417+
dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
413418
except OSError:
414419
onerror(os.open, fullname, sys.exc_info())
415420
else:
416421
try:
417422
if os.path.samestat(orig_st, os.fstat(dirfd)):
418423
_rmtree_safe_fd(dirfd, fullname, onerror)
419424
try:
420-
os.rmdir(name, dir_fd=topfd)
425+
os.rmdir(entry.name, dir_fd=topfd)
421426
except OSError:
422427
onerror(os.rmdir, fullname, sys.exc_info())
423428
else:
424429
try:
425430
# This can only happen if someone replaces
426431
# a directory with a symlink after the call to
427-
# stat.S_ISDIR above.
432+
# os.scandir or stat.S_ISDIR above.
428433
raise OSError("Cannot call rmtree on a symbolic "
429434
"link")
430435
except OSError:
@@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror):
433438
os.close(dirfd)
434439
else:
435440
try:
436-
os.unlink(name, dir_fd=topfd)
441+
os.unlink(entry.name, dir_fd=topfd)
437442
except OSError:
438443
onerror(os.unlink, fullname, sys.exc_info())
439444

440445
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
441446
os.supports_dir_fd and
442-
os.listdir in os.supports_fd and
447+
os.scandir in os.supports_fd and
443448
os.stat in os.supports_follow_symlinks)
444449

445450
def rmtree(path, ignore_errors=False, onerror=None):
@@ -491,6 +496,14 @@ def onerror(*args):
491496
finally:
492497
os.close(fd)
493498
else:
499+
try:
500+
if os.path.islink(path):
501+
# symlinks to directories are forbidden, see bug #1669
502+
raise OSError("Cannot call rmtree on a symbolic link")
503+
except OSError:
504+
onerror(os.path.islink, path, sys.exc_info())
505+
# can't continue even if onerror hook returns
506+
return
494507
return _rmtree_unsafe(path, onerror)
495508

496509
# Allow introspection of whether or not the hardening against symlink

Lib/test/test_shutil.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def onerror(*args):
183183
errors.append(args)
184184
shutil.rmtree(filename, onerror=onerror)
185185
self.assertEqual(len(errors), 2)
186-
self.assertIs(errors[0][0], os.listdir)
186+
self.assertIs(errors[0][0], os.scandir)
187187
self.assertEqual(errors[0][1], filename)
188188
self.assertIsInstance(errors[0][2][1], NotADirectoryError)
189189
self.assertIn(errors[0][2][1].filename, possible_args)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The shutil.rmtree() function has been sped up by 20--40%. This was done
2+
using the os.scandir() function.

0 commit comments

Comments
 (0)