Skip to content

Commit 68a51e0

Browse files
authored
GH-125413: pathlib ABCs: use scandir() to speed up glob() (#126261)
Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly reduces the number of `PathBase.stat()` calls needed when globbing. There are no user-facing changes, because the pathlib ABCs are still private and `Path.glob()` doesn't use the implementation in its superclass.
1 parent 464a7a9; commit 68a51e0

File tree

3 files changed

+10
-25
lines changed

3 files changed

+10
-25
lines changed

Lib/glob.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -364,12 +364,6 @@ def concat_path(path, text):
364364
"""
365365
raise NotImplementedError
366366

367-
@staticmethod
368-
def parse_entry(entry):
369-
"""Returns the path of an entry yielded from scandir().
370-
"""
371-
raise NotImplementedError
372-
373367
# High-level methods
374368

375369
def compile(self, pat):
@@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
438432
except OSError:
439433
pass
440434
else:
435+
prefix = self.add_slash(path)
441436
for entry in entries:
442437
if match is None or match(entry.name):
443438
if dir_only:
@@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
446441
continue
447442
except OSError:
448443
continue
449-
entry_path = self.parse_entry(entry)
444+
entry_path = self.concat_path(prefix, entry.name)
450445
if dir_only:
451446
yield from select_next(entry_path, exists=True)
452447
else:
@@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
495490
except OSError:
496491
pass
497492
else:
493+
prefix = self.add_slash(path)
498494
for entry in entries:
499495
is_dir = False
500496
try:
@@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
504500
pass
505501

506502
if is_dir or not dir_only:
507-
entry_path = self.parse_entry(entry)
503+
entry_path = self.concat_path(prefix, entry.name)
508504
if match is None or match(str(entry_path), match_pos):
509505
if dir_only:
510506
yield from select_next(entry_path, exists=True)
@@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
533529
"""
534530
lexists = staticmethod(os.path.lexists)
535531
scandir = staticmethod(os.scandir)
536-
parse_entry = operator.attrgetter('path')
537532
concat_path = operator.add
538533

539534
if os.name == 'nt':

Lib/pathlib/_abc.py

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):
9494

9595
lexists = operator.methodcaller('exists', follow_symlinks=False)
9696
add_slash = operator.methodcaller('joinpath', '')
97-
98-
@staticmethod
99-
def scandir(path):
100-
"""Emulates os.scandir(), which returns an object that can be used as
101-
a context manager. This method is called by walk() and glob().
102-
"""
103-
import contextlib
104-
return contextlib.nullcontext(path.iterdir())
97+
scandir = operator.methodcaller('scandir')
10598

10699
@staticmethod
107100
def concat_path(path, text):
108101
"""Appends text to the given path."""
109102
return path.with_segments(path._raw_path + text)
110103

111-
@staticmethod
112-
def parse_entry(entry):
113-
"""Returns the path of an entry yielded from scandir()."""
114-
return entry
115-
116104

117105
class PurePathBase:
118106
"""Base class for pure path objects.

Lib/test/test_pathlib/test_pathlib_abc.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1633,8 +1633,10 @@ def setUp(self):
16331633
p.joinpath('linkA').symlink_to('fileA')
16341634
p.joinpath('brokenLink').symlink_to('non-existing')
16351635
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
1636-
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
1637-
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
1636+
p.joinpath('dirA', 'linkC').symlink_to(
1637+
parser.join('..', 'dirB'), target_is_directory=True)
1638+
p.joinpath('dirB', 'linkD').symlink_to(
1639+
parser.join('..', 'dirB'), target_is_directory=True)
16381640
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
16391641

16401642
def tearDown(self):
@@ -2479,7 +2481,7 @@ def test_glob_permissions(self):
24792481
if i % 2:
24802482
link.symlink_to(P(self.base, "dirE", "nonexistent"))
24812483
else:
2482-
link.symlink_to(P(self.base, "dirC"))
2484+
link.symlink_to(P(self.base, "dirC"), target_is_directory=True)
24832485

24842486
self.assertEqual(len(set(base.glob("*"))), 100)
24852487
self.assertEqual(len(set(base.glob("*/"))), 50)

0 commit comments

Comments
 (0)