Skip to content

Commit

Permalink
pythonGH-125413: pathlib ABCs: replace _scandir() with _info
Browse files Browse the repository at this point in the history
When a path object is generated by `PathBase.iterdir()`, its `_info`
attribute now stores an `os.DirEntry`-like object that can be used to query
the file type. This removes any need for a `_scandir()` method.

Currently the `_info` attribute is private and is only guaranteed to be
populated in paths from `iterdir()`. Later on, I'm hoping to rename it to
`info` and ensure that it's populated for all kinds of paths (this probably
involves adding a `pathlib.FileInfo` class). In the pathlib ABCs, `info`
will replace `stat()` as the lowest-level abstract mechanism for querying
file status.
  • Loading branch information
barneygale committed Dec 7, 2024
1 parent 31c9f3c commit 6046a27
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 64 deletions.
31 changes: 14 additions & 17 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def lexists(path):

@staticmethod
def scandir(path):
"""Implements os.scandir().
"""Like os.scandir(), but generates (entry, name, path) tuples.
"""
raise NotImplementedError

Expand Down Expand Up @@ -425,23 +425,18 @@ def wildcard_selector(self, part, parts):

def select_wildcard(path, exists=False):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
with self.scandir(path) as scandir_it:
entries = list(scandir_it)
entries = self.scandir(path)
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
for entry, entry_name, entry_path in entries:
if match is None or match(entry_name):
if dir_only:
try:
if not entry.is_dir():
continue
except OSError:
continue
entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
Expand Down Expand Up @@ -483,15 +478,11 @@ def select_recursive(path, exists=False):
def select_recursive_step(stack, match_pos):
path = stack.pop()
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
with self.scandir(path) as scandir_it:
entries = list(scandir_it)
entries = self.scandir(path)
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
for entry, _entry_name, entry_path in entries:
is_dir = False
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
Expand All @@ -500,7 +491,6 @@ def select_recursive_step(stack, match_pos):
pass

if is_dir or not dir_only:
entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
Expand Down Expand Up @@ -528,9 +518,16 @@ class _StringGlobber(_GlobberBase):
"""Provides shell-style pattern matching and globbing for string paths.
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
concat_path = operator.add

@staticmethod
def scandir(path):
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
with os.scandir(path) as scandir_it:
entries = list(scandir_it)
return ((entry, entry.name, entry.path) for entry in entries)

if os.name == 'nt':
@staticmethod
def add_slash(pathname):
Expand Down
45 changes: 23 additions & 22 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ class PathGlobber(_GlobberBase):

lexists = operator.methodcaller('exists', follow_symlinks=False)
add_slash = operator.methodcaller('joinpath', '')
scandir = operator.methodcaller('_scandir')

@staticmethod
def scandir(path):
"""Like os.scandir(), but generates (entry, name, path) tuples."""
return ((child._info, child.name, child) for child in path.iterdir())

@staticmethod
def concat_path(path, text):
Expand Down Expand Up @@ -419,6 +423,14 @@ class PathBase(PurePathBase):
def _unsupported_msg(cls, attribute):
return f"{cls.__name__}.{attribute} is unsupported"

@property
def _info(self):
    """
    An os.DirEntry-like object, if this path was generated by iterdir().

    This base implementation returns the path object itself. The callers
    visible in this change only query is_dir(), is_file() and is_symlink()
    on the result, which are the same queries made on the path directly.
    """
    # TODO: make this public + abstract, delete PathBase.stat().
    return self

def stat(self, *, follow_symlinks=True):
"""
Return the result of the stat() system call on this path, like
Expand Down Expand Up @@ -620,15 +632,6 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)

def _scandir(self):
"""Yield os.DirEntry-like objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
import contextlib
return contextlib.nullcontext(self.iterdir())

def iterdir(self):
"""Yield path objects of the directory contents.
Expand Down Expand Up @@ -685,18 +688,16 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
if not top_down:
paths.append((path, dirnames, filenames))
try:
with path._scandir() as entries:
for entry in entries:
name = entry.name
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
if not top_down:
paths.append(path.joinpath(name))
dirnames.append(name)
else:
filenames.append(name)
except OSError:
filenames.append(name)
for child in path.iterdir():
try:
if child._info.is_dir(follow_symlinks=follow_symlinks):
if not top_down:
paths.append(child)
dirnames.append(child.name)
else:
filenames.append(child.name)
except OSError:
filenames.append(child.name)
except OSError as error:
if on_error is not None:
on_error(error)
Expand Down
21 changes: 10 additions & 11 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ class Path(PathBase, PurePath):
object. You can also instantiate a PosixPath or WindowsPath directly,
but cannot instantiate a WindowsPath on a POSIX system or vice versa.
"""
__slots__ = ()
__slots__ = ('_info',)
as_uri = PurePath.as_uri

@classmethod
Expand Down Expand Up @@ -635,13 +635,11 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1]
yield path_str

def _scandir(self):
"""Yield os.DirEntry-like objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
return os.scandir(self)
def _from_dir_entry(self, dir_entry, path_str):
path = self.with_segments(path_str)
path._str = path_str
path._info = dir_entry
return path

def iterdir(self):
"""Yield path objects of the directory contents.
Expand All @@ -651,10 +649,11 @@ def iterdir(self):
"""
root_dir = str(self)
with os.scandir(root_dir) as scandir_it:
paths = [entry.path for entry in scandir_it]
entries = list(scandir_it)
if root_dir == '.':
paths = map(self._remove_leading_dot, paths)
return map(self._from_parsed_string, paths)
return (self._from_dir_entry(e, e.name) for e in entries)
else:
return (self._from_dir_entry(e, e.path) for e in entries)

def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
"""Iterate over this subtree and yield all existing files (of any
Expand Down
27 changes: 13 additions & 14 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1958,21 +1958,20 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))

def test_scandir(self):
def test_iterdir_info(self):
p = self.cls(self.base)
with p._scandir() as entries:
self.assertTrue(list(entries))
with p._scandir() as entries:
for entry in entries:
child = p / entry.name
self.assertIsNotNone(entry)
self.assertEqual(entry.name, child.name)
self.assertEqual(entry.is_symlink(),
child.is_symlink())
self.assertEqual(entry.is_dir(follow_symlinks=False),
child.is_dir(follow_symlinks=False))
if entry.name != 'brokenLinkLoop':
self.assertEqual(entry.is_dir(), child.is_dir())
for child in p.iterdir():
entry = child._info
self.assertIsNotNone(entry)
self.assertEqual(entry.is_dir(follow_symlinks=False),
child.is_dir(follow_symlinks=False))
self.assertEqual(entry.is_file(follow_symlinks=False),
child.is_file(follow_symlinks=False))
self.assertEqual(entry.is_symlink(),
child.is_symlink())
if child.name != 'brokenLinkLoop':
self.assertEqual(entry.is_dir(), child.is_dir())
self.assertEqual(entry.is_file(), child.is_file())

def test_glob_common(self):
def _check(glob, expected):
Expand Down

0 comments on commit 6046a27

Please sign in to comment.