Skip to content

Commit

Permalink
GH-104104: Optimize pathlib.Path.glob() by avoiding repeated calls …
Browse files Browse the repository at this point in the history
…to `os.path.normcase()` (GH-104105)

Use `re.IGNORECASE` to implement case-insensitive matching. This
restores behaviour from before GH-31691.
  • Loading branch information
barneygale authored May 2, 2023
1 parent 1f53844 commit 47770a1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
25 changes: 14 additions & 11 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def _is_wildcard_pattern(pat):
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat

def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'

#
# Globbing helpers
#
Expand Down Expand Up @@ -100,15 +103,14 @@ def select_from(self, parent_path):
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
normcase = path_cls._flavour.normcase
if not is_dir(parent_path):
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
return self._select_from(parent_path, is_dir, exists, scandir)


class _TerminatingSelector:
# Selector whose _select_from() yields the parent path it is given.

# NOTE(review): the two consecutive signatures below appear to be the
# before/after sides of the rendered diff; the second one drops the
# `normcase` parameter. Confirm against the real file.
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
yield parent_path


Expand All @@ -118,11 +120,11 @@ def __init__(self, name, child_parts, flavour):
self.name = name
_Selector.__init__(self, child_parts, flavour)

# Yield whatever the successor selector produces for the child of
# parent_path named self.name, provided that child passes the is_dir check
# (when self.dironly is true) or the exists check (otherwise).
# NOTE(review): each pair of near-identical lines below appears to be the
# before/after sides of the rendered diff; the second line of each pair
# omits `normcase`. Confirm against the real file.
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
path = parent_path._make_child_relpath(self.name)
# Directory-only selectors test with is_dir; all others test with exists.
if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError:
# A PermissionError ends this generator instead of propagating.
return
Expand All @@ -131,10 +133,11 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
class _WildcardSelector(_Selector):

# Compile the glob pattern `pat` once and keep the compiled pattern's
# fullmatch method as self.match, then delegate to _Selector.__init__.
# NOTE(review): the first `self.match = ...` line (which normcases the
# pattern) appears to be the removed side of the rendered diff, and the
# `flags` / `self.match` pair after it the added side. Confirm against the
# real file.
def __init__(self, pat, child_parts, flavour):
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
# Pass re.IGNORECASE only when the flavour is not case-sensitive.
flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
Expand All @@ -153,9 +156,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
raise
continue
name = entry.name
if self.match(normcase(name)):
if self.match(name):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError:
return
Expand Down Expand Up @@ -187,13 +190,13 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
except PermissionError:
return

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, is_dir, exists, scandir):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
for p in successor_select(starting_point, is_dir, exists, scandir):
if p not in yielded:
yield p
yielded.add(p)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve performance of :meth:`pathlib.Path.glob` by using
:data:`re.IGNORECASE` to implement case-insensitive matching.

0 comments on commit 47770a1

Please sign in to comment.