@@ -86,19 +86,29 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
8686 continue
8787 except OSError :
8888 continue
89- if match (entry .name ):
90- yield parent_path ._make_child_entry (entry )
89+ # Avoid cost of making a path object for non-matching paths by
90+ # matching against the os.DirEntry.name string.
91+ if match is None or match (entry .name ):
92+ yield parent_path ._make_child_direntry (entry )
9193
9294
93- def _select_recursive (parent_paths , dir_only , follow_symlinks ):
94- """Yield given paths and all their subdirectories, recursively."""
95+ def _select_recursive (parent_paths , dir_only , follow_symlinks , match ):
96+ """Yield given paths and all their children, recursively, filtering by
97+ string and type.
98+ """
9599 if follow_symlinks is None :
96100 follow_symlinks = False
97101 for parent_path in parent_paths :
102+ if match is not None :
103+ # If we're filtering paths through a regex, record the length of
104+ # the parent path. We'll pass it to match(path, pos=...) later.
105+ parent_len = len (str (parent_path ._make_child_relpath ('_' ))) - 1
98106 paths = [parent_path ._make_child_relpath ('' )]
99107 while paths :
100108 path = paths .pop ()
101- yield path
109+ if match is None or match (str (path ), parent_len ):
110+ # Yield *directory* path that matches pattern (if any).
111+ yield path
102112 try :
103113 # We must close the scandir() object before proceeding to
104114 # avoid exhausting file descriptors when globbing deep trees.
@@ -108,14 +118,22 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
108118 pass
109119 else :
110120 for entry in entries :
121+ # Handle directory entry.
111122 try :
112123 if entry .is_dir (follow_symlinks = follow_symlinks ):
113- paths .append (path ._make_child_entry (entry ))
124+ # Recurse into this directory.
125+ paths .append (path ._make_child_direntry (entry ))
114126 continue
115127 except OSError :
116128 pass
129+
130+ # Handle file entry.
117131 if not dir_only :
118- yield path ._make_child_entry (entry )
132+ # Avoid cost of making a path object for non-matching
133+ # files by matching against the os.DirEntry object.
134+ if match is None or match (path ._direntry_str (entry ), parent_len ):
135+ # Yield *file* path that matches pattern (if any).
136+ yield path ._make_child_direntry (entry )
119137
120138
121139def _select_unique (paths ):
@@ -750,8 +768,14 @@ def _scandir(self):
750768 from contextlib import nullcontext
751769 return nullcontext (self .iterdir ())
752770
753- def _make_child_entry (self , entry ):
771+ def _direntry_str (self , entry ):
772+ # Transform an entry yielded from _scandir() into a path string.
773+ # PathBase._scandir() yields PathBase objects, so use str().
774+ return str (entry )
775+
776+ def _make_child_direntry (self , entry ):
754777 # Transform an entry yielded from _scandir() into a path object.
778+ # PathBase._scandir() yields PathBase objects, so this is a no-op.
755779 return entry
756780
757781 def _make_child_relpath (self , name ):
@@ -769,43 +793,49 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
769793
770794 stack = pattern ._pattern_stack
771795 specials = ('' , '.' , '..' )
772- filter_paths = False
773796 deduplicate_paths = False
774797 sep = self .pathmod .sep
775798 paths = iter ([self ] if self .is_dir () else [])
776799 while stack :
777800 part = stack .pop ()
778801 if part in specials :
802+ # Join special component (e.g. '..') onto paths.
779803 paths = _select_special (paths , part )
804+
780805 elif part == '**' :
781- # Consume adjacent '**' components.
806+ # Consume following '**' components, which have no effect .
782807 while stack and stack [- 1 ] == '**' :
783808 stack .pop ()
784809
785- # Consume adjacent non-special components and enable post-walk
786- # regex filtering, provided we're treating symlinks consistently.
810+ # Consume following non-special components, provided we're
811+ # treating symlinks consistently. Each component is joined
812+ # onto 'part', which is used to generate an re.Pattern object.
787813 if follow_symlinks is not None :
788814 while stack and stack [- 1 ] not in specials :
789- filter_paths = True
790- stack .pop ()
815+ part += sep + stack .pop ()
791816
792- dir_only = bool (stack )
793- paths = _select_recursive (paths , dir_only , follow_symlinks )
817+ # If the previous loop consumed pattern components, compile an
818+ # re.Pattern object based on those components.
819+ match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
820+
821+ # Recursively walk directories, filtering by type and regex.
822+ paths = _select_recursive (paths , bool (stack ), follow_symlinks , match )
823+
824+ # De-duplicate if we've already seen a '**' component.
794825 if deduplicate_paths :
795- # De-duplicate if we've already seen a '**' component.
796826 paths = _select_unique (paths )
797827 deduplicate_paths = True
828+
798829 elif '**' in part :
799830 raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
831+
800832 else :
801- dir_only = bool (stack )
802- match = _compile_pattern (part , sep , case_sensitive )
803- paths = _select_children (paths , dir_only , follow_symlinks , match )
804- if filter_paths :
805- # Filter out paths that don't match pattern.
806- prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
807- match = _compile_pattern (pattern ._pattern_str , sep , case_sensitive )
808- paths = (path for path in paths if match (path ._pattern_str , prefix_len ))
833+ # If the pattern component isn't '*', compile an re.Pattern
834+ # object based on the component.
835+ match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
836+
837+ # Iterate over directories' children filtering by type and regex.
838+ paths = _select_children (paths , bool (stack ), follow_symlinks , match )
809839 return paths
810840
811841 def rglob (self , pattern , * , case_sensitive = None , follow_symlinks = None ):
@@ -854,7 +884,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
854884
855885 if is_dir :
856886 if not top_down :
857- paths .append (path ._make_child_entry (entry ))
887+ paths .append (path ._make_child_direntry (entry ))
858888 dirnames .append (entry .name )
859889 else :
860890 filenames .append (entry .name )
0 commit comments