From fc8fca49eb78db2b05c7dd766ac3aad97499e96e Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Tue, 7 Feb 2023 16:42:25 +0200
Subject: [PATCH] Improve extract performance via ignoring directories early during os.walk

Co-authored-by: Steven Kao
---
 babel/messages/extract.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/babel/messages/extract.py b/babel/messages/extract.py
index 5a34f645d..d97e947af 100644
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -102,10 +102,25 @@ def _strip(line: str):
     comments[:] = map(_strip, comments)
 
 
-def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
-    subdir = os.path.basename(dirpath)
-    # Legacy default behavior: ignore dot and underscore directories
-    return not (subdir.startswith('.') or subdir.startswith('_'))
+def make_default_directory_filter(
+    method_map: Iterable[tuple[str, str]],
+    root_dir: str | os.PathLike[str],
+):
+    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+        subdir = os.path.basename(dirpath)
+        # Legacy default behavior: ignore dot and underscore directories
+        if subdir.startswith('.') or subdir.startswith('_'):
+            return False
+
+        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
+
+        for pattern, method in method_map:
+            if method == "ignore" and pathmatch(pattern, dir_rel):
+                return False
+
+        return True
+
+    return directory_filter
 
 
 def extract_from_dir(
@@ -189,13 +204,19 @@ def extract_from_dir(
     """
     if dirname is None:
         dirname = os.getcwd()
+
     if options_map is None:
         options_map = {}
+
+    dirname = os.path.abspath(dirname)
+
     if directory_filter is None:
-        directory_filter = default_directory_filter
+        directory_filter = make_default_directory_filter(
+            method_map=method_map,
+            root_dir=dirname,
+        )
 
-    absname = os.path.abspath(dirname)
-    for root, dirnames, filenames in os.walk(absname):
+    for root, dirnames, filenames in os.walk(dirname):
         dirnames[:] = [
             subdir for subdir in dirnames
             if directory_filter(os.path.join(root, subdir))
@@ -213,7 +234,7 @@ def extract_from_dir(
                 keywords,
                 comment_tags,
                 strip_comment_tags,
-                dirpath=absname,
+                dirpath=dirname,
             )
 
 