-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Do not scan whole file tree when making MANIFEST #764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| Create a distribution's .egg-info directory and contents""" | ||
|
|
||
| from distutils.filelist import FileList as _FileList | ||
| from distutils.errors import DistutilsInternalError | ||
| from distutils.util import convert_path | ||
| from distutils import log | ||
| import distutils.errors | ||
|
|
@@ -27,6 +28,7 @@ | |
| parse_requirements, safe_name, parse_version, | ||
| safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename) | ||
| import setuptools.unicode_utils as unicode_utils | ||
| from setuptools.glob import glob | ||
|
|
||
| from pkg_resources.extern import packaging | ||
|
|
||
|
|
@@ -36,6 +38,88 @@ | |
| pass | ||
|
|
||
|
|
||
| def translate_pattern(glob): | ||
| """ | ||
| Translate a file path glob like '*.txt' in to a regular expression. | ||
| This differs from fnmatch.translate which allows wildcards to match | ||
| directory separators. It also knows about '**/' which matches any number of | ||
| directories. | ||
| """ | ||
| pat = '' | ||
|
|
||
| # This will split on '/' within [character classes]. This is deliberate. | ||
| chunks = glob.split(os.path.sep) | ||
|
|
||
| sep = re.escape(os.sep) | ||
| valid_char = '[^%s]' % (sep,) | ||
|
|
||
| for c, chunk in enumerate(chunks): | ||
| last_chunk = c == len(chunks) - 1 | ||
|
|
||
| # Chunks that are a literal ** are globstars. They match anything. | ||
| if chunk == '**': | ||
| if last_chunk: | ||
| # Match anything if this is the last component | ||
| pat += '.*' | ||
| else: | ||
| # Match '(name/)*' | ||
| pat += '(?:%s+%s)*' % (valid_char, sep) | ||
| continue # Break here as the whole path component has been handled | ||
|
|
||
| # Find any special characters in the remainder | ||
| i = 0 | ||
| chunk_len = len(chunk) | ||
| while i < chunk_len: | ||
| char = chunk[i] | ||
| if char == '*': | ||
| # Match any number of name characters | ||
| pat += valid_char + '*' | ||
| elif char == '?': | ||
| # Match a name character | ||
| pat += valid_char | ||
| elif char == '[': | ||
| # Character class | ||
| inner_i = i + 1 | ||
| # Skip initial !/] chars | ||
| if inner_i < chunk_len and chunk[inner_i] == '!': | ||
| inner_i = inner_i + 1 | ||
| if inner_i < chunk_len and chunk[inner_i] == ']': | ||
| inner_i = inner_i + 1 | ||
|
|
||
| # Loop till the closing ] is found | ||
| while inner_i < chunk_len and chunk[inner_i] != ']': | ||
| inner_i = inner_i + 1 | ||
|
|
||
| if inner_i >= chunk_len: | ||
| # Got to the end of the string without finding a closing ] | ||
| # Do not treat this as a matching group, but as a literal [ | ||
| pat += re.escape(char) | ||
| else: | ||
| # Grab the insides of the [brackets] | ||
| inner = chunk[i + 1:inner_i] | ||
| char_class = '' | ||
|
|
||
| # Class negation | ||
| if inner[0] == '!': | ||
| char_class = '^' | ||
| inner = inner[1:] | ||
|
|
||
| char_class += re.escape(inner) | ||
| pat += '[%s]' % (char_class,) | ||
|
|
||
| # Skip to the end ] | ||
| i = inner_i | ||
| else: | ||
| pat += re.escape(char) | ||
| i += 1 | ||
|
|
||
| # Join each chunk with the dir separator | ||
| if not last_chunk: | ||
| pat += sep | ||
|
|
||
| return re.compile(pat + r'\Z(?ms)') | ||
|
|
||
|
|
||
| class egg_info(Command): | ||
| description = "create a distribution's .egg-info directory" | ||
|
|
||
|
|
@@ -239,7 +323,151 @@ def check_broken_egg_info(self): | |
|
|
||
|
|
||
| class FileList(_FileList): | ||
| """File list that accepts only existing, platform-independent paths""" | ||
| # Implementations of the various MANIFEST.in commands | ||
|
|
||
| def process_template_line(self, line): | ||
| # Parse the line: split it up, make sure the right number of words | ||
| # is there, and return the relevant words. 'action' is always | ||
| # defined: it's the first word of the line. Which of the other | ||
| # three are defined depends on the action; it'll be either | ||
| # patterns, (dir and patterns), or (dir_pattern). | ||
| (action, patterns, dir, dir_pattern) = self._parse_template_line(line) | ||
|
|
||
| # OK, now we know that the action is valid and we have the | ||
| # right number of words on the line for that action -- so we | ||
| # can proceed with minimal error-checking. | ||
| if action == 'include': | ||
| self.debug_print("include " + ' '.join(patterns)) | ||
| for pattern in patterns: | ||
| if not self.include(pattern): | ||
| log.warn("warning: no files found matching '%s'", pattern) | ||
|
|
||
| elif action == 'exclude': | ||
| self.debug_print("exclude " + ' '.join(patterns)) | ||
| for pattern in patterns: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The behaviour is still required, but is now as simple as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay. Great! |
||
| if not self.exclude(pattern): | ||
| log.warn(("warning: no previously-included files " | ||
| "found matching '%s'"), pattern) | ||
|
|
||
| elif action == 'global-include': | ||
| self.debug_print("global-include " + ' '.join(patterns)) | ||
| for pattern in patterns: | ||
| if not self.global_include(pattern): | ||
| log.warn(("warning: no files found matching '%s' " | ||
| "anywhere in distribution"), pattern) | ||
|
|
||
| elif action == 'global-exclude': | ||
| self.debug_print("global-exclude " + ' '.join(patterns)) | ||
| for pattern in patterns: | ||
| if not self.global_exclude(pattern): | ||
| log.warn(("warning: no previously-included files matching " | ||
| "'%s' found anywhere in distribution"), | ||
| pattern) | ||
|
|
||
| elif action == 'recursive-include': | ||
| self.debug_print("recursive-include %s %s" % | ||
| (dir, ' '.join(patterns))) | ||
| for pattern in patterns: | ||
| if not self.recursive_include(dir, pattern): | ||
| log.warn(("warning: no files found matching '%s' " | ||
| "under directory '%s'"), | ||
| pattern, dir) | ||
|
|
||
| elif action == 'recursive-exclude': | ||
| self.debug_print("recursive-exclude %s %s" % | ||
| (dir, ' '.join(patterns))) | ||
| for pattern in patterns: | ||
| if not self.recursive_exclude(dir, pattern): | ||
| log.warn(("warning: no previously-included files matching " | ||
| "'%s' found under directory '%s'"), | ||
| pattern, dir) | ||
|
|
||
| elif action == 'graft': | ||
| self.debug_print("graft " + dir_pattern) | ||
| if not self.graft(dir_pattern): | ||
| log.warn("warning: no directories found matching '%s'", | ||
| dir_pattern) | ||
|
|
||
| elif action == 'prune': | ||
| self.debug_print("prune " + dir_pattern) | ||
| if not self.prune(dir_pattern): | ||
| log.warn(("no previously-included directories found " | ||
| "matching '%s'"), dir_pattern) | ||
|
|
||
| else: | ||
| raise DistutilsInternalError( | ||
| "this cannot happen: invalid action '%s'" % action) | ||
|
|
||
| def _remove_files(self, predicate): | ||
| """ | ||
| Remove all files from the file list that match the predicate. | ||
| Return True if any matching files were removed | ||
| """ | ||
| found = False | ||
| for i in range(len(self.files) - 1, -1, -1): | ||
| if predicate(self.files[i]): | ||
| self.debug_print(" removing " + self.files[i]) | ||
| del self.files[i] | ||
| found = True | ||
| return found | ||
|
|
||
| def include(self, pattern): | ||
| """Include files that match 'pattern'.""" | ||
| found = [f for f in glob(pattern) if not os.path.isdir(f)] | ||
| self.extend(found) | ||
| return bool(found) | ||
|
|
||
| def exclude(self, pattern): | ||
| """Exclude files that match 'pattern'.""" | ||
| match = translate_pattern(pattern) | ||
| return self._remove_files(match.match) | ||
|
|
||
| def recursive_include(self, dir, pattern): | ||
| """ | ||
| Include all files anywhere in 'dir/' that match the pattern. | ||
| """ | ||
| full_pattern = os.path.join(dir, '**', pattern) | ||
| found = [f for f in glob(full_pattern, recursive=True) | ||
| if not os.path.isdir(f)] | ||
| self.extend(found) | ||
| return bool(found) | ||
|
|
||
| def recursive_exclude(self, dir, pattern): | ||
| """ | ||
| Exclude any file anywhere in 'dir/' that match the pattern. | ||
| """ | ||
| match = translate_pattern(os.path.join(dir, '**', pattern)) | ||
| return self._remove_files(match.match) | ||
|
|
||
| def graft(self, dir): | ||
| """Include all files from 'dir/'.""" | ||
| found = distutils.filelist.findall(dir) | ||
| self.extend(found) | ||
| return bool(found) | ||
|
|
||
| def prune(self, dir): | ||
| """Filter out files from 'dir/'.""" | ||
| match = translate_pattern(os.path.join(dir, '**')) | ||
| return self._remove_files(match.match) | ||
|
|
||
| def global_include(self, pattern): | ||
| """ | ||
| Include all files anywhere in the current directory that match the | ||
| pattern. This is very inefficient on large file trees. | ||
| """ | ||
| if self.allfiles is None: | ||
| self.findall() | ||
| match = translate_pattern(os.path.join('**', pattern)) | ||
| found = [f for f in self.allfiles if match.match(f)] | ||
| self.extend(found) | ||
| return bool(found) | ||
|
|
||
| def global_exclude(self, pattern): | ||
| """ | ||
| Exclude all files anywhere that match the pattern. | ||
| """ | ||
| match = translate_pattern(os.path.join('**', pattern)) | ||
| return self._remove_files(match.match) | ||
|
|
||
| def append(self, item): | ||
| if item.endswith('\r'): # Fix older sdists built on Windows | ||
|
|
@@ -302,7 +530,6 @@ def run(self): | |
| self.filelist = FileList() | ||
| if not os.path.exists(self.manifest): | ||
| self.write_manifest() # it must exist so it'll get in the list | ||
| self.filelist.findall() | ||
| self.add_defaults() | ||
| if os.path.exists(self.template): | ||
| self.read_template() | ||
|
|
@@ -341,38 +568,13 @@ def add_defaults(self): | |
| elif os.path.exists(self.manifest): | ||
| self.read_manifest() | ||
| ei_cmd = self.get_finalized_command('egg_info') | ||
| self._add_egg_info(cmd=ei_cmd) | ||
| self.filelist.include_pattern("*", prefix=ei_cmd.egg_info) | ||
|
|
||
| def _add_egg_info(self, cmd): | ||
| """ | ||
| Add paths for egg-info files for an external egg-base. | ||
|
|
||
| The egg-info files are written to egg-base. If egg-base is | ||
| outside the current working directory, this method | ||
| searchs the egg-base directory for files to include | ||
| in the manifest. Uses distutils.filelist.findall (which is | ||
| really the version monkeypatched in by setuptools/__init__.py) | ||
| to perform the search. | ||
|
|
||
| Since findall records relative paths, prefix the returned | ||
| paths with cmd.egg_base, so add_default's include_pattern call | ||
| (which is looking for the absolute cmd.egg_info) will match | ||
| them. | ||
| """ | ||
| if cmd.egg_base == os.curdir: | ||
| # egg-info files were already added by something else | ||
| return | ||
|
|
||
| discovered = distutils.filelist.findall(cmd.egg_base) | ||
| resolved = (os.path.join(cmd.egg_base, path) for path in discovered) | ||
| self.filelist.allfiles.extend(resolved) | ||
| self.filelist.graft(ei_cmd.egg_info) | ||
|
|
||
| def prune_file_list(self): | ||
| build = self.get_finalized_command('build') | ||
| base_dir = self.distribution.get_fullname() | ||
| self.filelist.exclude_pattern(None, prefix=build.build_base) | ||
| self.filelist.exclude_pattern(None, prefix=base_dir) | ||
| self.filelist.prune(build.build_base) | ||
| self.filelist.prune(base_dir) | ||
| sep = re.escape(os.sep) | ||
| self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep, | ||
| is_regex=1) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously, this class was a subclass of the implementation in distutils. It's not clear from this diff if there's still value in subclassing
_FileList. Is this a complete re-implementation or does it also re-use some of the upstream implementation? Do you expect that these changes could be merged upstream at some point?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I kept it for backwards compatibility. Packages or plugins may depend on calling
FileList.include_pattern, the old way of adding packages, and I did not want to break these.