Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 232 additions & 30 deletions setuptools/command/egg_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Create a distribution's .egg-info directory and contents"""

from distutils.filelist import FileList as _FileList
from distutils.errors import DistutilsInternalError
from distutils.util import convert_path
from distutils import log
import distutils.errors
Expand All @@ -27,6 +28,7 @@
parse_requirements, safe_name, parse_version,
safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
import setuptools.unicode_utils as unicode_utils
from setuptools.glob import glob

from pkg_resources.extern import packaging

Expand All @@ -36,6 +38,88 @@
pass


def translate_pattern(glob):
"""
Translate a file path glob like '*.txt' in to a regular expression.
This differs from fnmatch.translate which allows wildcards to match
directory separators. It also knows about '**/' which matches any number of
directories.
"""
pat = ''

# This will split on '/' within [character classes]. This is deliberate.
chunks = glob.split(os.path.sep)

sep = re.escape(os.sep)
valid_char = '[^%s]' % (sep,)

for c, chunk in enumerate(chunks):
last_chunk = c == len(chunks) - 1

# Chunks that are a literal ** are globstars. They match anything.
if chunk == '**':
if last_chunk:
# Match anything if this is the last component
pat += '.*'
else:
# Match '(name/)*'
pat += '(?:%s+%s)*' % (valid_char, sep)
continue # Break here as the whole path component has been handled

# Find any special characters in the remainder
i = 0
chunk_len = len(chunk)
while i < chunk_len:
char = chunk[i]
if char == '*':
# Match any number of name characters
pat += valid_char + '*'
elif char == '?':
# Match a name character
pat += valid_char
elif char == '[':
# Character class
inner_i = i + 1
# Skip initial !/] chars
if inner_i < chunk_len and chunk[inner_i] == '!':
inner_i = inner_i + 1
if inner_i < chunk_len and chunk[inner_i] == ']':
inner_i = inner_i + 1

# Loop till the closing ] is found
while inner_i < chunk_len and chunk[inner_i] != ']':
inner_i = inner_i + 1

if inner_i >= chunk_len:
# Got to the end of the string without finding a closing ]
# Do not treat this as a matching group, but as a literal [
pat += re.escape(char)
else:
# Grab the insides of the [brackets]
inner = chunk[i + 1:inner_i]
char_class = ''

# Class negation
if inner[0] == '!':
char_class = '^'
inner = inner[1:]

char_class += re.escape(inner)
pat += '[%s]' % (char_class,)

# Skip to the end ]
i = inner_i
else:
pat += re.escape(char)
i += 1

# Join each chunk with the dir separator
if not last_chunk:
pat += sep

return re.compile(pat + r'\Z(?ms)')


class egg_info(Command):
description = "create a distribution's .egg-info directory"

Expand Down Expand Up @@ -239,7 +323,151 @@ def check_broken_egg_info(self):


class FileList(_FileList):
"""File list that accepts only existing, platform-independent paths"""
# Implementations of the various MANIFEST.in commands
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, this class was a subclass of the implementation in distutils. It's not clear from this diff if there's still value in subclassing _FileList. Is this a complete re-implementation or does it also re-use some of the upstream implementation? Do you expect that these changes could be merged upstream at some point?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kept it for backwards compatibility. Packages or plugins may depend on calling FileList.include_pattern, the old way of adding packages, and I did not want to break these.


def process_template_line(self, line):
# Parse the line: split it up, make sure the right number of words
# is there, and return the relevant words. 'action' is always
# defined: it's the first word of the line. Which of the other
# three are defined depends on the action; it'll be either
# patterns, (dir and patterns), or (dir_pattern).
(action, patterns, dir, dir_pattern) = self._parse_template_line(line)

# OK, now we know that the action is valid and we have the
# right number of words on the line for that action -- so we
# can proceed with minimal error-checking.
if action == 'include':
self.debug_print("include " + ' '.join(patterns))
for pattern in patterns:
if not self.include(pattern):
log.warn("warning: no files found matching '%s'", pattern)

elif action == 'exclude':
self.debug_print("exclude " + ' '.join(patterns))
for pattern in patterns:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, cmd.egg_base was processed, but now it appears not to be. What new behavior obviates the need for this behavior?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behaviour is still required, but is now as simple as self.filelist.graft(ei_cmd.egg_info), assuming I understand what is going in _add_egg_info(). I've replaced that whole function with that one-liner.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. Great!

if not self.exclude(pattern):
log.warn(("warning: no previously-included files "
"found matching '%s'"), pattern)

elif action == 'global-include':
self.debug_print("global-include " + ' '.join(patterns))
for pattern in patterns:
if not self.global_include(pattern):
log.warn(("warning: no files found matching '%s' "
"anywhere in distribution"), pattern)

elif action == 'global-exclude':
self.debug_print("global-exclude " + ' '.join(patterns))
for pattern in patterns:
if not self.global_exclude(pattern):
log.warn(("warning: no previously-included files matching "
"'%s' found anywhere in distribution"),
pattern)

elif action == 'recursive-include':
self.debug_print("recursive-include %s %s" %
(dir, ' '.join(patterns)))
for pattern in patterns:
if not self.recursive_include(dir, pattern):
log.warn(("warning: no files found matching '%s' "
"under directory '%s'"),
pattern, dir)

elif action == 'recursive-exclude':
self.debug_print("recursive-exclude %s %s" %
(dir, ' '.join(patterns)))
for pattern in patterns:
if not self.recursive_exclude(dir, pattern):
log.warn(("warning: no previously-included files matching "
"'%s' found under directory '%s'"),
pattern, dir)

elif action == 'graft':
self.debug_print("graft " + dir_pattern)
if not self.graft(dir_pattern):
log.warn("warning: no directories found matching '%s'",
dir_pattern)

elif action == 'prune':
self.debug_print("prune " + dir_pattern)
if not self.prune(dir_pattern):
log.warn(("no previously-included directories found "
"matching '%s'"), dir_pattern)

else:
raise DistutilsInternalError(
"this cannot happen: invalid action '%s'" % action)

def _remove_files(self, predicate):
"""
Remove all files from the file list that match the predicate.
Return True if any matching files were removed
"""
found = False
for i in range(len(self.files) - 1, -1, -1):
if predicate(self.files[i]):
self.debug_print(" removing " + self.files[i])
del self.files[i]
found = True
return found

def include(self, pattern):
"""Include files that match 'pattern'."""
found = [f for f in glob(pattern) if not os.path.isdir(f)]
self.extend(found)
return bool(found)

def exclude(self, pattern):
"""Exclude files that match 'pattern'."""
match = translate_pattern(pattern)
return self._remove_files(match.match)

def recursive_include(self, dir, pattern):
"""
Include all files anywhere in 'dir/' that match the pattern.
"""
full_pattern = os.path.join(dir, '**', pattern)
found = [f for f in glob(full_pattern, recursive=True)
if not os.path.isdir(f)]
self.extend(found)
return bool(found)

def recursive_exclude(self, dir, pattern):
"""
Exclude any file anywhere in 'dir/' that match the pattern.
"""
match = translate_pattern(os.path.join(dir, '**', pattern))
return self._remove_files(match.match)

def graft(self, dir):
"""Include all files from 'dir/'."""
found = distutils.filelist.findall(dir)
self.extend(found)
return bool(found)

def prune(self, dir):
"""Filter out files from 'dir/'."""
match = translate_pattern(os.path.join(dir, '**'))
return self._remove_files(match.match)

def global_include(self, pattern):
"""
Include all files anywhere in the current directory that match the
pattern. This is very inefficient on large file trees.
"""
if self.allfiles is None:
self.findall()
match = translate_pattern(os.path.join('**', pattern))
found = [f for f in self.allfiles if match.match(f)]
self.extend(found)
return bool(found)

def global_exclude(self, pattern):
"""
Exclude all files anywhere that match the pattern.
"""
match = translate_pattern(os.path.join('**', pattern))
return self._remove_files(match.match)

def append(self, item):
if item.endswith('\r'): # Fix older sdists built on Windows
Expand Down Expand Up @@ -302,7 +530,6 @@ def run(self):
self.filelist = FileList()
if not os.path.exists(self.manifest):
self.write_manifest() # it must exist so it'll get in the list
self.filelist.findall()
self.add_defaults()
if os.path.exists(self.template):
self.read_template()
Expand Down Expand Up @@ -341,38 +568,13 @@ def add_defaults(self):
elif os.path.exists(self.manifest):
self.read_manifest()
ei_cmd = self.get_finalized_command('egg_info')
self._add_egg_info(cmd=ei_cmd)
self.filelist.include_pattern("*", prefix=ei_cmd.egg_info)

def _add_egg_info(self, cmd):
"""
Add paths for egg-info files for an external egg-base.

The egg-info files are written to egg-base. If egg-base is
outside the current working directory, this method
searchs the egg-base directory for files to include
in the manifest. Uses distutils.filelist.findall (which is
really the version monkeypatched in by setuptools/__init__.py)
to perform the search.

Since findall records relative paths, prefix the returned
paths with cmd.egg_base, so add_default's include_pattern call
(which is looking for the absolute cmd.egg_info) will match
them.
"""
if cmd.egg_base == os.curdir:
# egg-info files were already added by something else
return

discovered = distutils.filelist.findall(cmd.egg_base)
resolved = (os.path.join(cmd.egg_base, path) for path in discovered)
self.filelist.allfiles.extend(resolved)
self.filelist.graft(ei_cmd.egg_info)

def prune_file_list(self):
build = self.get_finalized_command('build')
base_dir = self.distribution.get_fullname()
self.filelist.exclude_pattern(None, prefix=build.build_base)
self.filelist.exclude_pattern(None, prefix=base_dir)
self.filelist.prune(build.build_base)
self.filelist.prune(base_dir)
sep = re.escape(os.sep)
self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep,
is_regex=1)
Expand Down
Loading