Skip to content

Commit

Permalink
better crawling for namespace packages, explicit base dirs, abs paths
Browse files Browse the repository at this point in the history
  • Loading branch information
hauntsaninja committed Nov 22, 2020
1 parent ccb5e94 commit 804a574
Show file tree
Hide file tree
Showing 5 changed files with 350 additions and 96 deletions.
197 changes: 113 additions & 84 deletions mypy/find_sources.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Routines for finding the sources that mypy will check"""

import os.path
import functools
import os

from typing import List, Sequence, Set, Tuple, Optional, Dict
from typing import List, Sequence, Set, Tuple, Optional
from typing_extensions import Final

from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS
from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS, mypy_path
from mypy.fscache import FileSystemCache
from mypy.options import Options

Expand All @@ -24,7 +25,7 @@ def create_source_list(paths: Sequence[str], options: Options,
Raises InvalidSourceList on errors.
"""
fscache = fscache or FileSystemCache()
finder = SourceFinder(fscache)
finder = SourceFinder(fscache, options)

sources = []
for path in paths:
Expand All @@ -34,7 +35,7 @@ def create_source_list(paths: Sequence[str], options: Options,
name, base_dir = finder.crawl_up(path)
sources.append(BuildSource(path, name, None, base_dir))
elif fscache.isdir(path):
sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None)
sub_sources = finder.find_sources_in_dir(path)
if not sub_sources and not allow_empty_dir:
raise InvalidSourceList(
"There are no .py[i] files in directory '{}'".format(path)
Expand All @@ -58,112 +59,141 @@ def keyfunc(name: str) -> Tuple[int, str]:
return (-1, name)


def normalise_package_base(root: str) -> str:
    """Return a canonical absolute spelling of a package base directory.

    An empty ``root`` stands for the current directory. The path is made
    absolute and normalised lexically (symlinks are not resolved), and any
    trailing separator is removed so that different spellings of the same
    directory compare equal as strings.

    A filesystem root such as "/" keeps its separator: stripping it would
    produce an empty string (or a drive-relative path like "C:" on Windows).
    """
    if not root:
        root = os.curdir
    root = os.path.normpath(os.path.abspath(root))
    # A directory that is its own parent is a filesystem root; there the
    # trailing separator is the path itself, so it must not be stripped.
    if root.endswith(os.sep) and os.path.dirname(root) != root:
        root = root[:-1]
    return root


def get_explicit_package_bases(options: Options) -> Optional[List[str]]:
    """Collect the normalised explicit package base directories, if enabled.

    Returns None when ``options.explicit_package_bases`` is not set. Otherwise
    returns the entries of ``mypy_path()`` (from mypy.modulefinder; presumably
    the MYPYPATH environment entries -- confirm there), followed by
    ``options.mypy_path`` and the current working directory, each passed
    through normalise_package_base().
    """
    if options.explicit_package_bases:
        candidates = mypy_path() + options.mypy_path + [os.getcwd()]
        return list(map(normalise_package_base, candidates))
    return None


class SourceFinder:
def __init__(self, fscache: FileSystemCache) -> None:
def __init__(self, fscache: FileSystemCache, options: Options) -> None:
self.fscache = fscache
# A cache for package names, mapping from directory path to module id and base dir
self.package_cache = {} # type: Dict[str, Tuple[str, str]]

def find_sources_in_dir(
self, path: str, explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
if explicit_package_roots is None:
mod_prefix, root_dir = self.crawl_up_dir(path)
else:
mod_prefix = os.path.basename(path)
root_dir = os.path.dirname(path) or "."
if mod_prefix:
mod_prefix += "."
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots)

def find_sources_in_dir_helper(
self, dir_path: str, mod_prefix: str, root_dir: str,
explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
assert not mod_prefix or mod_prefix.endswith(".")

init_file = self.get_init_file(dir_path)
# If the current directory is an explicit package root, explore it as such.
# Alternatively, if we aren't given explicit package roots and we don't have an __init__
# file, recursively explore this directory as a new package root.
if (
(explicit_package_roots is not None and dir_path in explicit_package_roots)
or (explicit_package_roots is None and init_file is None)
):
mod_prefix = ""
root_dir = dir_path
self.explicit_package_bases = get_explicit_package_bases(options)
self.namespace_packages = options.namespace_packages

seen = set() # type: Set[str]
sources = []
def is_explicit_package_base(self, path: str) -> bool:
    """Tell whether ``path`` is one of the configured explicit package bases.

    ``path`` is normalised with normalise_package_base() before the lookup.
    Must only be called when explicit package bases are in use (asserted).
    """
    bases = self.explicit_package_bases
    assert bases
    normalised = normalise_package_base(path)
    return normalised in bases

if init_file:
sources.append(BuildSource(init_file, mod_prefix.rstrip("."), None, root_dir))
def find_sources_in_dir(self, path: str) -> List[BuildSource]:
sources = []

names = self.fscache.listdir(dir_path)
names.sort(key=keyfunc)
seen = set() # type: Set[str]
names = sorted(self.fscache.listdir(path), key=keyfunc)
for name in names:
# Skip certain names altogether
if name == '__pycache__' or name.startswith('.') or name.endswith('~'):
continue
path = os.path.join(dir_path, name)
subpath = os.path.join(path, name)

if self.fscache.isdir(path):
sub_sources = self.find_sources_in_dir_helper(
path, mod_prefix + name + '.', root_dir, explicit_package_roots
)
if self.fscache.isdir(subpath):
sub_sources = self.find_sources_in_dir(subpath)
if sub_sources:
seen.add(name)
sources.extend(sub_sources)
else:
stem, suffix = os.path.splitext(name)
if stem == '__init__':
continue
if stem not in seen and '.' not in stem and suffix in PY_EXTENSIONS:
if stem not in seen and suffix in PY_EXTENSIONS:
seen.add(stem)
src = BuildSource(path, mod_prefix + stem, None, root_dir)
sources.append(src)
module, base_dir = self.crawl_up(subpath)
sources.append(BuildSource(subpath, module, None, base_dir))

return sources

def crawl_up(self, path: str) -> Tuple[str, str]:
"""Given a .py[i] filename, return module and base directory
"""Given a .py[i] filename, return module and base directory.
We crawl up the path until we find a directory without
__init__.py[i], or until we run out of path components.
For example, given "xxx/yyy/foo/bar.py", we might return something like:
("foo.bar", "xxx/yyy")
If namespace packages is off, we crawl upwards until we find a directory without
an __init__.py.
If namespace packages is on, we crawl upwards until the nearest explicit base directory.
Failing that, we return one past the highest directory containing an __init__.py.
We won't crawl past directories with invalid package names.
The base directory returned is an absolute path.
"""
path = os.path.normpath(os.path.abspath(path))
parent, filename = os.path.split(path)
module_name = strip_py(filename) or os.path.basename(filename)
module_prefix, base_dir = self.crawl_up_dir(parent)
if module_name == '__init__' or not module_name:
module = module_prefix
else:
module = module_join(module_prefix, module_name)

module_name = strip_py(filename) or filename
if not module_name.isidentifier():
return module_name, parent

parent_module, base_dir = self.crawl_up_dir(parent)
if module_name == "__init__":
return parent_module, base_dir

module = module_join(parent_module, module_name)
return module, base_dir

def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
"""Given a directory name, return the corresponding module name and base directory
return self._crawl_up_helper(dir) or ("", dir)

Use package_cache to cache results.
"""
if dir in self.package_cache:
return self.package_cache[dir]
@functools.lru_cache()
def _crawl_up_helper(self, dir: str) -> Optional[Tuple[str, str]]:
"""Given a directory, maybe returns module and base directory.
parent_dir, base = os.path.split(dir)
if not dir or not self.get_init_file(dir) or not base:
module = ''
base_dir = dir or '.'
else:
# Ensure that base is a valid python module name
if base.endswith('-stubs'):
base = base[:-6] # PEP-561 stub-only directory
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
parent_module, base_dir = self.crawl_up_dir(parent_dir)
module = module_join(parent_module, base)

self.package_cache[dir] = module, base_dir
return module, base_dir
We return a non-None value if we were able to find something clearly intended as a base
directory (as adjudicated by being an explicit base directory or by containing a package
with __init__.py).
This distinction is necessary for namespace packages, so that we know when to treat
ourselves as a subpackage.
"""
# stop crawling if we're an explicit base directory
if self.explicit_package_bases is not None and self.is_explicit_package_base(dir):
return "", dir

# stop crawling if we've exhausted path components
parent, name = os.path.split(dir)
if not name or not parent:
return None
if name.endswith('-stubs'):
name = name[:-6] # PEP-561 stub-only directory

# recurse if there's an __init__.py
init_file = self.get_init_file(dir)
if init_file is not None:
if not name.isidentifier():
# in most cases the directory name is invalid, we'll just stop crawling upwards
# but if there's an __init__.py in the directory, something is messed up
raise InvalidSourceList("{} is not a valid Python package name".format(name))
# we're definitely a package, so we always return a non-None value
mod_prefix, base_dir = self.crawl_up_dir(parent)
return module_join(mod_prefix, name), base_dir

# stop crawling if our name is an invalid identifier
if not name.isidentifier():
return None

# stop crawling if namespace packages is off (and we don't have an __init__.py)
if not self.namespace_packages:
return None

# at this point: namespace packages is on, we don't have an __init__.py and we're not an
# explicit base directory
result = self._crawl_up_helper(parent)
if result is None:
# we're not an explicit base directory and we don't have an __init__.py
# and none of our parents are either, so return
return None
# one of our parents was an explicit base directory or had an __init__.py, so we're
# definitely a subpackage! chain our name to the module.
mod_prefix, base_dir = result
return module_join(mod_prefix, name), base_dir

def get_init_file(self, dir: str) -> Optional[str]:
"""Check whether a directory contains a file named __init__.py[i].
Expand All @@ -185,8 +215,7 @@ def module_join(parent: str, child: str) -> str:
"""Join module ids, accounting for a possibly empty parent."""
if parent:
return parent + '.' + child
else:
return child
return child


def strip_py(arg: str) -> Optional[str]:
Expand Down
26 changes: 15 additions & 11 deletions mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,9 @@ def add_invertible_flag(flag: str,
title="Running code",
description="Specify the code you want to type check. For more details, see "
"mypy.readthedocs.io/en/latest/running_mypy.html#running-mypy")
code_group.add_argument(
'--explicit-package-bases', action='store_true',
help="Use current directory and MYPYPATH to determine module names of files passed")
code_group.add_argument(
'-m', '--module', action='append', metavar='MODULE',
default=[],
Expand Down Expand Up @@ -862,6 +865,11 @@ def set_strict_flags() -> None:
parser.error("Missing target module, package, files, or command.")
elif code_methods > 1:
parser.error("May only specify one of: module/package, files, or command.")
# --explicit-package-bases is only accepted together with --namespace-packages.
# The message must name the flag exactly as registered with the argument parser.
if options.explicit_package_bases and not options.namespace_packages:
    parser.error(
        "Can only use --explicit-package-bases with --namespace-packages, since otherwise "
        "examining __init__.py's is sufficient to determine module names for files"
    )

# Check for overlapping `--always-true` and `--always-false` flags.
overlap = set(options.always_true) & set(options.always_false)
Expand Down Expand Up @@ -966,10 +974,7 @@ def process_package_roots(fscache: Optional[FileSystemCache],
assert fscache is not None # Since mypy doesn't know parser.error() raises.
# Do some stuff with drive letters to make Windows happy (esp. tests).
current_drive, _ = os.path.splitdrive(os.getcwd())
dot = os.curdir
dotslash = os.curdir + os.sep
dotdotslash = os.pardir + os.sep
trivial_paths = {dot, dotslash}
package_root = []
for root in options.package_root:
if os.path.isabs(root):
Expand All @@ -978,14 +983,13 @@ def process_package_roots(fscache: Optional[FileSystemCache],
if drive and drive != current_drive:
parser.error("Package root must be on current drive: %r" % (drive + root))
# Empty package root is always okay.
if root:
root = os.path.relpath(root) # Normalize the heck out of it.
if root.startswith(dotdotslash):
parser.error("Package root cannot be above current directory: %r" % root)
if root in trivial_paths:
root = ''
elif not root.endswith(os.sep):
root = root + os.sep
if not root:
root = os.curdir
if os.path.relpath(root).startswith(dotdotslash):
parser.error("Package root cannot be above current directory: %r" % root)
root = os.path.normpath(os.path.abspath(root))
if not root.endswith(os.sep):
root += os.sep
package_root.append(root)
options.package_root = package_root
# Pass the package root on to the filesystem cache.
Expand Down
1 change: 1 addition & 0 deletions mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def __init__(self) -> None:
self.follow_imports_for_stubs = False
# PEP 420 namespace packages
self.namespace_packages = False
self.explicit_package_bases = False

# disallow_any options
self.disallow_any_generics = False
Expand Down
2 changes: 1 addition & 1 deletion mypy/suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def __init__(self, fgmanager: FineGrainedBuildManager,
self.manager = fgmanager.manager
self.plugin = self.manager.plugin
self.graph = fgmanager.graph
self.finder = SourceFinder(self.manager.fscache)
self.finder = SourceFinder(self.manager.fscache, self.manager.options)

self.give_json = json
self.no_errors = no_errors
Expand Down
Loading

0 comments on commit 804a574

Please sign in to comment.