Skip to content

Overhaul file-system operations to use FileSystemCache #4623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits
Mar 14, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Make BuildManager use an fscache
  • Loading branch information
msullivan committed Mar 1, 2018
commit a324d381da265637bb9b03e7c65c03f5bd1a239f
22 changes: 15 additions & 7 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from mypy.plugin import Plugin, DefaultPlugin, ChainedPlugin
from mypy.defaults import PYTHON3_VERSION_MIN
from mypy.server.deps import get_dependencies
from mypy.fscache import FileSystemCache


# Switch to True to produce debug output related to fine-grained incremental
Expand Down Expand Up @@ -142,6 +143,7 @@ def build(sources: List[BuildSource],
bin_dir: Optional[str] = None,
saved_cache: Optional[SavedCache] = None,
flush_errors: Optional[Callable[[List[str], bool], None]] = None,
fscache: Optional[FileSystemCache] = None,
) -> BuildResult:
"""Analyze a program.

Expand All @@ -165,6 +167,7 @@ def build(sources: List[BuildSource],
directories; if omitted, use '.' as the data directory
saved_cache: optional dict with saved cache state for dmypy (read-write!)
flush_errors: optional function to flush errors after a file is processed
fscache: optionally a file-system cacher

"""
# If we were not given a flush_errors, we use one that will populate those
Expand All @@ -177,7 +180,8 @@ def default_flush_errors(new_messages: List[str], is_serious: bool) -> None:
flush_errors = flush_errors or default_flush_errors

try:
result = _build(sources, options, alt_lib_path, bin_dir, saved_cache, flush_errors)
result = _build(sources, options, alt_lib_path, bin_dir,
saved_cache, flush_errors, fscache)
result.errors = messages
return result
except CompileError as e:
Expand All @@ -197,6 +201,7 @@ def _build(sources: List[BuildSource],
bin_dir: Optional[str],
saved_cache: Optional[SavedCache],
flush_errors: Callable[[List[str], bool], None],
fscache: Optional[FileSystemCache],
) -> BuildResult:
# This seems the most reasonable place to tune garbage collection.
gc.set_threshold(50000)
Expand Down Expand Up @@ -260,7 +265,8 @@ def _build(sources: List[BuildSource],
plugin=plugin,
errors=errors,
saved_cache=saved_cache,
flush_errors=flush_errors)
flush_errors=flush_errors,
fscache=fscache)

try:
graph = dispatch(sources, manager)
Expand Down Expand Up @@ -570,6 +576,7 @@ class BuildManager:
saved_cache: Dict with saved cache state for coarse-grained dmypy
(read-write!)
stats: Dict with various instrumentation numbers
fscache: A file system cacher
"""

def __init__(self, data_dir: str,
Expand All @@ -583,6 +590,7 @@ def __init__(self, data_dir: str,
errors: Errors,
flush_errors: Callable[[List[str], bool], None],
saved_cache: Optional[SavedCache] = None,
fscache: Optional[FileSystemCache] = None,
) -> None:
self.start_time = time.time()
self.data_dir = data_dir
Expand All @@ -608,6 +616,7 @@ def __init__(self, data_dir: str,
self.flush_errors = flush_errors
self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache
self.stats = {} # type: Dict[str, Any] # Values are ints or floats
self.fscache = fscache or FileSystemCache(self.options.python_version)

def maybe_swap_for_shadow_path(self, path: str) -> str:
if (self.options.shadow_file and
Expand All @@ -616,7 +625,7 @@ def maybe_swap_for_shadow_path(self, path: str) -> str:
return path

def get_stat(self, path: str) -> os.stat_result:
return os.stat(self.maybe_swap_for_shadow_path(path))
return self.fscache.stat(self.maybe_swap_for_shadow_path(path))

def all_imported_modules_in_file(self,
file: MypyFile) -> List[Tuple[int, str, int]]:
Expand Down Expand Up @@ -1167,8 +1176,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],

mtime = int(st.st_mtime)
if mtime != meta.mtime or path != meta.path:
with open(path, 'rb') as f:
source_hash = hashlib.md5(f.read()).hexdigest()
source_hash = manager.fscache.md5(path)
if source_hash != meta.hash:
manager.log('Metadata abandoned for {}: file {} has different hash'.format(id, path))
return None
Expand Down Expand Up @@ -1872,8 +1880,8 @@ def parse_file(self) -> None:
if self.path and source is None:
try:
path = manager.maybe_swap_for_shadow_path(self.path)
source, self.source_hash = read_with_python_encoding(
path, self.options.python_version)
source = manager.fscache.read_with_python_encoding(path)
self.source_hash = manager.fscache.md5(path)
except IOError as ioerr:
raise CompileError([
"mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)])
Expand Down
20 changes: 14 additions & 6 deletions mypy/dmypy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,23 @@ def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str,
return self.fine_grained_increment(sources)

def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
self.fscache = FileSystemCache(self.options.python_version)
self.fswatcher = FileSystemWatcher(self.fscache)
# The file system cache we create gets passed off to
# BuildManager, and thence to FineGrainedBuildManager, which
# assumes responsibility for clearing it at the appropriate
# times (after init and update()).
# We also need to clear it ourselves sometimes, when we don't invoke
# update, which is unfortunate.
fscache = FileSystemCache(self.options.python_version)
self.fswatcher = FileSystemWatcher(fscache)
self.update_sources(sources)
if not self.options.use_fine_grained_cache:
# Stores the initial state of sources as a side effect.
self.fswatcher.find_changed()
try:
# TODO: alt_lib_path
result = mypy.build.build(sources=sources,
options=self.options)
options=self.options,
fscache=fscache)
except mypy.errors.CompileError as e:
output = ''.join(s + '\n' for s in e.messages)
if e.use_stdout:
Expand All @@ -274,7 +281,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
self.fine_grained_initialized = True
self.previous_sources = sources
self.fscache.flush()
#self.fscache.flush()

# If we are using the fine-grained cache, build hasn't actually done
# the typechecking on the updated files yet.
Expand All @@ -294,7 +301,8 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
changed = self.find_changed(sources)
if changed:
messages = self.fine_grained_manager.update(changed)
self.fscache.flush()
else:
self.fine_grained_manager.manager.fscache.flush() # XXX: sigh

status = 1 if messages else 0
self.previous_messages = messages[:]
Expand All @@ -308,6 +316,7 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
if not changed:
# Nothing changed -- just produce the same result as before.
messages = self.previous_messages
self.fine_grained_manager.manager.fscache.flush() # XXX: sigh
else:
messages = self.fine_grained_manager.update(changed)
t2 = time.time()
Expand All @@ -317,7 +326,6 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
status = 1 if messages else 0
self.previous_messages = messages[:]
self.previous_sources = sources
self.fscache.flush()
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}

def update_sources(self, sources: List[mypy.build.BuildSource]) -> None:
Expand Down
3 changes: 2 additions & 1 deletion mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import stat
from typing import Tuple, Dict, List

from mypy.build import read_with_python_encoding
from mypy.errors import DecodeError


Expand All @@ -52,6 +51,8 @@ def flush(self) -> None:
self.listdir_error_cache = {} # type: Dict[str, Exception]

def read_with_python_encoding(self, path: str) -> str:
from mypy.build import read_with_python_encoding

if path in self.read_cache:
return self.read_cache[path]
if path in self.read_error_cache:
Expand Down
4 changes: 3 additions & 1 deletion mypy/server/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def __init__(self,
manager.saved_cache = {}
# Active triggers during the last update
self.triggered = [] # type: List[str]
self.manager.fscache.flush()

def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
"""Update previous build result by processing changed modules.
Expand Down Expand Up @@ -225,8 +226,9 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
if blocker:
self.blocking_error = (next_id, next_path)
self.stale = changed_modules
return messages
break

self.manager.fscache.flush()
return messages

def update_single(self, module: str, path: str) -> Tuple[List[str],
Expand Down