
Improve performance of deletions in initial fine-grained cache load #4701

Merged 7 commits on Mar 9, 2018
77 changes: 54 additions & 23 deletions mypy/build.py
@@ -80,6 +80,7 @@ class BuildResult:
manager: The build manager.
files: Dictionary from module name to related AST node.
types: Dictionary from parse tree node to its inferred type.
used_cache: Whether the build took advantage of a cache
errors: List of error messages.
"""

@@ -88,6 +89,7 @@ def __init__(self, manager: 'BuildManager', graph: Graph) -> None:
self.graph = graph
self.files = manager.modules
self.types = manager.all_types # Non-empty for tests only or if dumping deps
self.used_cache = manager.cache_enabled
self.errors = [] # type: List[str] # Filled in by build if desired


@@ -569,6 +571,9 @@ class BuildManager:
flush_errors: A function for processing errors after each SCC
saved_cache: Dict with saved cache state for coarse-grained dmypy
(read-write!)
cache_enabled: Whether cache usage is enabled. This is set based on options,
but is disabled if fine-grained cache loading fails
and after an initial fine-grained load.
stats: Dict with various instrumentation numbers
"""

@@ -588,7 +593,6 @@ def __init__(self, data_dir: str,
self.data_dir = data_dir
self.errors = errors
self.errors.set_ignore_prefix(ignore_prefix)
self.only_load_from_cache = options.use_fine_grained_cache
self.lib_path = tuple(lib_path)
self.source_set = source_set
self.reports = reports
@@ -607,9 +611,14 @@ def __init__(self, data_dir: str,
self.rechecked_modules = set() # type: Set[str]
self.plugin = plugin
self.flush_errors = flush_errors
self.cache_enabled = options.incremental and (
not options.fine_grained_incremental or options.use_fine_grained_cache)
self.saved_cache = saved_cache if saved_cache is not None else {} # type: SavedCache
self.stats = {} # type: Dict[str, Any] # Values are ints or floats

def use_fine_grained_cache(self) -> bool:
return self.cache_enabled and self.options.use_fine_grained_cache

def maybe_swap_for_shadow_path(self, path: str) -> str:
if (self.options.shadow_file and
os.path.samefile(self.options.shadow_file[0], path)):
@@ -1157,7 +1166,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
# changed since the cache was generated. We *don't* want to do a
# coarse-grained incremental rebuild, so we accept the cache
# metadata even if it doesn't match the source file.
if manager.options.use_fine_grained_cache:
if manager.use_fine_grained_cache():
manager.log('Using potentially stale metadata for {}'.format(id))
return meta

@@ -1655,7 +1664,7 @@ def __init__(self,
self.path = path
self.xpath = path or '<string>'
self.source = source
if path and source is None and self.options.incremental:
if path and source is None and self.manager.cache_enabled:
self.meta = find_cache_meta(self.id, path, manager)
# TODO: Get mtime if not cached.
if self.meta is not None:
@@ -1675,10 +1684,10 @@ def __init__(self,
for id, line in zip(self.meta.dependencies, self.meta.dep_lines)}
self.child_modules = set(self.meta.child_modules)
else:
# In fine-grained cache mode, pretend we only know about modules that
# have cache information and defer handling new modules until the
# fine-grained update.
if manager.only_load_from_cache:
# When doing a fine-grained cache load, pretend we only
# know about modules that have cache information and defer
# handling new modules until the fine-grained update.
if manager.use_fine_grained_cache():
manager.log("Deferring module to fine-grained update %s (%s)" % (path, id))
raise ModuleNotFound

@@ -1795,13 +1804,15 @@ def load_tree(self) -> None:

def fix_cross_refs(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
# We need to set quick_and_dirty when doing a fine grained
# cache load because we need to gracefully handle missing modules.
fixup_module_pass_one(self.tree, self.manager.modules,
self.manager.options.quick_and_dirty)
self.manager.options.quick_and_dirty or
self.manager.use_fine_grained_cache())

def calculate_mros(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
fixup_module_pass_two(self.tree, self.manager.modules,
self.manager.options.quick_and_dirty)
fixup_module_pass_two(self.tree, self.manager.modules)

def patch_dependency_parents(self) -> None:
"""
@@ -2058,7 +2069,7 @@ def valid_references(self) -> Set[str]:

def write_cache(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
if not self.path or self.options.cache_dir == os.devnull:
if not self.path or not self.manager.cache_enabled:
return
if self.manager.options.quick_and_dirty:
is_errors = self.manager.errors.is_errors_for_file(self.path)
@@ -2105,9 +2116,12 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
# This is a kind of unfortunate hack to work around some of fine-grained's
# fragility: if we have loaded less than 50% of the specified files from
# cache in fine-grained cache mode, load the graph again honestly.
if manager.options.use_fine_grained_cache and len(graph) < 0.50 * len(sources):
manager.log("Redoing load_graph because too much was missing")
manager.only_load_from_cache = False
# In this case, we just turn the cache off entirely, so we don't need
# to worry about some files being loaded and some from cache and so
# that fine-grained mode never *writes* to the cache.
if manager.use_fine_grained_cache() and len(graph) < 0.50 * len(sources):
manager.log("Redoing load_graph without cache because too much was missing")
manager.cache_enabled = False
graph = load_graph(sources, manager)

t1 = time.time()
@@ -2128,7 +2142,13 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
if manager.options.dump_graph:
dump_graph(graph)
return graph
process_graph(graph, manager)
# If we are loading a fine-grained incremental mode cache, we
# don't want to do a real incremental reprocess of the graph---we
# just want to load in all of the cache information.
if manager.use_fine_grained_cache():
process_fine_grained_cache_graph(graph, manager)
else:
process_graph(graph, manager)
updated = preserve_cache(graph)
set_updated = set(updated)
manager.saved_cache.clear()
@@ -2437,14 +2457,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
process_stale_scc(graph, scc, manager)

# If we are running in fine-grained incremental mode with caching,
# we always process fresh SCCs so that we have all of the symbol
# tables and fine-grained dependencies available.
if manager.options.use_fine_grained_cache:
for prev_scc in fresh_scc_queue:
process_fresh_scc(graph, prev_scc, manager)
fresh_scc_queue = []

sccs_left = len(fresh_scc_queue)
nodes_left = sum(len(scc) for scc in fresh_scc_queue)
manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left)
@@ -2456,6 +2468,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
manager.log("No fresh SCCs left in queue")


def process_fine_grained_cache_graph(graph: Graph, manager: BuildManager) -> None:
"""Finish loading everything for use in the fine-grained incremental cache"""

# If we are running in fine-grained incremental mode with caching,
# we process all SCCs as fresh SCCs so that we have all of the symbol
# tables and fine-grained dependencies available.
# We fail the loading of any SCC that we can't load a meta for, so we
# don't have anything *but* fresh SCCs.
Collaborator:
Have you tested what happens if we fail loading some SCCs? Will they be picked up by the build later?

Author:
Yeah, that is tested by the testAddModuleAfterCache tests.
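
To illustrate the deferral behaviour discussed above: modules with no usable cache metadata are left out of the initial fine-grained cache load and are then picked up as changed modules by the first fine-grained update. The sketch below is a minimal toy model of that flow in plain Python, not mypy's actual API; the names initial_load, first_update, and cached are purely illustrative.

from typing import List, Set


def initial_load(sources: List[str], cached: Set[str]) -> Set[str]:
    # Analogous to load_graph during a fine-grained cache load: a module
    # with no cache meta is deferred (State.__init__ raises ModuleNotFound)
    # rather than parsed, so it simply does not appear in the graph.
    return {mod for mod in sources if mod in cached}


def first_update(sources: List[str], loaded: Set[str]) -> List[str]:
    # The first fine-grained update then treats every deferred module as changed.
    return [mod for mod in sources if mod not in loaded]


if __name__ == '__main__':
    cached = {'a', 'b'}                    # modules with cache metadata
    sources = ['a', 'b', 'c']              # 'c' was added after the cache was written
    loaded = initial_load(sources, cached)
    print(first_update(sources, loaded))   # ['c'] -- picked up by the update
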

sccs = sorted_components(graph)
manager.log("Found %d SCCs; largest has %d nodes" %
(len(sccs), max(len(scc) for scc in sccs)))

for ascc in sccs:
# Order the SCC's nodes using a heuristic.
# Note that ascc is a set, and scc is a list.
scc = order_ascc(graph, ascc)
process_fresh_scc(graph, scc, manager)


def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]:
"""Come up with the ideal processing order within an SCC.

19 changes: 13 additions & 6 deletions mypy/dmypy_server.py
@@ -273,9 +273,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
self.fscache = FileSystemCache(self.options.python_version)
self.fswatcher = FileSystemWatcher(self.fscache)
self.update_sources(sources)
if not self.options.use_fine_grained_cache:
# Stores the initial state of sources as a side effect.
self.fswatcher.find_changed()
try:
result = mypy.build.build(sources=sources,
options=self.options,
@@ -292,12 +289,11 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
graph = result.graph
self.fine_grained_manager = FineGrainedBuildManager(manager, graph)
self.previous_sources = sources
self.fscache.flush()

# If we are using the fine-grained cache, build hasn't actually done
# the typechecking on the updated files yet.
# Run a fine-grained update starting from the cached data
if self.options.use_fine_grained_cache:
if result.used_cache:
# Pull times and hashes out of the saved_cache and stick them into
# the fswatcher, so we pick up the changes.
for state in self.fine_grained_manager.graph.values():
@@ -310,9 +306,20 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict

# Run an update
changed = self.find_changed(sources)

# Find anything that has had its dependency list change
for state in self.fine_grained_manager.graph.values():
if not state.is_fresh():
assert state.path is not None
changed.append((state.id, state.path))

if changed:
messages = self.fine_grained_manager.update(changed)
self.fscache.flush()
else:
# Stores the initial state of sources as a side effect.
self.fswatcher.find_changed()

self.fscache.flush()

status = 1 if messages else 0
self.previous_messages = messages[:]
3 changes: 1 addition & 2 deletions mypy/fixup.py
@@ -22,8 +22,7 @@ def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile],
node_fixer.visit_symbol_table(tree.names)


def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile],
quick_and_dirty: bool) -> None:
def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None:
compute_all_mros(tree.names, modules)


4 changes: 3 additions & 1 deletion mypy/server/astdiff.py
@@ -136,7 +136,9 @@ def snapshot_symbol_table(name_prefix: str, table: SymbolTable) -> Dict[str, Sna
common = (fullname, symbol.kind, symbol.module_public)
if symbol.kind == MODULE_REF:
# This is a cross-reference to another module.
assert isinstance(node, MypyFile)
# If the reference is busted because the other module is missing,
# the node will be a "stale_info" TypeInfo produced by fixup,
# but that doesn't really matter to us here.
result[name] = ('Moduleref', common)
elif symbol.kind == TVAR:
assert isinstance(node, TypeVarExpr)
16 changes: 12 additions & 4 deletions mypy/server/update.py
@@ -171,13 +171,17 @@ def __init__(self,
# Module that we haven't processed yet but that are known to be stale.
self.stale = [] # type: List[Tuple[str, str]]
# Disable the cache so that load_graph doesn't try going back to disk
# for the cache. This is kind of a hack and it might be better to have
# this directly reflected in load_graph's interface.
self.options.cache_dir = os.devnull
# for the cache.
self.manager.cache_enabled = False
manager.saved_cache = {}
manager.only_load_from_cache = False

# Some hints to the test suite about what is going on:
# Active triggers during the last update
self.triggered = [] # type: List[str]
# Modules passed to update during the last update
self.changed_modules = [] # type: List[Tuple[str, str]]
# Modules processed during the last update
self.updated_modules = [] # type: List[str]

def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
"""Update previous build result by processing changed modules.
@@ -198,10 +202,13 @@ def update(self, changed_modules: List[Tuple[str, str]]) -> List[str]:
"""
assert changed_modules, 'No changed modules'

self.changed_modules = changed_modules

# Reset global caches for the new build.
find_module_clear_caches()

self.triggered = []
self.updated_modules = []
changed_modules = dedupe_modules(changed_modules + self.stale)
initial_set = {id for id, _ in changed_modules}
self.manager.log_fine_grained('==== update %s ====' % ', '.join(
@@ -258,6 +265,7 @@ def update_single(self, module: str, path: str) -> Tuple[List[str],
- Whether there was a blocking error in the module
"""
self.manager.log_fine_grained('--- update single %r ---' % module)
self.updated_modules.append(module)

# TODO: If new module brings in other modules, we parse some files multiple times.
manager = self.manager
17 changes: 16 additions & 1 deletion mypy/test/helpers.py
@@ -5,7 +5,7 @@
import time
import shutil

from typing import List, Dict, Tuple, Callable, Any, Optional
from typing import List, Iterable, Dict, Tuple, Callable, Any, Optional

from mypy import defaults
from mypy.test.config import test_temp_dir
@@ -98,6 +98,21 @@ def assert_string_arrays_equal(expected: List[str], actual: List[str],
raise AssertionError(msg)


def assert_module_equivalence(name: str,
expected: Optional[Iterable[str]], actual: Iterable[str]) -> None:
if expected is not None:
expected_normalized = sorted(expected)
actual_normalized = sorted(set(actual).difference({"__main__"}))
assert_string_arrays_equal(
expected_normalized,
actual_normalized,
('Actual modules ({}) do not match expected modules ({}) '
'for "[{} ...]"').format(
', '.join(actual_normalized),
', '.join(expected_normalized),
name))


def update_testcase_output(testcase: DataDrivenTestCase, output: List[str]) -> None:
assert testcase.old_cwd is not None, "test was not properly set up"
testcase_path = os.path.join(testcase.old_cwd, testcase.file)
20 changes: 3 additions & 17 deletions mypy/test/testcheck.py
@@ -11,7 +11,7 @@
from mypy.test.config import test_temp_dir
from mypy.test.data import DataDrivenTestCase, DataSuite
from mypy.test.helpers import (
assert_string_arrays_equal, normalize_error_messages,
assert_string_arrays_equal, normalize_error_messages, assert_module_equivalence,
retry_on_error, update_testcase_output, parse_options,
copy_and_fudge_mtime
)
@@ -190,29 +190,15 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int = 0)
self.verify_cache(module_data, a, res.manager)
if incremental_step > 1:
suffix = '' if incremental_step == 2 else str(incremental_step - 1)
self.check_module_equivalence(
assert_module_equivalence(
'rechecked' + suffix,
testcase.expected_rechecked_modules.get(incremental_step - 1),
res.manager.rechecked_modules)
self.check_module_equivalence(
assert_module_equivalence(
'stale' + suffix,
testcase.expected_stale_modules.get(incremental_step - 1),
res.manager.stale_modules)

def check_module_equivalence(self, name: str,
expected: Optional[Set[str]], actual: Set[str]) -> None:
if expected is not None:
expected_normalized = sorted(expected)
actual_normalized = sorted(actual.difference({"__main__"}))
assert_string_arrays_equal(
expected_normalized,
actual_normalized,
('Actual modules ({}) do not match expected modules ({}) '
'for "[{} ...]"').format(
', '.join(actual_normalized),
', '.join(expected_normalized),
name))

def verify_cache(self, module_data: List[Tuple[str, str, str]], a: List[str],
manager: build.BuildManager) -> None:
# There should be valid cache metadata for each module except