Skip to content

Commit

Permalink
Merge pull request #3862 from Zac-HD/uncap-replay-buffers
Browse files Browse the repository at this point in the history
Avoid pointless discards during the `reuse` and `target` phases
  • Loading branch information
Zac-HD authored Jan 30, 2024
2 parents 17e23f2 + 91c63bb commit 8a9279c
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 9 deletions.
13 changes: 13 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
RELEASE_TYPE: patch

This patch slightly changes how we replay examples from
:doc:`the database <database>`: if the behavior of the saved example has
changed, we now keep running the test case instead of aborting at the size
of the saved example. While we know it's not the *same* example, we might
as well continue running the test!

Because we now finish running a few more examples for affected tests, this
might cause a slight slowdown - but it is correspondingly more likely to find a bug.

We've also applied similar tricks to the :ref:`target phase <phases>`, where
they are a pure performance improvement for affected tests.
9 changes: 5 additions & 4 deletions hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,9 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
if TESTCASE_CALLBACKS:
if self.failed_normally or self.failed_due_to_deadline:
phase = "shrink"
else:
elif runner := getattr(self, "_runner", None):
phase = runner._current_phase
else: # pragma: no cover # in case of messing with internals
phase = "unknown"
tc = make_testcase(
start_timestamp=self._start_timestamp,
Expand Down Expand Up @@ -1084,7 +1086,7 @@ def run_engine(self):
else:
database_key = None

runner = ConjectureRunner(
runner = self._runner = ConjectureRunner(
self._execute_once_for_engine,
settings=self.settings,
random=self.random,
Expand Down Expand Up @@ -1510,8 +1512,7 @@ def wrapped_test(*arguments, **kwargs):
except UnsatisfiedAssumption:
raise DidNotReproduce(
"The test data failed to satisfy an assumption in the "
"test. Have you added it since this blob was "
"generated?"
"test. Have you added it since this blob was generated?"
) from None

# There was no @reproduce_failure, so start by running any explicit
Expand Down
13 changes: 10 additions & 3 deletions hypothesis-python/src/hypothesis/internal/conjecture/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def __init__(

# Global dict of per-phase statistics, and a list of per-call stats
# which transfer to the global dict at the end of each phase.
self._current_phase = "(not a phase)"
self.statistics = {}
self.stats_per_test_case = []

Expand Down Expand Up @@ -175,6 +176,7 @@ def _log_phase_statistics(self, phase):
self.stats_per_test_case.clear()
start_time = time.perf_counter()
try:
self._current_phase = phase
yield
finally:
self.statistics[phase + "-phase"] = {
Expand Down Expand Up @@ -554,7 +556,7 @@ def reuse_existing_examples(self):
corpus.extend(extra)

for existing in corpus:
data = self.cached_test_function(existing)
data = self.cached_test_function(existing, extend=BUFFER_SIZE)
if data.status != Status.INTERESTING:
self.settings.database.delete(self.database_key, existing)
self.settings.database.delete(self.secondary_key, existing)
Expand All @@ -569,7 +571,7 @@ def reuse_existing_examples(self):
pareto_corpus.sort(key=sort_key)

for existing in pareto_corpus:
data = self.cached_test_function(existing)
data = self.cached_test_function(existing, extend=BUFFER_SIZE)
if data not in self.pareto_front:
self.settings.database.delete(self.pareto_key, existing)
if data.status == Status.INTERESTING:
Expand Down Expand Up @@ -693,6 +695,7 @@ def generate_new_examples(self):
ran_optimisations = False

while self.should_generate_more():
self._current_phase = "generate"
prefix = self.generate_novel_prefix()
assert len(prefix) <= BUFFER_SIZE
if (
Expand Down Expand Up @@ -763,6 +766,7 @@ def generate_new_examples(self):
and not ran_optimisations
):
ran_optimisations = True
self._current_phase = "target"
self.optimise_targets()

def generate_mutations_from(self, data):
Expand Down Expand Up @@ -884,7 +888,8 @@ def optimise_targets(self):
if any_improvements:
continue

self.pareto_optimise()
if self.best_observed_targets:
self.pareto_optimise()

if prev_calls == self.call_count:
break
Expand All @@ -902,6 +907,7 @@ def _run(self):
# but if we've been asked to run it but not generation then we have to
# run it explicitly on its own here.
if Phase.generate not in self.settings.phases:
self._current_phase = "target"
self.optimise_targets()
with self._log_phase_statistics("shrink"):
self.shrink_interesting_examples()
Expand Down Expand Up @@ -1011,6 +1017,7 @@ def new_shrinker(self, example, predicate=None, allow_transition=None):
predicate,
allow_transition=allow_transition,
explain=Phase.explain in self.settings.phases,
in_target_phase=self._current_phase == "target",
)

def cached_test_function(self, buffer, *, error_on_discard=False, extend=0):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def allow_transition(source, destination):
# If ``destination`` dominates ``source`` then ``source``
# must be dominated in the front - either ``destination`` is in
# the front, or it was not added to it because it was
# dominated by something in it.,
# dominated by something in it.
try:
self.front.front.remove(source)
except ValueError:
Expand Down
11 changes: 10 additions & 1 deletion hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ def __init__(
*,
allow_transition: bool,
explain: bool,
in_target_phase: bool = False,
):
"""Create a shrinker for a particular engine, with a given starting
point and predicate. When shrink() is called it will attempt to find an
Expand Down Expand Up @@ -309,6 +310,14 @@ def __init__(
# testing and learning purposes.
self.extra_dfas: Dict[str, ConcreteDFA] = {}

# Because the shrinker is also used to `pareto_optimise` in the target phase,
# we sometimes want to allow extending buffers instead of aborting at the end.
if in_target_phase:
from hypothesis.internal.conjecture.engine import BUFFER_SIZE

self.__extend = BUFFER_SIZE
else:
self.__extend = 0
self.should_explain = explain

@derived_value # type: ignore
Expand Down Expand Up @@ -417,7 +426,7 @@ def cached_test_function(self, buffer):
with status >= INVALID that would result from running this buffer."""

buffer = bytes(buffer)
result = self.engine.cached_test_function(buffer)
result = self.engine.cached_test_function(buffer, extend=self.__extend)
self.incorporate_test_data(result)
if self.calls - self.calls_at_last_shrink >= self.max_stall:
raise StopShrinking
Expand Down

0 comments on commit 8a9279c

Please sign in to comment.