Do not reuse offload() for wrapping generic executor

crusaderky · crusaderky · commit ad45e4a8e26c · 2023-03-13T18:21:45.000Z
diff --git a/distributed/tests/test_utils.py b/distributed/tests/test_utils.py
@@ -49,6 +49,7 @@
     parse_ports,
     read_block,
     recursive_to_dict,
+    run_in_executor_with_context,
     seek_delimiter,
     set_thread_state,
     sync,
@@ -663,6 +664,37 @@ def test_parse_ports():
         parse_ports("100.5")
 
 
+@gen_test()
+async def test_run_in_executor_with_context():
+    class MyExecutor(Executor):
+        call_count = 0
+
+        def submit(self, __fn, *args, **kwargs):
+            self.call_count += 1
+            f = Future()
+            f.set_result(__fn(*args, **kwargs))
+            return f
+
+    ex = MyExecutor()
+    out = await run_in_executor_with_context(ex, inc, 1)
+    assert out == 2
+    assert ex.call_count == 1
+
+
+@gen_test()
+async def test_run_in_executor_with_context_preserves_contextvars():
+    var = contextvars.ContextVar("var")
+
+    with ThreadPoolExecutor(2) as ex:
+
+        async def set_var(v: str) -> None:
+            var.set(v)
+            r = await run_in_executor_with_context(ex, var.get)
+            assert r == v
+
+        await asyncio.gather(set_var("foo"), set_var("bar"))
+
+
 @gen_test()
 async def test_offload():
     assert (await offload(inc, 1)) == 2
@@ -681,23 +713,6 @@ async def set_var(v: str) -> None:
     await asyncio.gather(set_var("foo"), set_var("bar"))
 
 
-@gen_test()
-async def test_offload_custom_executor():
-    class MyExecutor(Executor):
-        call_count = 0
-
-        def submit(self, __fn, *args, **kwargs):
-            self.call_count += 1
-            f = Future()
-            f.set_result(__fn(*args, **kwargs))
-            return f
-
-    ex = MyExecutor()
-    out = await offload(inc, 1, executor=ex)
-    assert out == 2
-    assert ex.call_count == 1
-
-
 def test_serialize_for_cli_deprecated():
     with pytest.warns(FutureWarning, match="serialize_for_cli is deprecated"):
         from distributed.utils import serialize_for_cli
diff --git a/distributed/utils.py b/distributed/utils.py
@@ -1421,32 +1421,49 @@ def import_term(name: str) -> AnyType:
     return getattr(module, attr_name)
 
 
-async def offload(  # type: ignore[valid-type]
-    fn: Callable[P, T],
+async def run_in_executor_with_context(
+    executor: Executor | None,
+    func: Callable[P, T],
+    /,
     *args: P.args,
-    executor: Executor | None = None,
     **kwargs: P.kwargs,
 ) -> T:
     """Variant of :meth:`~asyncio.AbstractEventLoop.run_in_executor`, which
     propagates contextvars.
-    By default, it offloads to an ad-hoc thread pool with a single worker.
+    Note that this limits the type of Executor to those that do not pickle objects.
 
     See also
     --------
+    asyncio.AbstractEventLoop.run_in_executor
+    offload
     https://bugs.python.org/issue34014
     """
-    if executor is None:
-        # Not the same as defaulting to _offload_executor in the parameters, as this
-        # allows monkey-patching the _offload_executor during unit tests
-        executor = _offload_executor
-
     loop = asyncio.get_running_loop()
     context = contextvars.copy_context()
     return await loop.run_in_executor(
-        executor, lambda: context.run(fn, *args, **kwargs)
+        executor, lambda: context.run(func, *args, **kwargs)
     )
 
 
+def offload(
+    func: Callable[P, T],
+    /,
+    *args: P.args,
+    **kwargs: P.kwargs,
+) -> Awaitable[T]:
+    """Run a synchronous function in a separate thread.
+    Like :func:`asyncio.to_thread`, this propagates contextvars; unlike it, this
+    offloads to an ad-hoc thread pool with a single worker.
+
+    See also
+    --------
+    asyncio.to_thread
+    run_in_executor_with_context
+    https://bugs.python.org/issue34014
+    """
+    return run_in_executor_with_context(_offload_executor, func, *args, **kwargs)
+
+
 class EmptyContext:
     def __enter__(self):
         pass
diff --git a/distributed/worker.py b/distributed/worker.py
@@ -105,6 +105,7 @@
     offload,
     parse_ports,
     recursive_to_dict,
+    run_in_executor_with_context,
     silence_logging,
     thread_state,
     wait_for,
@@ -2275,7 +2276,8 @@ async def execute(self, key: str, *, stimulus_id: str) -> StateMachineEvent:
                         self.scheduler_delay,
                     )
                 elif "ThreadPoolExecutor" in str(type(e)):
-                    result = await offload(
+                    result = await run_in_executor_with_context(
+                        e,
                         apply_function,
                         function,
                         args2,
@@ -2285,7 +2287,6 @@ async def execute(self, key: str, *, stimulus_id: str) -> StateMachineEvent:
                         self.active_threads,
                         self.active_threads_lock,
                         self.scheduler_delay,
-                        executor=e,
                     )
                 else:
                     # Can't capture contextvars across processes
diff --git a/distributed/worker_memory.py b/distributed/worker_memory.py
@@ -260,15 +260,19 @@ def metrics_callback(label: Hashable, value: float, unit: str) -> None:
                 label = (label,)
             worker.digest_metric(("memory-monitor", *label, unit), value)
 
-        # Work around bug with Tornado PeriodicCallback, which does not properly
-        # insulate contextvars
+        # Work around bug with Tornado 6.2 PeriodicCallback, which does not properly
+        # insulate contextvars. Without this hack, you would see metrics that are
+        # clearly emitted by Worker.execute labelled with 'memory-monitor'. So we
+        # wrap our contextvars change (made by add_callback) in create_task(), which
+        # copies and insulates the context.
         async def _() -> None:
             with context_meter.add_callback(metrics_callback):
                 # Measure delta between the measures from the SpillBuffer and the total
                 # end-to-end duration of _spill
                 await self._spill(worker, memory)
 
         await asyncio.create_task(_(), name="memory-monitor-spill")
+        # End of workaround
 
     async def _spill(self, worker: Worker, memory: int) -> None:
         """Evict keys until the process memory goes below the ``target`` threshold"""
diff --git a/distributed/worker_state_machine.py b/distributed/worker_state_machine.py
@@ -3123,9 +3123,9 @@ def _execute_done_common(
     def _handle_execute_success(self, ev: ExecuteSuccessEvent) -> RecsInstrs:
         """Task completed successfully"""
         ts, recs, instr = self._execute_done_common(ev)
-        # This is used for scheduler-side heuristics such as work stealing; it's
-        # important that it does not contain overhead from the thread pool or the
-        # worker's event loop (which are not the task's fault and are unpredictable).
+        # This is used for scheduler-side occupancy heuristics; it's important that it
+        # does not contain overhead from the thread pool or the worker's event loop
+        # (which are not the task's fault and are unpredictable).
         ts.startstops.append({"action": "compute", "start": ev.start, "stop": ev.stop})
         ts.nbytes = ev.nbytes
         ts.type = ev.type