From 3b57a49215ad9df1717007aeddd57e67c5a381c1 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 23 Mar 2022 11:14:15 +0100 Subject: [PATCH] avoid using `@sync_add` on remotecalls (JuliaLang/julia#44671) * avoid using `@sync_add` on remotecalls It seems like @sync_add adds the Futures to a queue (Channel) for @sync, which in turn calls wait() for all the futures synchronously. Not only that is slightly detrimental for network operations (latencies add up), but in case of Distributed the call to wait() may actually cause some compilation on remote processes, which is also wait()ed for. In result, some operations took a great amount of "serial" processing time if executed on many workers at once. For me, this closes JuliaLang/julia#44645. The major change can be illustrated as follows: First add some workers: ``` using Distributed addprocs(10) ``` and then trigger something that, for example, causes package imports on the workers: ``` using SomeTinyPackage ``` In my case (importing UnicodePlots on 10 workers), this improves the loading time over 10 workers from ~11s to ~5.5s. This is a far bigger issue when worker count gets high. The time of the processing on each worker is usually around 0.3s, so triggering this problem even on a relatively small cluster (64 workers) causes a really annoying delay, and running `@everywhere` for the first time on reasonable clusters (I tested with 1024 workers, see JuliaLang/julia#44645) usually takes more than 5 minutes. Which sucks. Anyway, on 64 workers this reduces the "first import" time from ~30s to ~6s, and on 1024 workers this seems to reduce the time from over 5 minutes (I didn't bother to measure that precisely now, sorry) to ~11s. Related issues: - Probably fixes JuliaLang/julia#39291. - JuliaLang/julia#42156 is a kinda complementary -- it removes the most painful source of slowness (the 0.3s precompilation on the workers), but the fact that the wait()ing is serial remains a problem if the network latencies are high. May help with JuliaLang/julia#38931 Co-authored-by: Valentin Churavy --- src/Distributed.jl | 4 ++-- src/clusterserialize.jl | 2 +- src/macros.jl | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Distributed.jl b/src/Distributed.jl index d428a6df0e6837..3bcbc7b67f60d7 100644 --- a/src/Distributed.jl +++ b/src/Distributed.jl @@ -10,7 +10,7 @@ import Base: getindex, wait, put!, take!, fetch, isready, push!, length, hash, ==, kill, close, isopen, showerror # imports for use -using Base: Process, Semaphore, JLOptions, buffer_writes, @sync_add, +using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap, VERSION_STRING, binding_module, atexit, julia_exename, julia_cmd, AsyncGenerator, acquire, release, invokelatest, shell_escape_posixly, shell_escape_csh, @@ -76,7 +76,7 @@ function _require_callback(mod::Base.PkgId) # broadcast top-level (e.g. from Main) import/using from node 1 (only) @sync for p in procs() p == 1 && continue - @sync_add remotecall(p) do + @async_unwrap remotecall_wait(p) do Base.require(mod) nothing end diff --git a/src/clusterserialize.jl b/src/clusterserialize.jl index e37987c5bf8751..28025ae867c788 100644 --- a/src/clusterserialize.jl +++ b/src/clusterserialize.jl @@ -243,7 +243,7 @@ An exception is raised if a global constant is requested to be cleared. """ function clear!(syms, pids=workers(); mod=Main) @sync for p in pids - @sync_add remotecall(clear_impl!, p, syms, mod) + @async_unwrap remotecall_wait(clear_impl!, p, syms, mod) end end clear!(sym::Symbol, pid::Int; mod=Main) = clear!([sym], [pid]; mod=mod) diff --git a/src/macros.jl b/src/macros.jl index 0a62fdd5439f0a..a767c7a40d9c9f 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -222,10 +222,10 @@ function remotecall_eval(m::Module, procs, ex) if pid == myid() run_locally += 1 else - @sync_add remotecall(Core.eval, pid, m, ex) + @async_unwrap remotecall_wait(Core.eval, pid, m, ex) end end - yield() # ensure that the remotecall_fetch have had a chance to start + yield() # ensure that the remotecalls have had a chance to start # execute locally last as we do not want local execution to block serialization # of the request to remote nodes.