Closed
Description
IIUC RelationaAI wanted to parallelize their testsuite. (cc: @Sacha0), and while we were looking into making Distributed.jl work with Julia processes that are launched with multi-threading enabled he saw
a failure such like:
ERROR: On worker 4:
LoadError: ConcurrencyViolationError("lock must be held")
Stacktrace:
[1] concurrency_violation
@ ./condition.jl:8
[2] assert_havelock
@ ./condition.jl:25 [inlined]
[3] assert_havelock
@ ./condition.jl:48 [inlined]
[4] assert_havelock
@ ./condition.jl:72 [inlined]
[5] notify
@ ./condition.jl:132
[6] #notify#548
@ ./condition.jl:130 [inlined]
[7] _require
@ ./loading.jl:1159
[8] require
@ ./loading.jl:1013
[9] require
@ ./loading.jl:997
[10] top-level scope
@ /build/source/usr/share/julia/stdlib/v1.7/Distributed/src/macros.jl:200
[11] include
@ ./client.jl:451
[12] top-level scope
@ none:1
[13] eval
@ ./boot.jl:373
[14] #115
@ /build/source/usr/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:274
[15] run_work_thunk
@ /build/source/usr/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:63
[16] run_work_thunk
@ /build/source/usr/share/julia/stdlib/v1.7/Distributed/src/process_messages.jl:72
[17] #108
@ ./threadingconstructs.jl:178
in expression starting at /tmp/nix-build-delve-build.drv-0/delve-source/test-
I wrote a small reproducer independent of Distributed.jl (the barrier nonsense just increases the likely hood of the failure.)
const ch = Channel{Bool}(Threads.nthreads())
const barrier = Base.Event()
@sync begin
for _ in 2:Threads.nthreads()
Threads.@spawn begin
@eval begin
put!(ch, true)
wait(barrier)
using LLVM
@info "LLVM loaded" Threads.threadid()
end
end
end
for _ in 2:Threads.nthreads()
take!(ch)
end
notify(barrier)
end
➜ rm ~/.julia/compiled/v1.6/LLVM/*
➜ test julia -t auto test.jl
Threads.threadid() = 3
Threads.threadid() = 2
Threads.threadid() = 4
┌ Info: LLVM loaded
└ Threads.threadid() = 3
ERROR: LoadError: TaskFailedException
nested task error: concurrency violation detected
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:33
[2] concurrency_violation()
@ Base ./condition.jl:8
[3] assert_havelock
@ ./condition.jl:25 [inlined]
[4] assert_havelock
@ ./condition.jl:48 [inlined]
[5] assert_havelock
@ ./condition.jl:72 [inlined]
[6] wait(c::Condition)
@ Base ./condition.jl:102
[7] _require(pkg::Base.PkgId)
@ Base ./loading.jl:978
[8] require(uuidkey::Base.PkgId)
@ Base ./loading.jl:914
[9] require(into::Module, mod::Symbol)
@ Base ./loading.jl:901
[10] top-level scope
@ ~/src/snippets/test/test.jl:13
[11] eval
@ ./boot.jl:360 [inlined]
[12] macro expansion
@ ~/src/snippets/test/test.jl:8 [inlined]
[13] (::var"#1#2")()
@ Main ./threadingconstructs.jl:169
...and 1 more exception.
Stacktrace:
[1] sync_end(c::Channel{Any})
@ Base ./task.jl:369
[2] top-level scope
@ task.jl:388
in expression starting at /home/vchuravy/src/snippets/test/test.jl:5
I sometimes (after making sure that LLVM.jl is precompiled) also get:
➜ test julia -t auto test.jl
signal (6): Aborted
in expression starting at /home/vchuravy/src/snippets/test/test.jl:5
gsignal at /usr/bin/../lib/libc.so.6 (unknown line)
abort at /usr/bin/../lib/libc.so.6 (unknown line)
jl_recache_other_ at /buildworker/worker/package_linux64/build/src/dump.c:2458 [inlined]
jl_recache_other at /buildworker/worker/package_linux64/build/src/dump.c:2472 [inlined]
_jl_restore_incremental at /buildworker/worker/package_linux64/build/src/dump.c:2553
jl_restore_incremental at /buildworker/worker/package_linux64/build/src/dump.c:2605
signal (11): Segmentation fault
in expression starting at /home/vchuravy/src/snippets/test/test.jl:5
jl_recache_types at /buildworker/worker/package_linux64/build/src/dump.c:2346 [inlined]
_jl_restore_incremental at /buildworker/worker/package_linux64/build/src/dump.c:2550
jl_restore_incremental at /buildworker/worker/package_linux64/build/src/dump.c:2605
_include_from_serialized at ./loading.jl:658
_include_from_serialized at ./loading.jl:658
_require_search_from_serialized at ./loading.jl:760
_require_search_from_serialized at ./loading.jl:760
_require at ./loading.jl:998
_tryrequire_from_serialized at ./loading.jl:689
require at ./loading.jl:914
_require_search_from_serialized at ./loading.jl:749
require at ./loading.jl:901
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
call_require at /buildworker/worker/package_linux64/build/src/toplevel.c:421 [inlined]
eval_import_path at /buildworker/worker/package_linux64/build/src/toplevel.c:458
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:684
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:529
_require at ./loading.jl:998
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:670
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:877
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:929
eval at ./boot.jl:360 [inlined]
macro expansion at /home/vchuravy/src/snippets/test/test.jl:8 [inlined]
#1 at ./threadingconstructs.jl:169
unknown function (ip: 0x7f4fa9f763fc)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2237 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2419
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1703 [inlined]
start_task at /buildworker/worker/package_linux64/build/src/task.c:839
unknown function (ip: (nil))
Allocations: 2650 (Pool: 2639; Big: 11); GC: 0