Open
Description
I tried to create a MWE that was closer to the actual workflow I'm working with. I'm guessing the errors occurring here are related to #437 (one of the four reported errors below is the same as the linked issue). I hope this is helpful and not just extra noise!
Contents of `mwe.jl`:
# Cluster setup: load Distributed, add worker processes, and load the table
# packages plus a job channel on every process.
using Distributed
# NOTE(review): this creates a global named `nworkers`, the same name as a
# function exported by Distributed — confirm the shadowing is intended.
nworkers = 1
# nprocs() counts every process, so this adds (nworkers + 1 - nprocs()) processes;
# with nprocs() == 1 that is a single worker.
addprocs(nworkers - nprocs() + 1)
# Make DTables, DataFrames and CSV available on all processes.
@everywhere using DTables, DataFrames, CSV
# Each process gets its own global `job_channel`, created with Channel(100).
@everywhere job_channel = Channel(100)
# Start an endless job-processing loop on process 2. The result of `remotecall`
# is not fetched or waited on here, and nothing in this script puts a job into
# `job_channel`.
remotecall(2) do
while true
# Take the next job from the channel (waits while the channel is empty).
job = take!(job_channel)
try
# A job is indexed positionally: element 1 is the callable, the remaining
# elements are its arguments.
func = job[1]
args = job[2:end]
func(args...)
catch ex
# Log the exception and its stack trace, then continue with the next job.
@info "error $ex"
@info "stacktrace: $(stacktrace(catch_backtrace()))"
end
end
end
# Run the table workflow on process 2 and wait for its result; an exception
# thrown there is re-raised on the caller.
remotecall_fetch(2) do
# Build a DTable from "file.csv", loading it with CSV.File and using DataFrame
# as the table type.
dt = DTable(x -> CSV.File(x), ["file.csv"]; tabletype = DataFrame)
# Materialize the DTable into `df`.
df = fetch(dt)
# Split the columns by position into two groups: 1:48 and 49:102.
cols1 = [df[!, c] for c in 1:48]
cols2 = [df[!, c] for c in 49:102]
# `cols` is assigned but not read anywhere else in this script.
cols = (cols1, cols2)
# Append one random column, as long as the first column of the second group.
cols_appended = (cols1, (cols2..., rand(length(cols2[1]))))
# Rebuild the DataFrame from name => column pairs; the extra column is named
# "appended", and copycols = false is passed to the constructor.
df = DataFrame(
(names(df)[1:48] .=> cols_appended[1])...,
((names(df)[49:102]..., "appended") .=> cols_appended[2])...;
copycols = false,
)
# Wrap the rebuilt DataFrame in a DTable again.
dt = DTable(df)
# Log length(dt) twice (the same call, repeated).
@info "$(length(dt))"
@info "$(length(dt))"
# Fetch and split the columns again; cols1, cols2 and cols are not read after
# this point.
df = fetch(dt)
cols1 = [df[!, c] for c in 1:48]
cols2 = [df[!, c] for c in 49:102]
cols = (cols1, cols2)
df = fetch(dt)
# Add two random columns, :new1 and :new2, each with length(dt) values.
foreach((:new1, :new2), (rand(length(dt)), rand(length(dt)))) do name, val
setproperty!(df, name, val)
end
dt = DTable(df)
# Column positions kept by `select`: 6, 12 and 48 are used as grouping columns
# below, 93 and 94 as the reduced columns.
i = [6, 12, 48, 93, 94]
dt = select(dt, i...; copycols = false)
# Group by the names of columns 6, 12 and 48; the names are looked up in `df`.
gdt = groupby(dt, Symbol.(names(df)[[6, 12, 48]]))
gkeys = sort!(collect(keys(gdt)))
# For each group key, in sorted order, reduce columns 93 and 94 of that group
# with +; `fetch` is then applied to every element of the mapped result.
sums = map(gkeys) do key
reduce(+, gdt[key]; cols = Symbol.(names(df)[[93, 94]]))
end .|> fetch
end
I `include`d `mwe.jl` in a fresh Julia session multiple times (meaning each `include` occurred in its own fresh Julia session) and recorded the following errors. Note that nothing changed in `mwe.jl` from run to run.
Error 1:
julia> include("mwe.jl")
From worker 2: [ Info: 233930
From worker 2: [ Info: 233930
ERROR: LoadError: On worker 2:
MethodError: Cannot `convert` an object of type
Vector{Any} to an object of type
Union{Dagger.Thunk, Dagger.Chunk}
Closest candidates are:
convert(::Type{T}, ::T) where T
@ Base Base.jl:64
Stacktrace:
[1] get!
@ ./dict.jl:455
[2] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:116
[3] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:100 [inlined]
[4] #eager_submit_internal!#96
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:93
[5] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:11 [inlined]
[6] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:9
[7] #invokelatest#2
@ ./essentials.jl:819
[8] invokelatest
@ ./essentials.jl:816
[9] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[10] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[11] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[12] #109
@ ./task.jl:514
Stacktrace:
[1] #remotecall_fetch#159
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] eager_submit!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:128
[6] eager_launch!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:195
[7] enqueue!
@ ~/.julia/packages/Dagger/ZOt9H/src/queue.jl:12 [inlined]
[8] #spawn#88
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:322
[9] spawn
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:286 [inlined]
[10] #39
@ ~/.julia/packages/DTables/bA4g3/src/operations/operations.jl:35 [inlined]
[11] iterate
@ ./generator.jl:47 [inlined]
[12] _collect
@ ./array.jl:802
[13] collect_similar
@ ./array.jl:711
[14] map
@ ./abstractarray.jl:3263
[15] map
@ ~/.julia/packages/DTables/bA4g3/src/operations/operations.jl:35
[16] _manipulate
@ ~/.julia/packages/DTables/bA4g3/src/operations/dataframes_interface.jl:89
[17] #manipulate#247
@ ~/.julia/packages/DTables/bA4g3/src/operations/dataframes_interface.jl:48
[18] #select#258
@ ~/.julia/packages/DTables/bA4g3/src/operations/dataframes_interface.jl:171
[19] #7
@ ~/tmp/mwe.jl:47
[20] #invokelatest#2
@ ./essentials.jl:819 [inlined]
[21] invokelatest
@ ./essentials.jl:816
[22] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[23] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[24] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[25] #109
@ ./task.jl:514
Stacktrace:
[1] remotecall_fetch(::Function, ::Distributed.Worker; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch(::Function, ::Distributed.Worker)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch(::Function, ::Int64)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] top-level scope
@ ~/tmp/mwe.jl:22
[6] include(fname::String)
@ Base.MainInclude ./client.jl:478
[7] top-level scope
@ REPL[1]:1
in expression starting at /home/steven/tmp/mwe.jl:22
Error 2:
julia> include("mwe.jl")
From worker 2: [ Info: 233930
From worker 2: [ Info: 233930
ERROR: LoadError: On worker 2:
UndefRefError: access to undefined reference
Stacktrace:
[1] getindex
@ ./essentials.jl:13 [inlined]
[2] get!
@ ./dict.jl:465
[3] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:116
[4] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:100 [inlined]
[5] #eager_submit_internal!#96
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:93
[6] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:11 [inlined]
[7] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:9
[8] #invokelatest#2
@ ./essentials.jl:819
[9] invokelatest
@ ./essentials.jl:816
[10] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[11] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[12] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[13] #109
@ ./task.jl:514
Stacktrace:
[1] #remotecall_fetch#159
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] eager_submit!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:128
[6] eager_launch!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:195
[7] enqueue!
@ ~/.julia/packages/Dagger/ZOt9H/src/queue.jl:12 [inlined]
[8] #spawn#88
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:322
[9] spawn
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:286 [inlined]
[10] #15
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:401 [inlined]
[11] iterate
@ ./generator.jl:47 [inlined]
[12] collect
@ ./array.jl:782
[13] chunk_lengths
@ ~/.julia/packages/DTables/bA4g3/src/table/dtable.jl:254
[14] length
@ ~/.julia/packages/DTables/bA4g3/src/table/dtable.jl:258
[15] #7
@ ~/tmp/mwe.jl:42
[16] #invokelatest#2
@ ./essentials.jl:819 [inlined]
[17] invokelatest
@ ./essentials.jl:816
[18] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[19] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[20] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[21] #109
@ ./task.jl:514
Stacktrace:
[1] remotecall_fetch(::Function, ::Distributed.Worker; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch(::Function, ::Distributed.Worker)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch(::Function, ::Int64)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] top-level scope
@ ~/tmp/mwe.jl:22
[6] include(fname::String)
@ Base.MainInclude ./client.jl:478
[7] top-level scope
@ REPL[1]:1
in expression starting at /home/steven/tmp/mwe.jl:22
Error 3:
julia> include("mwe.jl")
From worker 2: [ Info: 233930
From worker 2: [ Info: 233930
[7463] signal (11.1): Segmentation fault
in expression starting at /home/steven/tmp/mwe.jl:22
jl_object_id__cold at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/builtins.c:417
type_hash at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jltypes.c:1332
typekey_hash at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jltypes.c:1344
jl_precompute_memoized_dt at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jltypes.c:1409
inst_datatype_inner at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jltypes.c:1731
jl_inst_arg_tuple_type at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/jltypes.c:1826
arg_type_tuple at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2100 [inlined]
jl_lookup_generic_ at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2884 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2936
collect_task_inputs at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:392
signature at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:256
#99 at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:680
lock at ./lock.jl:229
schedule! at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:642 [inlined]
schedule! at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:642 [inlined]
scheduler_run at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:508
#compute_dag#82 at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:449
compute_dag at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:414 [inlined]
#compute#141 at /home/steven/.julia/packages/Dagger/ZOt9H/src/compute.jl:23
compute at /home/steven/.julia/packages/Dagger/ZOt9H/src/compute.jl:22 [inlined]
macro expansion at /home/steven/.julia/packages/Dagger/ZOt9H/src/sch/eager.jl:28 [inlined]
#50 at ./threadingconstructs.jl:410
unknown function (ip: 0x7efbf8213f8f)
_jl_invoke at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2758 [inlined]
ijl_apply_generic at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/gf.c:2940
jl_apply at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/julia.h:1880 [inlined]
start_task at /cache/build/default-amdci5-5/julialang/julia-release-1-dot-9/src/task.c:1092
Allocations: 34071935 (Pool: 34043867; Big: 28068); GC: 39
zsh: segmentation fault julia --project
Error 3b: Occasionally the segfault was preceded by one or more occurrences of:
Unhandled Task ERROR: ArgumentError: destination has fewer elements than required
Stacktrace:
[1] copyto!(dest::Vector{Dagger.Sch.ProcessorState}, src::Base.ValueIterator{Dict{Dagger.Processor, Dagger.Sch.ProcessorState}})
@ Base ./abstractarray.jl:949
[2] _collect
@ ./array.jl:713 [inlined]
[3] collect
@ ./array.jl:707 [inlined]
[4] macro expansion
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/Sch.jl:1189 [inlined]
[5] (::Dagger.Sch.var"#126#133"{Dagger.Sch.ProcessorInternalState, UInt64, RemoteChannel{Channel{Any}}, Dagger.ThreadProc})()
@ Dagger.Sch ./task.jl:134
Error 4:
julia> include("mwe.jl")
From worker 2: [ Info: 233930
From worker 2: [ Info: 233930
ERROR: LoadError: On worker 2:
AssertionError: Multiple concurrent writes to Dict detected!
Stacktrace:
[1] rehash!
@ ./dict.jl:208
[2] _setindex!
@ ./dict.jl:355 [inlined]
[3] get!
@ ./dict.jl:477
[4] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:116
[5] reschedule_syncdeps!
@ ~/.julia/packages/Dagger/ZOt9H/src/sch/util.jl:100 [inlined]
[6] #eager_submit_internal!#96
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:93
[7] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:11 [inlined]
[8] eager_submit_internal!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:9
[9] #invokelatest#2
@ ./essentials.jl:819
[10] invokelatest
@ ./essentials.jl:816
[11] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[12] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[13] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[14] #109
@ ./task.jl:514
Stacktrace:
[1] #remotecall_fetch#159
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] eager_submit!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:128
[6] eager_launch!
@ ~/.julia/packages/Dagger/ZOt9H/src/submission.jl:195
[7] enqueue!
@ ~/.julia/packages/Dagger/ZOt9H/src/queue.jl:12 [inlined]
[8] #spawn#88
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:322
[9] spawn
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:286 [inlined]
[10] #48
@ ~/.julia/packages/Dagger/ZOt9H/src/thunk.jl:401 [inlined]
[11] iterate
@ ./generator.jl:47 [inlined]
[12] collect_to!
@ ./array.jl:840 [inlined]
[13] collect_to_with_first!
@ ./array.jl:818 [inlined]
[14] collect
@ ./array.jl:792
[15] #reduce#42
@ ~/.julia/packages/DTables/bA4g3/src/operations/operations.jl:111
[16] #14
@ ~/tmp/mwe.jl:51
[17] iterate
@ ./generator.jl:47 [inlined]
[18] collect_to!
@ ./array.jl:840 [inlined]
[19] collect_to_with_first!
@ ./array.jl:818 [inlined]
[20] _collect
@ ./array.jl:812
[21] collect_similar
@ ./array.jl:711
[22] map
@ ./abstractarray.jl:3263
[23] #7
@ ~/tmp/mwe.jl:50
[24] #invokelatest#2
@ ./essentials.jl:819 [inlined]
[25] invokelatest
@ ./essentials.jl:816
[26] #110
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285
[27] run_work_thunk
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:70
[28] macro expansion
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:285 [inlined]
[29] #109
@ ./task.jl:514
Stacktrace:
[1] remotecall_fetch(::Function, ::Distributed.Worker; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:465
[2] remotecall_fetch(::Function, ::Distributed.Worker)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:454
[3] #remotecall_fetch#162
@ ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492 [inlined]
[4] remotecall_fetch(::Function, ::Int64)
@ Distributed ~/programs/julia/julia-1.9.3/share/julia/stdlib/v1.9/Distributed/src/remotecall.jl:492
[5] top-level scope
@ ~/tmp/mwe.jl:22
[6] include(fname::String)
@ Base.MainInclude ./client.jl:478
[7] top-level scope
@ REPL[1]:1
in expression starting at /home/steven/tmp/mwe.jl:22
Comments:
- The segfault was by far the most common error; the others occurred just once each (over the 10--15 trials I ran).
- In my actual work, I don't think I've ever come across the `MethodError` with `convert` (error 1). I most commonly run into `"Multiple concurrent writes to Dict detected!"` with `DTables.reduce`, and the error mentioned in #437 (comment), which I did not see with `mwe.jl`.
- `"file.csv"` is a 157 MB table with 233930 rows and 102 columns of `String` and `Float64` values.
- The `remotecall` probably isn't necessary for reproducing the bugs, but I included it because that is how my actual work is.