Skip to content

Commit 2e9264b

Browse files
committed
Fix issue 31252 - remote globals not being updated in some cases
1 parent b076d04 commit 2e9264b

File tree

2 files changed

+36
-42
lines changed

2 files changed

+36
-42
lines changed

stdlib/Distributed/src/clusterserialize.jl

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer
1515

1616
pid::Int # Worker we are connected to.
1717
tn_obj_sent::Set{UInt64} # TypeName objects sent
18-
glbs_sent::Dict{UInt64, UInt64} # (key,value) -> (objectid, hash_value)
18+
glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}} # (key,value) -> (symbol, (hash_value, objectid))
1919
glbs_in_tnobj::Dict{UInt64, Vector{Symbol}} # Track globals referenced in
2020
# anonymous functions.
2121
anonfunc_id::UInt64
@@ -116,10 +116,11 @@ function serialize(s::ClusterSerializer, g::GlobalRef)
116116
invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g)
117117
end
118118

119-
# Send/resend a global object if
120-
# a) has not been sent previously, i.e., we are seeing this objectid for the first time, or,
119+
# Send/resend a global binding if
120+
# a) has not been sent previously, i.e., we are seeing this binding for the first time, or,
121121
# b) hash value has changed or
122-
# c) is a bits type
122+
# c) hash value is same but of a different object, i.e. objectid has changed or
123+
# d) is a bits type
123124
function syms_2b_sent(s::ClusterSerializer, identifier)
124125
lst = Symbol[]
125126
check_syms = get(s.glbs_in_tnobj, identifier, [])
@@ -129,10 +130,12 @@ function syms_2b_sent(s::ClusterSerializer, identifier)
129130
if isbits(v)
130131
push!(lst, sym)
131132
else
132-
oid = objectid(v)
133-
if haskey(s.glbs_sent, oid)
134-
# We have sent this object before, see if it has changed.
135-
s.glbs_sent[oid] != hash(sym, hash(v)) && push!(lst, sym)
133+
if haskey(s.glbs_sent, sym)
134+
# We have sent this binding before, see if it has changed.
135+
hval, oid = s.glbs_sent[sym]
136+
if hval != hash(sym, hash(v)) || oid != objectid(v)
137+
push!(lst, sym)
138+
end
136139
else
137140
push!(lst, sym)
138141
end
@@ -144,26 +147,9 @@ end
144147
function serialize_global_from_main(s::ClusterSerializer, sym)
145148
v = getfield(Main, sym)
146149

147-
oid = objectid(v)
148-
record_v = true
149-
if isbits(v)
150-
record_v = false
151-
elseif !haskey(s.glbs_sent, oid)
152-
# set up a finalizer the first time this object is sent
153-
try
154-
finalizer(v) do x
155-
delete_global_tracker(s,x)
156-
end
157-
catch ex
158-
# Do not track objects that cannot be finalized.
159-
if isa(ex, ErrorException)
160-
record_v = false
161-
else
162-
rethrow()
163-
end
164-
end
150+
if !isbits(v)
151+
s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v))
165152
end
166-
record_v && (s.glbs_sent[oid] = hash(sym, hash(v)))
167153

168154
serialize(s, isconst(Main, sym))
169155
serialize(s, v)
@@ -180,17 +166,6 @@ function deserialize_global_from_main(s::ClusterSerializer, sym)
180166
return nothing
181167
end
182168

183-
function delete_global_tracker(s::ClusterSerializer, v)
184-
oid = objectid(v)
185-
if haskey(s.glbs_sent, oid)
186-
delete!(s.glbs_sent, oid)
187-
end
188-
189-
# TODO: A global binding is released and gc'ed here but it continues
190-
# to occupy memory on the remote node. Would be nice to release memory
191-
# if possible.
192-
end
193-
194169
function cleanup_tname_glbs(s::ClusterSerializer, identifier)
195170
delete!(s.glbs_in_tnobj, identifier)
196171
end

stdlib/Distributed/test/distributed_exec.jl

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,29 @@ v6 = FooModEverywhere
12141214
@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere
12151215
@test remotecall_fetch(()->v6, id_other) === FooModEverywhere
12161216

1217+
# hash value same but different object instance
1218+
v7 = ones(10)
1219+
oid1 = objectid(v7)
1220+
hval1 = hash(v7)
1221+
@test v7 == @fetchfrom id_other v7
1222+
remote_oid1 = @fetchfrom id_other objectid(v7)
1223+
1224+
v7 = ones(10)
1225+
@test oid1 != objectid(v7)
1226+
@test hval1 == hash(v7)
1227+
@test remote_oid1 != @fetchfrom id_other objectid(v7)
1228+
1229+
1230+
# Github issue #31252
1231+
v31252 = :a
1232+
@test :a == @fetchfrom id_other v31252
1233+
1234+
v31252 = :b
1235+
@test :b == @fetchfrom id_other v31252
1236+
1237+
v31252 = :a
1238+
@test :a == @fetchfrom id_other v31252
1239+
12171240

12181241
# Test that a global is not being repeatedly serialized when
12191242
# a) referenced multiple times in the closure
@@ -1311,10 +1334,6 @@ global ids_func = ()->ids_cleanup
13111334
clust_ser = (Distributed.worker_from_id(id_other)).w_serializer
13121335
@test remotecall_fetch(ids_func, id_other) == ids_cleanup
13131336

1314-
@test haskey(clust_ser.glbs_sent, objectid(ids_cleanup))
1315-
finalize(ids_cleanup)
1316-
@test !haskey(clust_ser.glbs_sent, objectid(ids_cleanup))
1317-
13181337
# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj`
13191338

13201339
# reported github issues - Mostly tests with globals and various distributed macros

0 commit comments

Comments
 (0)