Skip to content

Commit 2a9c1bd

Browse files
committed
Avoid using function closures in mem3
Those are for debugging in remsh, but function closures are still fragile when sent between nodes, so avoid them and rely on the new erpc module with plain M, F, A args. While at it, improve the `dead_nodes/0,1` functions: make them return the node from whose perspective we noticed the dead nodes. That makes it a bit clearer what the network partition might look like.
1 parent 2903cd5 commit 2a9c1bd

File tree

2 files changed

+12
-22
lines changed

2 files changed

+12
-22
lines changed

src/mem3/src/mem3.erl

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ do_ping(Node, Timeout) ->
500500
{Tag, Err}
501501
end.
502502

503-
-spec dead_nodes() -> [node() | Error :: term()].
503+
-spec dead_nodes() -> [{node(), [node()]}].
504504

505505
%% @doc Returns a list of dead nodes from the cluster.
506506
%%
@@ -518,32 +518,21 @@ do_ping(Node, Timeout) ->
518518
dead_nodes() ->
519519
dead_nodes(?PING_TIMEOUT_IN_MS).
520520

521-
-spec dead_nodes(Timeout :: pos_integer()) -> [node() | Error :: term()].
521+
-spec dead_nodes(Timeout :: pos_integer()) -> [{node(), [node()]}].
522522

523523
dead_nodes(Timeout) when is_integer(Timeout), Timeout > 0 ->
524524
% Here we are trying to detect overlapping partitions where not all the
525525
% nodes connect to each other. For example: n1 connects to n2 and n3, but
526526
% n2 and n3 are not connected.
527-
DeadFun = fun() ->
528-
Expected = ordsets:from_list(mem3:nodes()),
529-
Live = ordsets:from_list(mem3_util:live_nodes()),
530-
Dead = ordsets:subtract(Expected, Live),
531-
ordsets:to_list(Dead)
527+
Nodes = [node() | erlang:nodes()],
528+
Expected = erpc:multicall(Nodes, mem3, nodes, [], Timeout),
529+
Live = erpc:multicall(Nodes, mem3_util, live_nodes, [], Timeout),
530+
ZipF = fun
531+
(N, {ok, E}, {ok, L}) -> {N, E -- L};
532+
(N, _, _) -> {N, Nodes}
532533
end,
533-
{Responses, BadNodes} = multicall(DeadFun, Timeout),
534-
AccF = lists:foldl(
535-
fun
536-
(Dead, Acc) when is_list(Dead) -> ordsets:union(Acc, Dead);
537-
(Error, Acc) -> ordsets:union(Acc, [Error])
538-
end,
539-
ordsets:from_list(BadNodes),
540-
Responses
541-
),
542-
ordsets:to_list(AccF).
543-
544-
multicall(Fun, Timeout) when is_integer(Timeout), Timeout > 0 ->
545-
F = fun() -> catch Fun() end,
546-
rpc:multicall(erlang, apply, [F, []], Timeout).
534+
DeadPerNode = lists:zipwith3(ZipF, Nodes, Expected, Live),
535+
lists:sort([{N, lists:sort(D)} || {N, D} <- DeadPerNode, D =/= []]).
547536

548537
db_is_current(#shard{name = Name}) ->
549538
db_is_current(Name);

src/mem3/test/eunit/mem3_distribution_test.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,4 +143,5 @@ ping_nodes_test(_) ->
143143
dead_nodes_test(_) ->
144144
meck:expect(mem3, nodes, 0, [n1, n2, n3]),
145145
meck:expect(mem3_util, live_nodes, 0, [n1, n2]),
146-
?assertEqual([n3], couch_debug:dead_nodes()).
146+
Node = node(),
147+
?assertEqual([{Node, [n3]}], couch_debug:dead_nodes()).

0 commit comments

Comments
 (0)