Skip to content

Commit 547fc95

Browse files
mkuratczyk authored and mergify[bot] committed
await quorum+1 improvements
1. If khepri_db is enabled, rabbitmq_metadata is a critical component. 2. When waiting for quorum+1, periodically log what doesn't have the quorum+1: for components, just list them; for queues, list how many we are waiting for and how to display them (because there could be a large number, logging them all could be impractical or even dangerous). 3. Make the tests significantly faster by using a single group. (cherry picked from commit 6ca2022)
1 parent fb6dfd9 commit 547fc95

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

deps/rabbit/src/rabbit_upgrade_preparation.erl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
%%
1616

1717
-define(SAMPLING_INTERVAL, 200).
18+
-define(LOGGING_FREQUENCY, ?SAMPLING_INTERVAL * 100).
1819

1920
await_online_quorum_plus_one(Timeout) ->
2021
Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
@@ -30,7 +31,11 @@ online_members(Component) ->
3031
erlang, whereis, [Component])).
3132

3233
endangered_critical_components() ->
33-
CriticalComponents = [rabbit_stream_coordinator],
34+
CriticalComponents = [rabbit_stream_coordinator] ++
35+
case rabbit_feature_flags:is_enabled(khepri_db) of
36+
true -> [rabbitmq_metadata];
37+
false -> []
38+
end,
3439
Nodes = rabbit_nodes:list_members(),
3540
lists:filter(fun (Component) ->
3641
NumAlive = length(online_members(Component)),
@@ -57,6 +62,21 @@ do_await_safe_online_quorum(IterationsLeft) ->
5762
case EndangeredQueues =:= [] andalso endangered_critical_components() =:= [] of
5863
true -> true;
5964
false ->
65+
case IterationsLeft rem ?LOGGING_FREQUENCY of
66+
0 ->
67+
case length(EndangeredQueues) of
68+
0 -> ok;
69+
N -> rabbit_log:info("Waiting for ~p queues to have quorum+1 members online."
70+
"You can list them with `rabbitmq-diagnostics check_if_node_is_quorum_critical`", [N])
71+
end,
72+
case endangered_critical_components() of
73+
[] -> ok;
74+
_ -> rabbit_log:info("Waiting for the following critical components to have quorum+1 members online: ~p.",
75+
[endangered_critical_components()])
76+
end;
77+
_ ->
78+
ok
79+
end,
6080
timer:sleep(?SAMPLING_INTERVAL),
6181
do_await_safe_online_quorum(IterationsLeft - 1)
6282
end.

deps/rabbit/test/upgrade_preparation_SUITE.erl

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,16 @@
1414

1515
all() ->
1616
[
17-
{group, quorum_queue},
18-
{group, stream}
17+
{group, clustered}
1918
].
2019

2120
groups() ->
2221
[
23-
{quorum_queue, [], [
24-
await_quorum_plus_one_qq
25-
]},
26-
{stream, [], [
27-
await_quorum_plus_one_stream
28-
]},
29-
{stream_coordinator, [], [
30-
await_quorum_plus_one_stream_coordinator
22+
{clustered, [], [
23+
await_quorum_plus_one_qq,
24+
await_quorum_plus_one_stream,
25+
await_quorum_plus_one_stream_coordinator,
26+
await_quorum_plus_one_rabbitmq_metadata
3127
]}
3228
].
3329

@@ -44,21 +40,14 @@ end_per_suite(Config) ->
4440
rabbit_ct_helpers:run_teardown_steps(Config).
4541

4642
init_per_group(Group, Config) ->
47-
case rabbit_ct_helpers:is_mixed_versions() of
48-
true ->
49-
%% in a 3.8/3.9 mixed cluster, ra will not cluster across versions,
50-
%% so quorum plus one will not be achieved
51-
{skip, "not mixed versions compatible"};
52-
_ ->
53-
Config1 = rabbit_ct_helpers:set_config(Config,
54-
[
55-
{rmq_nodes_count, 3},
56-
{rmq_nodename_suffix, Group}
57-
]),
58-
rabbit_ct_helpers:run_steps(Config1,
59-
rabbit_ct_broker_helpers:setup_steps() ++
60-
rabbit_ct_client_helpers:setup_steps())
61-
end.
43+
Config1 = rabbit_ct_helpers:set_config(Config,
44+
[
45+
{rmq_nodes_count, 3},
46+
{rmq_nodename_suffix, Group}
47+
]),
48+
rabbit_ct_helpers:run_steps(Config1,
49+
rabbit_ct_broker_helpers:setup_steps() ++
50+
rabbit_ct_client_helpers:setup_steps()).
6251

6352
end_per_group(_Group, Config) ->
6453
rabbit_ct_helpers:run_steps(Config,
@@ -120,12 +109,24 @@ await_quorum_plus_one_stream_coordinator(Config) ->
120109
%% no queues/streams beyond this point
121110

122111
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
123-
%% this should fail because the corrdinator has only 2 running nodes
112+
%% this should fail because the coordinator has only 2 running nodes
124113
?assertNot(await_quorum_plus_one(Config, 0)),
125114

126115
ok = rabbit_ct_broker_helpers:start_node(Config, B),
127116
?assert(await_quorum_plus_one(Config, 0)).
128117

118+
await_quorum_plus_one_rabbitmq_metadata(Config) ->
119+
Nodes = [A, B, _C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
120+
ok = rabbit_ct_broker_helpers:enable_feature_flag(Config, Nodes, khepri_db),
121+
?assert(await_quorum_plus_one(Config, A)),
122+
123+
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
124+
%% this should fail because rabbitmq_metadata has only 2 running nodes
125+
?assertNot(await_quorum_plus_one(Config, A)),
126+
127+
ok = rabbit_ct_broker_helpers:start_node(Config, B),
128+
?assert(await_quorum_plus_one(Config, A)).
129+
129130
%%
130131
%% Implementation
131132
%%

0 commit comments

Comments
 (0)