Skip to content

Commit a631d4b

Browse files
committed
Global counters per protocol + protocol AND queue_type
This way we can show how many messages were received via a certain protocol (stream is the second real protocol besides the default amqp091 one), as well as by queue type, which is something that many asked for a really long time. The most important aspect is that we can also see them by protocol AND queue_type, which becomes very important for Streams, which have different rules from regular queues (e.g. for example, consuming messages is non-destructive, and deep queue backlogs - think billions of messages - are normal). Alerting and consumer scaling due to deep backlogs will now work correctly, as we can distinguish between regular queues & streams. This has gone through a few cycles, with @mkuratczyk & @dcorbacho covering most of the ground. @dcorbacho had most of this in #3045, but the main branch went through a few changes in the meantime. Rather than resolving all the conflicts, and then making the necessary changes, we (@gerhard + @kjnilsson) took all learnings and started re-applying a lot of the existing code from #3045. We are confident in this approach and would like to see it through. We continued working on this with @dumbbell, and the most important changes are captured in rabbitmq/seshat#1. We expose these global counters in rabbitmq_prometheus via a new collector. We don't want to keep modifying the existing collector, which grew really complex in parts, especially since we introduced aggregation, but start with a new namespace, `rabbitmq_global_`, and continue building on top of it. The idea is to build in parallel, and slowly transition to the new metrics, because semantically the changes are too big since streams, and we have been discussing protocol-specific metrics with @kjnilsson, which makes me think that this approach is least disruptive and... simple. While at this, we removed redundant empty return value handling in the channel. The function called no longer returns this. Also removed all DONE / TODO & other comments - we'll handle them when the time comes, no need to leave TODO reminders. Pairs @kjnilsson @dcorbacho @dumbbell (this is multiple commits squashed into one) Signed-off-by: Gerhard Lazu <gerhard@lazu.co.uk>
1 parent 70cb814 commit a631d4b

13 files changed

+564
-197
lines changed

deps/rabbit/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ RUNTIME_DEPS = [
155155
"@observer_cli//:bazel_erlang_lib",
156156
"@osiris//:bazel_erlang_lib",
157157
"@recon//:bazel_erlang_lib",
158+
"@seshat//:bazel_erlang_lib",
158159
"@sysmon_handler//:bazel_erlang_lib",
159160
"@systemd//:bazel_erlang_lib",
160161
]

deps/rabbit/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,14 +136,15 @@ APPS_DIR := $(CURDIR)/apps
136136
LOCAL_DEPS = sasl rabbitmq_prelaunch os_mon inets compiler public_key crypto ssl syntax_tools xmerl
137137

138138
BUILD_DEPS = rabbitmq_cli
139-
DEPS = ranch rabbit_common ra sysmon_handler stdout_formatter recon observer_cli osiris amqp10_common syslog systemd
139+
DEPS = ranch rabbit_common ra sysmon_handler stdout_formatter recon observer_cli osiris amqp10_common syslog systemd seshat
140140
TEST_DEPS = rabbitmq_ct_helpers rabbitmq_ct_client_helpers amqp_client meck proper
141141

142142
PLT_APPS += mnesia
143143

144144
dep_syslog = git https://github.com/schlagert/syslog 4.0.0
145145
dep_osiris = git https://github.com/rabbitmq/osiris master
146146
dep_systemd = hex 0.6.1
147+
dep_seshat = git https://github.com/rabbitmq/seshat valid-prometheus-format
147148

148149
define usage_xml_to_erl
149150
$(subst __,_,$(patsubst $(DOCS_DIR)/rabbitmq%.1.xml, src/rabbit_%_usage.erl, $(subst -,_,$(1))))

deps/rabbit/src/rabbit.erl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@
127127
{requires, pre_boot},
128128
{enables, external_infrastructure}]}).
129129

130+
-rabbit_boot_step({rabbit_global_counters,
131+
[{description, "global counters"},
132+
{mfa, {rabbit_global_counters, boot_step,
133+
[]}},
134+
{requires, pre_boot},
135+
{enables, external_infrastructure}]}).
136+
130137
-rabbit_boot_step({rabbit_event,
131138
[{description, "statistics event manager"},
132139
{mfa, {rabbit_sup, start_restartable_child,

deps/rabbit/src/rabbit_channel.erl

Lines changed: 62 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin,
12851285
confirm_enabled = ConfirmEnabled,
12861286
delivery_flow = Flow
12871287
}) ->
1288+
rabbit_global_counters:messages_received(amqp091, 1),
12881289
check_msg_size(Content, MaxMessageSize, GCThreshold),
12891290
ExchangeName = rabbit_misc:r(VHostPath, exchange, ExchangeNameBin),
12901291
check_write_permitted(ExchangeName, User, AuthzContext),
@@ -1302,7 +1303,8 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin,
13021303
{MsgSeqNo, State1} =
13031304
case DoConfirm orelse Mandatory of
13041305
false -> {undefined, State};
1305-
true -> SeqNo = State#ch.publish_seqno,
1306+
true -> rabbit_global_counters:messages_received_confirm(amqp091, 1),
1307+
SeqNo = State#ch.publish_seqno,
13061308
{SeqNo, State#ch{publish_seqno = SeqNo + 1}}
13071309
end,
13081310
case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of
@@ -1314,9 +1316,11 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin,
13141316
Username, TraceState),
13151317
DQ = {Delivery#delivery{flow = Flow}, QNames},
13161318
{noreply, case Tx of
1317-
none -> deliver_to_queues(DQ, State1);
1318-
{Msgs, Acks} -> Msgs1 = ?QUEUE:in(DQ, Msgs),
1319-
State1#ch{tx = {Msgs1, Acks}}
1319+
none ->
1320+
deliver_to_queues(DQ, State1);
1321+
{Msgs, Acks} ->
1322+
Msgs1 = ?QUEUE:in(DQ, Msgs),
1323+
State1#ch{tx = {Msgs1, Acks}}
13201324
end};
13211325
{error, Reason} ->
13221326
precondition_failed("invalid message: ~p", [Reason])
@@ -1360,14 +1364,14 @@ handle_method(#'basic.get'{queue = QueueNameBin, no_ack = NoAck},
13601364
DeliveryTag, QueueStates0)
13611365
end) of
13621366
{ok, MessageCount, Msg, QueueStates} ->
1367+
{ok, QueueType} = rabbit_queue_type:module(QueueName, QueueStates),
13631368
handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount, Msg,
1364-
State#ch{queue_states = QueueStates});
1369+
QueueType, State#ch{queue_states = QueueStates});
13651370
{empty, QueueStates} ->
1371+
{ok, QueueType} = rabbit_queue_type:module(QueueName, QueueStates),
1372+
rabbit_global_counters:messages_get_empty(amqp091, QueueType, 1),
13661373
?INCR_STATS(queue_stats, QueueName, 1, get_empty, State),
13671374
{reply, #'basic.get_empty'{}, State#ch{queue_states = QueueStates}};
1368-
empty ->
1369-
?INCR_STATS(queue_stats, QueueName, 1, get_empty, State),
1370-
{reply, #'basic.get_empty'{}, State};
13711375
{error, {unsupported, single_active_consumer}} ->
13721376
rabbit_misc:protocol_error(
13731377
resource_locked,
@@ -1692,9 +1696,9 @@ handle_method(#'tx.select'{}, _, State) ->
16921696
handle_method(#'tx.commit'{}, _, #ch{tx = none}) ->
16931697
precondition_failed("channel is not transactional");
16941698

1695-
handle_method(#'tx.commit'{}, _, State = #ch{tx = {Msgs, Acks},
1699+
handle_method(#'tx.commit'{}, _, State = #ch{tx = {Deliveries, Acks},
16961700
limiter = Limiter}) ->
1697-
State1 = queue_fold(fun deliver_to_queues/2, State, Msgs),
1701+
State1 = queue_fold(fun deliver_to_queues/2, State, Deliveries),
16981702
Rev = fun (X) -> lists:reverse(lists:sort(X)) end,
16991703
{State2, Actions2} =
17001704
lists:foldl(fun ({ack, A}, {Acc, Actions}) ->
@@ -1954,7 +1958,7 @@ internal_reject(Requeue, Acked, Limiter,
19541958
ok = notify_limiter(Limiter, Acked),
19551959
{State#ch{queue_states = QueueStates}, Actions}.
19561960

1957-
record_sent(Type, Tag, AckRequired,
1961+
record_sent(Type, QueueType, Tag, AckRequired,
19581962
Msg = {QName, _QPid, MsgId, Redelivered, _Message},
19591963
State = #ch{cfg = #conf{channel = ChannelNum,
19601964
trace_state = TraceState,
@@ -1964,15 +1968,28 @@ record_sent(Type, Tag, AckRequired,
19641968
unacked_message_q = UAMQ,
19651969
next_tag = DeliveryTag
19661970
}) ->
1967-
?INCR_STATS(queue_stats, QName, 1, case {Type, AckRequired} of
1968-
{get, true} -> get;
1969-
{get, false} -> get_no_ack;
1970-
{deliver, true} -> deliver;
1971-
{deliver, false} -> deliver_no_ack
1972-
end, State),
1971+
rabbit_global_counters:messages_delivered(amqp091, QueueType, 1),
1972+
?INCR_STATS(queue_stats, QName, 1,
1973+
case {Type, AckRequired} of
1974+
{get, true} ->
1975+
rabbit_global_counters:messages_delivered_get_manual_ack(amqp091, QueueType, 1),
1976+
get;
1977+
{get, false} ->
1978+
rabbit_global_counters:messages_delivered_get_auto_ack(amqp091, QueueType, 1),
1979+
get_no_ack;
1980+
{deliver, true} ->
1981+
rabbit_global_counters:messages_delivered_consume_manual_ack(amqp091, QueueType, 1),
1982+
deliver;
1983+
{deliver, false} ->
1984+
rabbit_global_counters:messages_delivered_consume_auto_ack(amqp091, QueueType, 1),
1985+
deliver_no_ack
1986+
end, State),
19731987
case Redelivered of
1974-
true -> ?INCR_STATS(queue_stats, QName, 1, redeliver, State);
1975-
false -> ok
1988+
true ->
1989+
rabbit_global_counters:messages_redelivered(amqp091, QueueType, 1),
1990+
?INCR_STATS(queue_stats, QName, 1, redeliver, State);
1991+
false ->
1992+
ok
19761993
end,
19771994
DeliveredAt = os:system_time(millisecond),
19781995
rabbit_trace:tap_out(Msg, ConnName, ChannelNum, Username, TraceState),
@@ -2034,8 +2051,14 @@ ack(Acked, State = #ch{queue_states = QueueStates0}) ->
20342051
ok = notify_limiter(State#ch.limiter, Acked),
20352052
{State#ch{queue_states = QueueStates}, Actions}.
20362053

2037-
incr_queue_stats(QName, MsgIds, State) ->
2054+
incr_queue_stats(QName, MsgIds, State = #ch{queue_states = QueueStates}) ->
20382055
Count = length(MsgIds),
2056+
case rabbit_queue_type:module(QName, QueueStates) of
2057+
{ok, QueueType} ->
2058+
rabbit_global_counters:messages_acknowledged(amqp091, QueueType, Count);
2059+
_ ->
2060+
noop
2061+
end,
20392062
?INCR_STATS(queue_stats, QName, Count, ack, State).
20402063

20412064
%% {Msgs, Acks}
@@ -2108,15 +2131,16 @@ notify_limiter(Limiter, Acked) ->
21082131
deliver_to_queues({#delivery{message = #basic_message{exchange_name = XName},
21092132
confirm = false,
21102133
mandatory = false},
2111-
_RoutedToQueueNames = []}, State) -> %% optimisation
2134+
_RoutedToQueueNames = []}, State) -> %% optimisation when there are no queues
21122135
?INCR_STATS(exchange_stats, XName, 1, publish, State),
2136+
rabbit_global_counters:messages_unroutable_dropped(amqp091, 1),
21132137
?INCR_STATS(exchange_stats, XName, 1, drop_unroutable, State),
21142138
State;
21152139
deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{exchange_name = XName},
21162140
mandatory = Mandatory,
21172141
confirm = Confirm,
21182142
msg_seq_no = MsgSeqNo},
2119-
_RoutedToQueueNames = [QName]}, State0 = #ch{queue_states = QueueStates0}) ->
2143+
_RoutedToQueueNames = [QName]}, State0 = #ch{queue_states = QueueStates0}) -> %% optimisation when there is one queue
21202144
AllNames = case rabbit_amqqueue:lookup(QName) of
21212145
{ok, Q0} ->
21222146
case amqqueue:get_options(Q0) of
@@ -2128,6 +2152,7 @@ deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ex
21282152
Qs = rabbit_amqqueue:lookup(AllNames),
21292153
case rabbit_queue_type:deliver(Qs, Delivery, QueueStates0) of
21302154
{ok, QueueStates, Actions} ->
2155+
rabbit_global_counters:messages_routed(amqp091, length(Qs)),
21312156
%% NB: the order here is important since basic.returns must be
21322157
%% sent before confirms.
21332158
ok = process_routing_mandatory(Mandatory, Qs, Message, State0),
@@ -2164,6 +2189,7 @@ deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{ex
21642189
end,
21652190
case rabbit_queue_type:deliver(Qs, Delivery, QueueStates0) of
21662191
{ok, QueueStates, Actions} ->
2192+
rabbit_global_counters:messages_routed(amqp091, length(Qs)),
21672193
%% NB: the order here is important since basic.returns must be
21682194
%% sent before confirms.
21692195
ok = process_routing_mandatory(Mandatory, Qs, Message, State0),
@@ -2213,11 +2239,13 @@ infer_extra_bcc(Qs) ->
22132239
process_routing_mandatory(_Mandatory = true,
22142240
_RoutedToQs = [],
22152241
Msg, State) ->
2242+
rabbit_global_counters:messages_unroutable_returned(amqp091, 1),
22162243
ok = basic_return(Msg, State, no_route),
22172244
ok;
22182245
process_routing_mandatory(_Mandatory = false,
22192246
_RoutedToQs = [],
22202247
#basic_message{exchange_name = ExchangeName}, State) ->
2248+
rabbit_global_counters:messages_unroutable_dropped(amqp091, 1),
22212249
?INCR_STATS(exchange_stats, ExchangeName, 1, drop_unroutable, State),
22222250
ok;
22232251
process_routing_mandatory(_, _, _, _) ->
@@ -2245,6 +2273,7 @@ send_confirms_and_nacks(State = #ch{tx = none, confirmed = C, rejected = R}) ->
22452273
case rabbit_node_monitor:pause_partition_guard() of
22462274
ok ->
22472275
Confirms = lists:append(C),
2276+
rabbit_global_counters:messages_confirmed(amqp091, length(Confirms)),
22482277
Rejects = lists:append(R),
22492278
ConfirmMsgSeqNos =
22502279
lists:foldl(
@@ -2721,8 +2750,9 @@ handle_deliver0(ConsumerTag, AckRequired,
27212750
redelivered = Redelivered,
27222751
exchange = ExchangeName#resource.name,
27232752
routing_key = RoutingKey},
2724-
case rabbit_queue_type:module(QName, Qs) of
2725-
{ok, rabbit_classic_queue} ->
2753+
{ok, QueueType} = rabbit_queue_type:module(QName, Qs),
2754+
case QueueType of
2755+
rabbit_classic_queue ->
27262756
ok = rabbit_writer:send_command_and_notify(
27272757
WriterPid, QPid, self(), Deliver, Content);
27282758
_ ->
@@ -2732,13 +2762,14 @@ handle_deliver0(ConsumerTag, AckRequired,
27322762
undefined -> ok;
27332763
_ -> rabbit_basic:maybe_gc_large_msg(Content, GCThreshold)
27342764
end,
2735-
record_sent(deliver, ConsumerTag, AckRequired, Msg, State).
2765+
record_sent(deliver, QueueType, ConsumerTag, AckRequired, Msg, State).
27362766

27372767
handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount,
27382768
Msg = {_QName, _QPid, _MsgId, Redelivered,
27392769
#basic_message{exchange_name = ExchangeName,
27402770
routing_keys = [RoutingKey | _CcRoutes],
2741-
content = Content}}, State) ->
2771+
content = Content}},
2772+
QueueType, State) ->
27422773
ok = rabbit_writer:send_command(
27432774
WriterPid,
27442775
#'basic.get_ok'{delivery_tag = DeliveryTag,
@@ -2747,7 +2778,7 @@ handle_basic_get(WriterPid, DeliveryTag, NoAck, MessageCount,
27472778
routing_key = RoutingKey,
27482779
message_count = MessageCount},
27492780
Content),
2750-
{noreply, record_sent(get, DeliveryTag, not(NoAck), Msg, State)}.
2781+
{noreply, record_sent(get, QueueType, DeliveryTag, not(NoAck), Msg, State)}.
27512782

27522783
init_tick_timer(State = #ch{tick_timer = undefined}) ->
27532784
{ok, Interval} = application:get_env(rabbit, channel_tick_interval),
@@ -2783,10 +2814,10 @@ get_operation_timeout_and_deadline() ->
27832814
Deadline = now_millis() + Timeout,
27842815
{Timeout, Deadline}.
27852816

2786-
queue_fold(Fun, Init, Q) ->
2787-
case ?QUEUE:out(Q) of
2788-
{empty, _Q} -> Init;
2789-
{{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1)
2817+
queue_fold(Fun, Acc, Queue) ->
2818+
case ?QUEUE:out(Queue) of
2819+
{empty, _Queue} -> Acc;
2820+
{{value, Item}, Queue1} -> queue_fold(Fun, Fun(Item, Acc), Queue1)
27902821
end.
27912822

27922823
evaluate_consumer_timeout(State0 = #ch{cfg = #conf{channel = Channel,

0 commit comments

Comments
 (0)