diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e5867ada0..6fc7acea499 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,8 @@ * Make RocksDB block_size configurable [#6631](https://github.com/near/nearcore/pull/6631) * Increase default max_open_files RocksDB parameter from 512 to 10k [#6607](https://github.com/near/nearcore/pull/6607) * Use kebab-case names for neard subcommands to make them consistent with flag names. snake_case names are still valid for existing subcommands but kebab-case will be used for new commands. -* Added `near_peer_message_received_by_type_bytes` metric [#6661](https://github.com/near/nearcore/pull/6661) -* Removed `near__{total,bytes}` metrics in favour of `near_peer_message_received_by_type_{total,bytes}` metrics [#6661](https://github.com/near/nearcore/pull/6661) +* Added `near_peer_message_received_by_type_bytes` [#6661](https://github.com/near/nearcore/pull/6661) and `near_dropped_message_by_type_and_reason_count` [#6678](https://github.com/near/nearcore/pull/6678) metrics. +* Removed `near__{total,bytes}` [#6661](https://github.com/near/nearcore/pull/6661), `near__dropped`, `near_drop_message_unknown_account` and `near_dropped_messages_count` [#6678](https://github.com/near/nearcore/pull/6678) metrics. ## 1.25.0 [2022-03-16] diff --git a/chain/network-primitives/src/network_protocol/mod.rs b/chain/network-primitives/src/network_protocol/mod.rs index 4f2e988e214..7363b02078b 100644 --- a/chain/network-primitives/src/network_protocol/mod.rs +++ b/chain/network-primitives/src/network_protocol/mod.rs @@ -173,7 +173,6 @@ pub struct Pong { Clone, strum::AsRefStr, strum::AsStaticStr, - strum::EnumVariantNames, )] #[allow(clippy::large_enum_variant)] pub enum RoutedMessageBody { diff --git a/chain/network/src/network_protocol.rs b/chain/network/src/network_protocol.rs index 8c38f5acb91..f03d7a544f3 100644 --- a/chain/network/src/network_protocol.rs +++ b/chain/network/src/network_protocol.rs @@ -178,7 +178,6 @@ impl std::error::Error for HandshakeFailureReason {} Clone, Debug, strum::AsStaticStr, - strum::EnumVariantNames, )] // TODO(#1313): Use Box #[allow(clippy::large_enum_variant)] diff --git a/chain/network/src/peer/codec.rs b/chain/network/src/peer/codec.rs index bb76d0e6229..04d28e5d0a0 100644 --- a/chain/network/src/peer/codec.rs +++ b/chain/network/src/peer/codec.rs @@ -39,37 +39,42 @@ impl Encoder> for Codec { fn encode(&mut self, item: Vec, buf: &mut BytesMut) -> Result<(), Error> { if item.len() > NETWORK_MESSAGE_MAX_SIZE_BYTES { - Err(Error::new(ErrorKind::InvalidInput, "Input is too long")) - } else { + // TODO(mina86): Is there some way we can know what message we’re + // encoding? + metrics::MessageDropped::InputTooLong.inc_unknown_msg(); + return Err(Error::new(ErrorKind::InvalidInput, "Input is too long")); + } + + #[cfg(feature = "performance_stats")] + { + let stat = near_performance_metrics::stats_enabled::get_thread_stats_logger(); + stat.lock().unwrap().log_add_write_buffer( + item.len() + 4, + buf.len(), + buf.capacity(), + ); + } + if buf.capacity() >= MAX_WRITE_BUFFER_CAPACITY_BYTES + && item.len() + 4 + buf.len() > buf.capacity() + { #[cfg(feature = "performance_stats")] - { - let stat = near_performance_metrics::stats_enabled::get_thread_stats_logger(); - stat.lock().unwrap().log_add_write_buffer( - item.len() + 4, - buf.len(), - buf.capacity(), - ); - } - if buf.capacity() >= MAX_WRITE_BUFFER_CAPACITY_BYTES - && item.len() + 4 + buf.len() > buf.capacity() - { - #[cfg(feature = "performance_stats")] - let tid = near_rust_allocator_proxy::get_tid(); - #[cfg(not(feature = "performance_stats"))] - let tid = 0; - error!(target: "network", "{} throwing away message, because buffer is full item.len(): {} buf.capacity: {}", - tid, - item.len(), buf.capacity()); + let tid = near_rust_allocator_proxy::get_tid(); + #[cfg(not(feature = "performance_stats"))] + let tid = 0; + error!(target: "network", "{} throwing away message, because buffer is full item.len(): {} buf.capacity: {}", + tid, + item.len(), buf.capacity()); - metrics::DROPPED_MESSAGES_COUNT.inc_by(1); - return Err(Error::new(ErrorKind::Other, "Buf max capacity exceeded")); - } - // First four bytes is the length of the buffer. - buf.reserve(item.len() + 4); - buf.put_u32_le(item.len() as u32); - buf.put(&item[..]); - Ok(()) + // TODO(mina86): Is there some way we can know what message + // we’re encoding? + metrics::MessageDropped::MaxCapacityExceeded.inc_unknown_msg(); + return Err(Error::new(ErrorKind::Other, "Buf max capacity exceeded")); } + // First four bytes is the length of the buffer. + buf.reserve(item.len() + 4); + buf.put_u32_le(item.len() as u32); + buf.put(&item[..]); + Ok(()) } } diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index b5e683ad790..746609e02cd 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -1426,10 +1426,7 @@ impl PeerManagerActor { } Err(find_route_error) => { // TODO(MarX, #1369): Message is dropped here. Define policy for this case. - self.network_metrics.inc( - NetworkMetrics::peer_message_dropped(strum::AsStaticRef::as_static(&msg.body)) - .as_str(), - ); + metrics::MessageDropped::NoRouteFound.inc(&msg.body); debug!(target: "network", account_id = ?self.config.account_id, @@ -1458,7 +1455,7 @@ impl PeerManagerActor { Ok(peer_id) => peer_id, Err(find_route_error) => { // TODO(MarX, #1369): Message is dropped here. Define policy for this case. - metrics::DROP_MESSAGE_UNKNOWN_ACCOUNT.inc(); + metrics::MessageDropped::UnknownAccount.inc(&msg); debug!(target: "network", account_id = ?self.config.account_id, to = ?account_id, diff --git a/chain/network/src/stats/metrics.rs b/chain/network/src/stats/metrics.rs index e0eb9751dad..657f8b99c89 100644 --- a/chain/network/src/stats/metrics.rs +++ b/chain/network/src/stats/metrics.rs @@ -1,4 +1,3 @@ -use crate::types::PeerMessage; use near_metrics::{ do_create_int_counter_vec, try_create_histogram, try_create_int_counter, try_create_int_counter_vec, try_create_int_gauge, Histogram, IntCounter, IntCounterVec, @@ -6,8 +5,6 @@ use near_metrics::{ }; use near_network_primitives::types::RoutedMessageBody; use once_cell::sync::Lazy; -use std::collections::HashMap; -use strum::VariantNames; pub static PEER_CONNECTIONS_TOTAL: Lazy = Lazy::new(|| { try_create_int_gauge("near_peer_connections_total", "Number of connected peers").unwrap() @@ -90,13 +87,6 @@ pub static PEER_REACHABLE: Lazy = Lazy::new(|| { ) .unwrap() }); -pub static DROP_MESSAGE_UNKNOWN_ACCOUNT: Lazy = Lazy::new(|| { - try_create_int_counter( - "near_drop_message_unknown_account", - "Total messages dropped because target account is not known", - ) - .unwrap() -}); pub static RECEIVED_INFO_ABOUT_ITSELF: Lazy = Lazy::new(|| { try_create_int_counter( "received_info_about_itself", @@ -104,10 +94,12 @@ pub static RECEIVED_INFO_ABOUT_ITSELF: Lazy = Lazy::new(|| { ) .unwrap() }); -pub static DROPPED_MESSAGES_COUNT: Lazy = Lazy::new(|| { - near_metrics::try_create_int_counter( - "near_dropped_messages_count", - "Total count of messages which were dropped, because write buffer was full", +static DROPPED_MESSAGE_COUNT: Lazy = Lazy::new(|| { + near_metrics::try_create_int_counter_vec( + "near_dropped_message_by_type_and_reason_count", + "Total count of messages which were dropped by type of message and \ + reason why the message has been dropped", + &["type", "reason"] ) .unwrap() }); @@ -119,10 +111,31 @@ pub static PARTIAL_ENCODED_CHUNK_REQUEST_DELAY: Lazy = Lazy::new(|| { .unwrap() }); +#[derive(Clone, Copy, strum::AsRefStr)] +pub(crate) enum MessageDropped { + NoRouteFound, + UnknownAccount, + InputTooLong, + MaxCapacityExceeded, +} + +impl MessageDropped { + pub fn inc(self, msg: &RoutedMessageBody) { + self.inc_msg_type(msg.as_ref()) + } + + pub fn inc_unknown_msg(self) { + self.inc_msg_type("unknown") + } + + fn inc_msg_type(self, msg_type: &str) { + let reason = self.as_ref(); + DROPPED_MESSAGE_COUNT.with_label_values(&[msg_type, reason]).inc(); + } +} + #[derive(Clone, Debug, actix::MessageResponse)] pub struct NetworkMetrics { - // received messages - peer_messages: HashMap, // sent messages (broadcast style) pub broadcast_messages: IntCounterVec, } @@ -130,17 +143,6 @@ pub struct NetworkMetrics { impl NetworkMetrics { pub fn new() -> Self { Self { - peer_messages: PeerMessage::VARIANTS - .iter() - .filter(|&name| *name != "Routed") - .chain(RoutedMessageBody::VARIANTS.iter()) - .filter_map(|name: &&str| { - let counter_name = Self::peer_message_dropped(name); - try_create_int_counter(&counter_name, &counter_name) - .ok() - .map(|counter| (counter_name, counter)) - }) - .collect(), broadcast_messages: do_create_int_counter_vec( "near_broadcast_msg", "Broadcasted messages", @@ -149,16 +151,6 @@ impl NetworkMetrics { } } - pub fn peer_message_dropped(message_name: &str) -> String { - format!("near_{}_dropped", message_name.to_lowercase()) - } - - pub fn inc(&self, message_name: &str) { - if let Some(counter) = self.peer_messages.get(message_name) { - counter.inc(); - } - } - pub fn inc_broadcast(&self, message_name: &str) { self.broadcast_messages.with_label_values(&[message_name]).inc(); }