Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Log info about low connectivity and unreachable validators #3916

Merged
merged 24 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion node/network/bridge/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ impl metrics::Metrics for Metrics {
prometheus::GaugeVec::new(
prometheus::Opts::new(
"parachain_desired_peer_count",
"The number of peers that the local node is expected to connect to on a parachain-related peer-set",
"The number of peers that the local node is expected to connect to on a parachain-related peer-set (either including or not including unresolvable authorities, depending on whether `ConnectToValidators` or `ConnectToValidatorsResolved` was used.)",
),
&["protocol"]
)?,
Expand Down Expand Up @@ -552,6 +552,27 @@ where
network_service = ns;
authority_discovery_service = ads;
}
NetworkBridgeMessage::ConnectToResolvedValidators {
validator_addrs,
peer_set,
} => {
tracing::trace!(
target: LOG_TARGET,
action = "ConnectToPeers",
peer_set = ?peer_set,
?validator_addrs,
"Received a resolved validator connection request",
);

metrics.note_desired_peer_count(peer_set, validator_addrs.len());

let all_addrs = validator_addrs.into_iter().flatten().collect();
network_service = validator_discovery.on_resolved_request(
all_addrs,
peer_set,
network_service,
).await;
}
NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
} => {
Expand Down
16 changes: 13 additions & 3 deletions node/network/bridge/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ use polkadot_primitives::v1::AuthorityDiscoveryId;
use polkadot_subsystem::{
jaeger,
messages::{
ApprovalDistributionMessage, BitfieldDistributionMessage, StatementDistributionMessage,
ApprovalDistributionMessage, BitfieldDistributionMessage, GossipSupportMessage,
StatementDistributionMessage,
},
ActiveLeavesUpdate, FromOverseer, LeafStatus, OverseerSignal,
};
Expand Down Expand Up @@ -337,6 +338,13 @@ async fn assert_sends_validation_event_to_all(
ApprovalDistributionMessage::NetworkBridgeUpdateV1(e)
) if e == event.focus().expect("could not focus message")
);

assert_matches!(
virtual_overseer.recv().await,
AllMessages::GossipSupport(
GossipSupportMessage::NetworkBridgeUpdateV1(e)
) if e == event.focus().expect("could not focus message")
);
}

async fn assert_sends_collation_event_to_all(
Expand Down Expand Up @@ -1189,7 +1197,7 @@ fn send_messages_to_peers() {
fn spread_event_to_subsystems_is_up_to_date() {
// Number of subsystems expected to be interested in a network event,
// and hence the network event broadcasted to.
const EXPECTED_COUNT: usize = 3;
const EXPECTED_COUNT: usize = 4;

let mut cnt = 0_usize;
for msg in AllMessages::dispatch_iter(NetworkBridgeEvent::PeerDisconnected(PeerId::random())) {
Expand Down Expand Up @@ -1219,7 +1227,9 @@ fn spread_event_to_subsystems_is_up_to_date() {
AllMessages::ApprovalDistribution(_) => {
cnt += 1;
},
AllMessages::GossipSupport(_) => unreachable!("Not interested in network events"),
AllMessages::GossipSupport(_) => {
cnt += 1;
},
AllMessages::DisputeCoordinator(_) => unreachable!("Not interested in network events"),
AllMessages::DisputeParticipation(_) =>
unreachable!("Not interested in network events"),
Expand Down
66 changes: 42 additions & 24 deletions node/network/bridge/src/validator_discovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,44 @@ impl<N: Network, AD: AuthorityDiscovery> Service<N, AD> {
Self { state: Default::default(), _phantom: PhantomData }
}

/// Connect to already resolved addresses:
pub async fn on_resolved_request(
&mut self,
newly_requested: HashSet<Multiaddr>,
peer_set: PeerSet,
mut network_service: N,
) -> N {
let state = &mut self.state[peer_set];
// clean up revoked requests
let multiaddr_to_remove: HashSet<_> =
state.previously_requested.difference(&newly_requested).cloned().collect();
let multiaddr_to_add: HashSet<_> =
newly_requested.difference(&state.previously_requested).cloned().collect();
state.previously_requested = newly_requested;

tracing::debug!(
target: LOG_TARGET,
?peer_set,
added = multiaddr_to_add.len(),
removed = multiaddr_to_remove.len(),
"New ConnectToValidators resolved request",
);
// ask the network to connect to these nodes and not disconnect
// from them until removed from the set
if let Err(e) = network_service
.add_to_peers_set(peer_set.into_protocol_name(), multiaddr_to_add)
.await
{
tracing::warn!(target: LOG_TARGET, err = ?e, "AuthorityDiscoveryService returned an invalid multiaddress");
}
// the addresses are known to be valid
let _ = network_service
.remove_from_peers_set(peer_set.into_protocol_name(), multiaddr_to_remove)
.await;

network_service
}

/// On a new connection request, a peer set update will be issued.
/// It will ask the network to connect to the validators and not disconnect
/// from them at least until the next request is issued for the same peer set.
Expand All @@ -59,7 +97,7 @@ impl<N: Network, AD: AuthorityDiscovery> Service<N, AD> {
validator_ids: Vec<AuthorityDiscoveryId>,
peer_set: PeerSet,
failed: oneshot::Sender<usize>,
mut network_service: N,
network_service: N,
mut authority_discovery_service: AD,
) -> (N, AD) {
// collect multiaddress of validators
Expand All @@ -82,39 +120,19 @@ impl<N: Network, AD: AuthorityDiscovery> Service<N, AD> {
}
}

let state = &mut self.state[peer_set];
// clean up revoked requests
let multiaddr_to_remove: HashSet<_> =
state.previously_requested.difference(&newly_requested).cloned().collect();
let multiaddr_to_add: HashSet<_> =
newly_requested.difference(&state.previously_requested).cloned().collect();
state.previously_requested = newly_requested;

tracing::debug!(
target: LOG_TARGET,
?peer_set,
?requested,
added = multiaddr_to_add.len(),
removed = multiaddr_to_remove.len(),
?failed_to_resolve,
"New ConnectToValidators request",
);
eskimor marked this conversation as resolved.
Show resolved Hide resolved
// ask the network to connect to these nodes and not disconnect
// from them until removed from the set
if let Err(e) = network_service
.add_to_peers_set(peer_set.into_protocol_name(), multiaddr_to_add)
.await
{
tracing::warn!(target: LOG_TARGET, err = ?e, "AuthorityDiscoveryService returned an invalid multiaddress");
}
// the addresses are known to be valid
let _ = network_service
.remove_from_peers_set(peer_set.into_protocol_name(), multiaddr_to_remove)
.await;

let r = self.on_resolved_request(newly_requested, peer_set, network_service).await;

let _ = failed.send(failed_to_resolve);

(network_service, authority_discovery_service)
(r, authority_discovery_service)
}
}

Expand Down
5 changes: 5 additions & 0 deletions node/network/gossip-support/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,26 @@ edition = "2018"
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" }

polkadot-node-network-protocol = { path = "../protocol" }
polkadot-node-subsystem = { path = "../../subsystem" }
polkadot-node-subsystem-util = { path = "../../subsystem-util" }
polkadot-primitives = { path = "../../../primitives" }

futures = "0.3.17"
futures-timer = "3.0.2"
rand = { version = "0.8.3", default-features = false }
rand_chacha = { version = "0.3.1", default-features = false }
tracing = "0.1.27"

[dev-dependencies]
sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-consensus-babe = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }

assert_matches = "1.4.0"
async-trait = "0.1.51"
lazy_static = "1.4.0"
Loading