From 117466aa8e471562f921a90b69a6c265cb6c656f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 12 Aug 2021 13:11:36 +0200 Subject: [PATCH] Remove request multiplexer (#3624) * WIP: Get rid of request multiplexer. * WIP * Receiver for handling of incoming requests. * Get rid of useless `Fault` abstraction. The things the type system let us do are not worth getting abstracted in its own type. Instead error handling is going to be merely a pattern. * Make most things compile again. * Port availability distribution away from request multiplexer. * Formatting. * Port dispute distribution over. * Fixup statement distribution. * Handle request directly in collator protocol. + Only allow fatal errors at top level. * Use direct request channel for availability recovery. * Finally get rid of request multiplexer Fixes #2842 and paves the way for more back pressure possibilities. * Fix overseer and statement distribution tests. * Fix collator protocol and network bridge tests. * Fix tests in availability recovery. * Fix availability distribution tests. * Fix dispute distribution tests. * Add missing dependency * Typos. * Review remarks. * More remarks. --- Cargo.lock | 33 +- .../availability-distribution/Cargo.toml | 1 + .../availability-distribution/src/error.rs | 42 +- .../availability-distribution/src/lib.rs | 54 ++- .../src/pov_requester/mod.rs | 2 +- .../src/requester/fetch_task/mod.rs | 2 +- .../src/responder.rs | 127 +++-- .../src/tests/mod.rs | 21 +- .../src/tests/state.rs | 45 +- node/network/availability-recovery/src/lib.rs | 70 ++- .../availability-recovery/src/tests.rs | 74 +-- node/network/bridge/Cargo.toml | 1 - node/network/bridge/src/lib.rs | 439 ++++++++---------- node/network/bridge/src/multiplexer.rs | 221 --------- node/network/bridge/src/tests.rs | 91 +--- .../network/bridge/src/validator_discovery.rs | 2 +- node/network/collator-protocol/Cargo.toml | 2 + .../src/collator_side/mod.rs | 179 ++++--- .../src/collator_side/tests.rs | 301 ++++++------ node/network/collator-protocol/src/error.rs | 61 ++- node/network/collator-protocol/src/lib.rs | 20 +- .../src/validator_side/mod.rs | 12 +- node/network/dispute-distribution/Cargo.toml | 1 + .../network/dispute-distribution/src/error.rs | 39 +- node/network/dispute-distribution/src/lib.rs | 43 +- .../src/receiver/error.rs | 64 +-- .../dispute-distribution/src/receiver/mod.rs | 61 ++- .../dispute-distribution/src/sender/error.rs | 27 +- .../dispute-distribution/src/tests/mod.rs | 58 +-- node/network/protocol/Cargo.toml | 1 + .../src/request_response/incoming/error.rs | 55 +++ .../src/request_response/incoming/mod.rs | 232 +++++++++ .../protocol/src/request_response/mod.rs | 22 +- .../{request.rs => outgoing.rs} | 253 ++-------- .../protocol/src/request_response/v1.rs | 2 +- .../network/statement-distribution/Cargo.toml | 3 +- .../statement-distribution/src/error.rs | 42 +- .../network/statement-distribution/src/lib.rs | 39 +- .../statement-distribution/src/responder.rs | 23 +- .../statement-distribution/src/tests.rs | 71 ++- node/overseer/all-subsystems-gen/src/lib.rs | 2 +- node/overseer/src/lib.rs | 29 -- node/overseer/src/tests.rs | 35 +- node/service/Cargo.toml | 1 + node/service/src/lib.rs | 27 +- node/service/src/overseer.rs | 29 +- node/subsystem-types/src/messages.rs | 37 +- node/subsystem-util/Cargo.toml | 1 + node/subsystem-util/src/error_handling.rs | 202 -------- node/subsystem-util/src/lib.rs | 3 - node/subsystem-util/src/runtime/error.rs | 21 +- 51 files changed, 1493 insertions(+), 1730 deletions(-) delete mode 100644 node/network/bridge/src/multiplexer.rs create mode 100644 node/network/protocol/src/request_response/incoming/error.rs create mode 100644 node/network/protocol/src/request_response/incoming/mod.rs rename node/network/protocol/src/request_response/{request.rs => outgoing.rs} (54%) delete mode 100644 node/subsystem-util/src/error_handling.rs diff --git a/Cargo.lock b/Cargo.lock index 6f238cdae79c..107fb866712e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1532,13 +1532,14 @@ dependencies = [ [[package]] name = "derive_more" -version = "0.99.14" +version = "0.99.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc7b9cef1e351660e5443924e4f43ab25fbbed3e9a5f052df3677deb4d6b320" +checksum = "40eebddd2156ce1bb37b20bbe5151340a31828b1f2d22ba4141f3531710e38df" dependencies = [ "convert_case", "proc-macro2", "quote", + "rustc_version 0.3.3", "syn", ] @@ -2476,7 +2477,7 @@ dependencies = [ "cc", "libc", "log", - "rustc_version", + "rustc_version 0.2.3", "winapi 0.3.9", ] @@ -2845,7 +2846,7 @@ dependencies = [ "itoa", "log", "net2", - "rustc_version", + "rustc_version 0.2.3", "time", "tokio 0.1.22", "tokio-buf", @@ -5650,7 +5651,7 @@ checksum = "f842b1982eb6c2fe34036a4fbfb06dd185a3f5c8edfaacdf7d1ea10b07de6252" dependencies = [ "lock_api 0.3.4", "parking_lot_core 0.6.2", - "rustc_version", + "rustc_version 0.2.3", ] [[package]] @@ -5684,7 +5685,7 @@ dependencies = [ "cloudabi 0.0.3", "libc", "redox_syscall 0.1.56", - "rustc_version", + "rustc_version 0.2.3", "smallvec 0.6.13", "winapi 0.3.9", ] @@ -5942,6 +5943,7 @@ name = "polkadot-availability-distribution" version = "0.9.9" dependencies = [ "assert_matches", + "derive_more", "futures 0.3.16", "futures-timer 3.0.2", "lru", @@ -6053,10 +6055,12 @@ version = "0.9.9" dependencies = [ "always-assert", "assert_matches", + "derive_more", "env_logger 0.8.4", "futures 0.3.16", "futures-timer 3.0.2", "log", + "parity-scale-codec", "polkadot-node-network-protocol", "polkadot-node-primitives", "polkadot-node-subsystem", @@ -6089,6 +6093,7 @@ version = "0.9.9" dependencies = [ "assert_matches", "async-trait", + "derive_more", "futures 0.3.16", "futures-timer 3.0.2", "lazy_static", @@ -6170,7 +6175,6 @@ dependencies = [ "sp-consensus", "sp-core", "sp-keyring", - "strum", "tracing", ] @@ -6522,6 +6526,7 @@ name = "polkadot-node-network-protocol" version = "0.9.9" dependencies = [ "async-trait", + "derive_more", "futures 0.3.16", "parity-scale-codec", "polkadot-node-jaeger", @@ -6628,6 +6633,7 @@ version = "0.9.9" dependencies = [ "assert_matches", "async-trait", + "derive_more", "env_logger 0.8.4", "futures 0.3.16", "futures-timer 3.0.2", @@ -7023,6 +7029,7 @@ dependencies = [ "polkadot-node-core-parachains-inherent", "polkadot-node-core-provisioner", "polkadot-node-core-runtime-api", + "polkadot-node-network-protocol", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", @@ -7144,6 +7151,7 @@ version = "0.9.9" dependencies = [ "arrayvec 0.5.2", "assert_matches", + "derive_more", "futures 0.3.16", "futures-timer 3.0.2", "indexmap", @@ -8152,6 +8160,15 @@ dependencies = [ "semver 0.9.0", ] +[[package]] +name = "rustc_version" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" +dependencies = [ + "semver 0.11.0", +] + [[package]] name = "rustls" version = "0.18.0" @@ -9663,7 +9680,7 @@ dependencies = [ "rand 0.7.3", "rand_core 0.5.1", "ring", - "rustc_version", + "rustc_version 0.2.3", "sha2 0.9.2", "subtle 2.2.3", "x25519-dalek 0.6.0", diff --git a/node/network/availability-distribution/Cargo.toml b/node/network/availability-distribution/Cargo.toml index 2936e86cbbf6..8a9a666a7c85 100644 --- a/node/network/availability-distribution/Cargo.toml +++ b/node/network/availability-distribution/Cargo.toml @@ -20,6 +20,7 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } thiserror = "1.0.26" rand = "0.8.3" +derive_more = "0.99.11" lru = "0.6.6" [dev-dependencies] diff --git a/node/network/availability-distribution/src/error.rs b/node/network/availability-distribution/src/error.rs index 12a3ec60ce82..881e00ac28a0 100644 --- a/node/network/availability-distribution/src/error.rs +++ b/node/network/availability-distribution/src/error.rs @@ -17,35 +17,31 @@ //! Error handling related code and Error/Result definitions. -use polkadot_node_network_protocol::request_response::request::RequestError; +use polkadot_node_network_protocol::request_response::outgoing::RequestError; use thiserror::Error; use futures::channel::oneshot; -use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault}; +use polkadot_node_subsystem_util::runtime; use polkadot_subsystem::SubsystemError; use crate::LOG_TARGET; -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } -} - -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) - } +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), } impl From for Error { fn from(o: runtime::Error) -> Self { - Self(Fault::from_other(o)) + match o { + runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)), + runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)), + } } } @@ -107,15 +103,23 @@ pub enum NonFatal { Runtime(#[from] runtime::NonFatal), } +/// General result type for fatal/nonfatal errors. pub type Result = std::result::Result; +/// Results which are never fatal. +pub type NonFatalResult = std::result::Result; + /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. This utility function is meant to /// consume top-level errors by simply logging them pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> { - if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? { - tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + match result { + Err(Error::Fatal(f)) => Err(f), + Err(Error::NonFatal(error)) => { + tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + Ok(()) + }, + Ok(()) => Ok(()), } - Ok(()) } diff --git a/node/network/availability-distribution/src/lib.rs b/node/network/availability-distribution/src/lib.rs index 98396a4b3324..eea40f63c948 100644 --- a/node/network/availability-distribution/src/lib.rs +++ b/node/network/availability-distribution/src/lib.rs @@ -18,6 +18,7 @@ use futures::{future::Either, FutureExt, StreamExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; +use polkadot_node_network_protocol::request_response::{v1, IncomingRequestReceiver}; use polkadot_subsystem::{ messages::AvailabilityDistributionMessage, overseer, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemError, @@ -38,7 +39,7 @@ mod pov_requester; /// Responding to erasure chunk requests: mod responder; -use responder::{answer_chunk_request_log, answer_pov_request_log}; +use responder::{run_chunk_receiver, run_pov_receiver}; mod metrics; /// Prometheus `Metrics` for availability distribution. @@ -53,10 +54,20 @@ const LOG_TARGET: &'static str = "parachain::availability-distribution"; pub struct AvailabilityDistributionSubsystem { /// Easy and efficient runtime access for this subsystem. runtime: RuntimeInfo, + /// Receivers to receive messages from. + recvs: IncomingRequestReceivers, /// Prometheus metrics. metrics: Metrics, } +/// Receivers to be passed into availability distribution. +pub struct IncomingRequestReceivers { + /// Receiver for incoming PoV requests. + pub pov_req_receiver: IncomingRequestReceiver, + /// Receiver for incoming availability chunk requests. + pub chunk_req_receiver: IncomingRequestReceiver, +} + impl overseer::Subsystem for AvailabilityDistributionSubsystem where Context: SubsystemContext, @@ -74,18 +85,41 @@ where impl AvailabilityDistributionSubsystem { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> Self { + pub fn new( + keystore: SyncCryptoStorePtr, + recvs: IncomingRequestReceivers, + metrics: Metrics, + ) -> Self { let runtime = RuntimeInfo::new(Some(keystore)); - Self { runtime, metrics } + Self { runtime, recvs, metrics } } /// Start processing work as passed on from the Overseer. - async fn run(mut self, mut ctx: Context) -> std::result::Result<(), Fatal> + async fn run(self, mut ctx: Context) -> std::result::Result<(), Fatal> where Context: SubsystemContext, Context: overseer::SubsystemContext, { - let mut requester = Requester::new(self.metrics.clone()).fuse(); + let Self { mut runtime, recvs, metrics } = self; + + let IncomingRequestReceivers { pov_req_receiver, chunk_req_receiver } = recvs; + let mut requester = Requester::new(metrics.clone()).fuse(); + + { + let sender = ctx.sender().clone(); + ctx.spawn( + "pov-receiver", + run_pov_receiver(sender.clone(), pov_req_receiver, metrics.clone()).boxed(), + ) + .map_err(Fatal::SpawnTask)?; + + ctx.spawn( + "chunk-receiver", + run_chunk_receiver(sender, chunk_req_receiver, metrics.clone()).boxed(), + ) + .map_err(Fatal::SpawnTask)?; + } + loop { let action = { let mut subsystem_next = ctx.recv().fuse(); @@ -110,19 +144,13 @@ impl AvailabilityDistributionSubsystem { log_error( requester .get_mut() - .update_fetching_heads(&mut ctx, &mut self.runtime, update) + .update_fetching_heads(&mut ctx, &mut runtime, update) .await, "Error in Requester::update_fetching_heads", )?; }, FromOverseer::Signal(OverseerSignal::BlockFinalized(..)) => {}, FromOverseer::Signal(OverseerSignal::Conclude) => return Ok(()), - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::ChunkFetchingRequest(req), - } => answer_chunk_request_log(&mut ctx, req, &self.metrics).await, - FromOverseer::Communication { - msg: AvailabilityDistributionMessage::PoVFetchingRequest(req), - } => answer_pov_request_log(&mut ctx, req, &self.metrics).await, FromOverseer::Communication { msg: AvailabilityDistributionMessage::FetchPoV { @@ -136,7 +164,7 @@ impl AvailabilityDistributionSubsystem { log_error( pov_requester::fetch_pov( &mut ctx, - &mut self.runtime, + &mut runtime, relay_parent, from_validator, candidate_hash, diff --git a/node/network/availability-distribution/src/pov_requester/mod.rs b/node/network/availability-distribution/src/pov_requester/mod.rs index 216ced7d1eda..1aea7248503f 100644 --- a/node/network/availability-distribution/src/pov_requester/mod.rs +++ b/node/network/availability-distribution/src/pov_requester/mod.rs @@ -19,7 +19,7 @@ use futures::{channel::oneshot, future::BoxFuture, FutureExt}; use polkadot_node_network_protocol::request_response::{ - request::{RequestError, Requests}, + outgoing::{RequestError, Requests}, v1::{PoVFetchingRequest, PoVFetchingResponse}, OutgoingRequest, Recipient, }; diff --git a/node/network/availability-distribution/src/requester/fetch_task/mod.rs b/node/network/availability-distribution/src/requester/fetch_task/mod.rs index a088b52b884b..4800de26d523 100644 --- a/node/network/availability-distribution/src/requester/fetch_task/mod.rs +++ b/node/network/availability-distribution/src/requester/fetch_task/mod.rs @@ -24,7 +24,7 @@ use futures::{ use polkadot_erasure_coding::branch_hash; use polkadot_node_network_protocol::request_response::{ - request::{OutgoingRequest, Recipient, RequestError, Requests}, + outgoing::{OutgoingRequest, Recipient, RequestError, Requests}, v1::{ChunkFetchingRequest, ChunkFetchingResponse}, }; use polkadot_node_primitives::ErasureChunk; diff --git a/node/network/availability-distribution/src/responder.rs b/node/network/availability-distribution/src/responder.rs index e9b779c2ba73..a7b956232574 100644 --- a/node/network/availability-distribution/src/responder.rs +++ b/node/network/availability-distribution/src/responder.rs @@ -20,28 +20,93 @@ use std::sync::Arc; use futures::channel::oneshot; -use polkadot_node_network_protocol::request_response::{request::IncomingRequest, v1}; +use polkadot_node_network_protocol::{ + request_response::{incoming, v1, IncomingRequest, IncomingRequestReceiver}, + UnifiedReputationChange as Rep, +}; use polkadot_node_primitives::{AvailableData, ErasureChunk}; use polkadot_primitives::v1::{CandidateHash, ValidatorIndex}; -use polkadot_subsystem::{jaeger, messages::AvailabilityStoreMessage, SubsystemContext}; +use polkadot_subsystem::{jaeger, messages::AvailabilityStoreMessage, SubsystemSender}; use crate::{ - error::{NonFatal, Result}, + error::{NonFatal, NonFatalResult, Result}, metrics::{Metrics, FAILED, NOT_FOUND, SUCCEEDED}, LOG_TARGET, }; +const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded."); + +/// Receiver task to be forked as a separate task to handle PoV requests. +pub async fn run_pov_receiver( + mut sender: Sender, + mut receiver: IncomingRequestReceiver, + metrics: Metrics, +) where + Sender: SubsystemSender, +{ + loop { + match receiver.recv(|| vec![COST_INVALID_REQUEST]).await { + Ok(msg) => { + answer_pov_request_log(&mut sender, msg, &metrics).await; + }, + Err(incoming::Error::Fatal(f)) => { + tracing::debug!( + target: LOG_TARGET, + error = ?f, + "Shutting down POV receiver." + ); + return + }, + Err(incoming::Error::NonFatal(error)) => { + tracing::debug!(target: LOG_TARGET, ?error, "Error decoding incoming PoV request."); + }, + } + } +} + +/// Receiver task to be forked as a separate task to handle chunk requests. +pub async fn run_chunk_receiver( + mut sender: Sender, + mut receiver: IncomingRequestReceiver, + metrics: Metrics, +) where + Sender: SubsystemSender, +{ + loop { + match receiver.recv(|| vec![COST_INVALID_REQUEST]).await { + Ok(msg) => { + answer_chunk_request_log(&mut sender, msg, &metrics).await; + }, + Err(incoming::Error::Fatal(f)) => { + tracing::debug!( + target: LOG_TARGET, + error = ?f, + "Shutting down chunk receiver." + ); + return + }, + Err(incoming::Error::NonFatal(error)) => { + tracing::debug!( + target: LOG_TARGET, + ?error, + "Error decoding incoming chunk request." + ); + }, + } + } +} + /// Variant of `answer_pov_request` that does Prometheus metric and logging on errors. /// /// Any errors of `answer_pov_request` will simply be logged. -pub async fn answer_pov_request_log( - ctx: &mut Context, +pub async fn answer_pov_request_log( + sender: &mut Sender, req: IncomingRequest, metrics: &Metrics, ) where - Context: SubsystemContext, + Sender: SubsystemSender, { - let res = answer_pov_request(ctx, req).await; + let res = answer_pov_request(sender, req).await; match res { Ok(result) => metrics.on_served_pov(if result { SUCCEEDED } else { NOT_FOUND }), Err(err) => { @@ -58,15 +123,15 @@ pub async fn answer_pov_request_log( /// Variant of `answer_chunk_request` that does Prometheus metric and logging on errors. /// /// Any errors of `answer_request` will simply be logged. -pub async fn answer_chunk_request_log( - ctx: &mut Context, +pub async fn answer_chunk_request_log( + sender: &mut Sender, req: IncomingRequest, metrics: &Metrics, ) -> () where - Context: SubsystemContext, + Sender: SubsystemSender, { - let res = answer_chunk_request(ctx, req).await; + let res = answer_chunk_request(sender, req).await; match res { Ok(result) => metrics.on_served_chunk(if result { SUCCEEDED } else { NOT_FOUND }), Err(err) => { @@ -83,16 +148,16 @@ where /// Answer an incoming PoV fetch request by querying the av store. /// /// Returns: `Ok(true)` if chunk was found and served. -pub async fn answer_pov_request( - ctx: &mut Context, +pub async fn answer_pov_request( + sender: &mut Sender, req: IncomingRequest, ) -> Result where - Context: SubsystemContext, + Sender: SubsystemSender, { let _span = jaeger::Span::new(req.payload.candidate_hash, "answer-pov-request"); - let av_data = query_available_data(ctx, req.payload.candidate_hash).await?; + let av_data = query_available_data(sender, req.payload.candidate_hash).await?; let result = av_data.is_some(); @@ -111,18 +176,18 @@ where /// Answer an incoming chunk request by querying the av store. /// /// Returns: `Ok(true)` if chunk was found and served. -pub async fn answer_chunk_request( - ctx: &mut Context, +pub async fn answer_chunk_request( + sender: &mut Sender, req: IncomingRequest, ) -> Result where - Context: SubsystemContext, + Sender: SubsystemSender, { let span = jaeger::Span::new(req.payload.candidate_hash, "answer-chunk-request"); let _child_span = span.child("answer-chunk-request").with_chunk_index(req.payload.index.0); - let chunk = query_chunk(ctx, req.payload.candidate_hash, req.payload.index).await?; + let chunk = query_chunk(sender, req.payload.candidate_hash, req.payload.index).await?; let result = chunk.is_some(); @@ -145,16 +210,19 @@ where } /// Query chunk from the availability store. -async fn query_chunk( - ctx: &mut Context, +async fn query_chunk( + sender: &mut Sender, candidate_hash: CandidateHash, validator_index: ValidatorIndex, -) -> Result> +) -> NonFatalResult> where - Context: SubsystemContext, + Sender: SubsystemSender, { let (tx, rx) = oneshot::channel(); - ctx.send_message(AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx)) + sender + .send_message( + AvailabilityStoreMessage::QueryChunk(candidate_hash, validator_index, tx).into(), + ) .await; let result = rx.await.map_err(|e| { @@ -171,15 +239,16 @@ where } /// Query PoV from the availability store. -async fn query_available_data( - ctx: &mut Context, +async fn query_available_data( + sender: &mut Sender, candidate_hash: CandidateHash, -) -> Result> +) -> NonFatalResult> where - Context: SubsystemContext, + Sender: SubsystemSender, { let (tx, rx) = oneshot::channel(); - ctx.send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx)) + sender + .send_message(AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx).into()) .await; let result = rx.await.map_err(|e| NonFatal::QueryAvailableDataResponseChannel(e))?; diff --git a/node/network/availability-distribution/src/tests/mod.rs b/node/network/availability-distribution/src/tests/mod.rs index 068b35ce9dc3..b502c947dcff 100644 --- a/node/network/availability-distribution/src/tests/mod.rs +++ b/node/network/availability-distribution/src/tests/mod.rs @@ -18,6 +18,7 @@ use std::collections::HashSet; use futures::{executor, future, Future}; +use polkadot_node_network_protocol::request_response::IncomingRequest; use polkadot_primitives::v1::CoreState; use sp_keystore::SyncCryptoStorePtr; @@ -41,17 +42,21 @@ fn test_harness>( let pool = sp_core::testing::TaskExecutor::new(); let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone()); - let subsystem = AvailabilityDistributionSubsystem::new(keystore, Default::default()); - { - let subsystem = subsystem.run(context); + let (pov_req_receiver, pov_req_cfg) = IncomingRequest::get_config_receiver(); + let (chunk_req_receiver, chunk_req_cfg) = IncomingRequest::get_config_receiver(); + let subsystem = AvailabilityDistributionSubsystem::new( + keystore, + IncomingRequestReceivers { pov_req_receiver, chunk_req_receiver }, + Default::default(), + ); + let subsystem = subsystem.run(context); - let test_fut = test_fx(TestHarness { virtual_overseer, pool }); + let test_fut = test_fx(TestHarness { virtual_overseer, pov_req_cfg, chunk_req_cfg, pool }); - futures::pin_mut!(test_fut); - futures::pin_mut!(subsystem); + futures::pin_mut!(test_fut); + futures::pin_mut!(subsystem); - executor::block_on(future::join(test_fut, subsystem)).1.unwrap(); - } + executor::block_on(future::join(test_fut, subsystem)).1.unwrap(); } /// Simple basic check, whether the subsystem works as expected. diff --git a/node/network/availability-distribution/src/tests/state.rs b/node/network/availability-distribution/src/tests/state.rs index 1a402e9a0403..77a973473b64 100644 --- a/node/network/availability-distribution/src/tests/state.rs +++ b/node/network/availability-distribution/src/tests/state.rs @@ -30,7 +30,7 @@ use futures::{ use futures_timer::Delay; use sc_network as network; -use sc_network::{config as netconfig, IfDisconnected}; +use sc_network::{config as netconfig, config::RequestResponseConfig, IfDisconnected}; use sp_core::{testing::TaskExecutor, traits::SpawnNamed}; use sp_keystore::SyncCryptoStorePtr; @@ -59,6 +59,8 @@ use crate::LOG_TARGET; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; pub struct TestHarness { pub virtual_overseer: VirtualOverseer, + pub pov_req_cfg: RequestResponseConfig, + pub chunk_req_cfg: RequestResponseConfig, pub pool: TaskExecutor, } @@ -152,9 +154,7 @@ impl TestState { /// Run, but fail after some timeout. pub async fn run(self, harness: TestHarness) { // Make sure test won't run forever. - let f = self - .run_inner(harness.pool, harness.virtual_overseer) - .timeout(Duration::from_secs(10)); + let f = self.run_inner(harness).timeout(Duration::from_secs(10)); assert!(f.await.is_some(), "Test ran into timeout"); } @@ -166,7 +166,7 @@ impl TestState { /// /// We try to be as agnostic about details as possible, how the subsystem achieves those goals /// should not be a matter to this test suite. - async fn run_inner(mut self, executor: TaskExecutor, virtual_overseer: VirtualOverseer) { + async fn run_inner(mut self, mut harness: TestHarness) { // We skip genesis here (in reality ActiveLeavesUpdate can also skip a block: let updates = { let mut advanced = self.relay_chain.iter(); @@ -191,12 +191,12 @@ impl TestState { // Test will fail if this does not happen until timeout. let mut remaining_stores = self.valid_chunks.len(); - let TestSubsystemContextHandle { tx, mut rx } = virtual_overseer; + let TestSubsystemContextHandle { tx, mut rx } = harness.virtual_overseer; // Spawning necessary as incoming queue can only hold a single item, we don't want to dead // lock ;-) let update_tx = tx.clone(); - executor.spawn( + harness.pool.spawn( "Sending active leaves updates", async move { for update in updates { @@ -219,16 +219,15 @@ impl TestState { )) => { for req in reqs { // Forward requests: - let in_req = to_incoming_req(&executor, req); - - executor.spawn( - "Request forwarding", - overseer_send( - tx.clone(), - AvailabilityDistributionMessage::ChunkFetchingRequest(in_req), - ) - .boxed(), - ); + let in_req = to_incoming_req(&harness.pool, req); + harness + .chunk_req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(in_req.into_raw()) + .await + .unwrap(); } }, AllMessages::AvailabilityStore(AvailabilityStoreMessage::QueryChunk( @@ -295,18 +294,6 @@ async fn overseer_signal( tx.send(FromOverseer::Signal(msg)).await.expect("Test subsystem no longer live"); } -async fn overseer_send( - mut tx: SingleItemSink>, - msg: impl Into, -) { - let msg = msg.into(); - tracing::trace!(target: LOG_TARGET, msg = ?msg, "sending message"); - tx.send(FromOverseer::Communication { msg }) - .await - .expect("Test subsystem no longer live"); - tracing::trace!(target: LOG_TARGET, "sent message"); -} - async fn overseer_recv(rx: &mut mpsc::UnboundedReceiver) -> AllMessages { tracing::trace!(target: LOG_TARGET, "waiting for message ..."); rx.next().await.expect("Test subsystem no longer live") diff --git a/node/network/availability-recovery/src/lib.rs b/node/network/availability-recovery/src/lib.rs index 6f2dee97288c..ea4d484a41f1 100644 --- a/node/network/availability-recovery/src/lib.rs +++ b/node/network/availability-recovery/src/lib.rs @@ -26,6 +26,7 @@ use std::{ use futures::{ channel::oneshot, future::{BoxFuture, FutureExt, RemoteHandle}, + pin_mut, prelude::*, stream::FuturesUnordered, task::{Context, Poll}, @@ -36,9 +37,10 @@ use rand::seq::SliceRandom; use polkadot_erasure_coding::{branch_hash, branches, obtain_chunks_v1, recovery_threshold}; use polkadot_node_network_protocol::{ request_response::{ - self as req_res, request::RequestError, OutgoingRequest, Recipient, Requests, + self as req_res, incoming, outgoing::RequestError, v1 as request_v1, + IncomingRequestReceiver, OutgoingRequest, Recipient, Requests, }, - IfDisconnected, + IfDisconnected, UnifiedReputationChange as Rep, }; use polkadot_node_primitives::{AvailableData, ErasureChunk}; use polkadot_node_subsystem_util::request_session_info; @@ -68,9 +70,13 @@ const N_PARALLEL: usize = 50; // Size of the LRU cache where we keep recovered data. const LRU_SIZE: usize = 16; +const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Peer sent unparsable request"); + /// The Availability Recovery Subsystem. pub struct AvailabilityRecoverySubsystem { fast_path: bool, + /// Receiver for available data requests. + req_receiver: IncomingRequestReceiver, } struct RequestFromBackersPhase { @@ -750,13 +756,17 @@ where impl AvailabilityRecoverySubsystem { /// Create a new instance of `AvailabilityRecoverySubsystem` which starts with a fast path to request data from backers. - pub fn with_fast_path() -> Self { - Self { fast_path: true } + pub fn with_fast_path( + req_receiver: IncomingRequestReceiver, + ) -> Self { + Self { fast_path: true, req_receiver } } /// Create a new instance of `AvailabilityRecoverySubsystem` which requests only chunks - pub fn with_chunks_only() -> Self { - Self { fast_path: false } + pub fn with_chunks_only( + req_receiver: IncomingRequestReceiver, + ) -> Self { + Self { fast_path: false, req_receiver } } async fn run(self, mut ctx: Context) -> SubsystemResult<()> @@ -765,8 +775,11 @@ impl AvailabilityRecoverySubsystem { Context: overseer::SubsystemContext, { let mut state = State::default(); + let Self { fast_path, mut req_receiver } = self; loop { + let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse(); + pin_mut!(recv_req); futures::select! { v = ctx.recv().fuse() => { match v? { @@ -789,7 +802,7 @@ impl AvailabilityRecoverySubsystem { &mut ctx, receipt, session_index, - maybe_backing_group.filter(|_| self.fast_path), + maybe_backing_group.filter(|_| fast_path), response_sender, ).await { tracing::warn!( @@ -799,24 +812,37 @@ impl AvailabilityRecoverySubsystem { ); } } - AvailabilityRecoveryMessage::AvailableDataFetchingRequest(req) => { - match query_full_data(&mut ctx, req.payload.candidate_hash).await { - Ok(res) => { - let _ = req.send_response(res.into()); - } - Err(e) => { - tracing::debug!( - target: LOG_TARGET, - err = ?e, - "Failed to query available data.", - ); - - let _ = req.send_response(None.into()); - } - } + } + } + } + } + in_req = recv_req => { + match in_req { + Ok(req) => { + match query_full_data(&mut ctx, req.payload.candidate_hash).await { + Ok(res) => { + let _ = req.send_response(res.into()); + } + Err(e) => { + tracing::debug!( + target: LOG_TARGET, + err = ?e, + "Failed to query available data.", + ); + + let _ = req.send_response(None.into()); } } } + Err(incoming::Error::Fatal(f)) => return Err(SubsystemError::with_origin("availability-recovery", f)), + Err(incoming::Error::NonFatal(err)) => { + tracing::debug!( + target: LOG_TARGET, + ?err, + "Decoding incoming request failed" + ); + continue + } } } output = state.interactions.select_next_some() => { diff --git a/node/network/availability-recovery/src/tests.rs b/node/network/availability-recovery/src/tests.rs index e59cd8588939..fcd2575026e1 100644 --- a/node/network/availability-recovery/src/tests.rs +++ b/node/network/availability-recovery/src/tests.rs @@ -21,9 +21,12 @@ use futures::{executor, future}; use futures_timer::Delay; use parity_scale_codec::Encode; +use polkadot_node_network_protocol::request_response::IncomingRequest; use super::*; +use sc_network::config::RequestResponseConfig; + use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_primitives::{BlockData, PoV}; use polkadot_node_subsystem_util::TimeoutExt; @@ -37,8 +40,8 @@ use polkadot_subsystem_testhelpers as test_helpers; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; -fn test_harness_fast_path>( - test: impl FnOnce(VirtualOverseer) -> T, +fn test_harness_fast_path>( + test: impl FnOnce(VirtualOverseer, RequestResponseConfig) -> T, ) { let _ = env_logger::builder() .is_test(true) @@ -49,27 +52,29 @@ fn test_harness_fast_path>( let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone()); - let subsystem = AvailabilityRecoverySubsystem::with_fast_path(); - let subsystem = subsystem.run(context); + let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(); + let subsystem = AvailabilityRecoverySubsystem::with_fast_path(collation_req_receiver); + let subsystem = async { + subsystem.run(context).await.unwrap(); + }; - let test_fut = test(virtual_overseer); + let test_fut = test(virtual_overseer, req_cfg); futures::pin_mut!(test_fut); futures::pin_mut!(subsystem); executor::block_on(future::join( async move { - let mut overseer = test_fut.await; + let (mut overseer, _req_cfg) = test_fut.await; overseer_signal(&mut overseer, OverseerSignal::Conclude).await; }, subsystem, )) .1 - .unwrap(); } -fn test_harness_chunks_only>( - test: impl FnOnce(VirtualOverseer) -> T, +fn test_harness_chunks_only>( + test: impl FnOnce(VirtualOverseer, RequestResponseConfig) -> T, ) { let _ = env_logger::builder() .is_test(true) @@ -80,17 +85,18 @@ fn test_harness_chunks_only>( let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone()); - let subsystem = AvailabilityRecoverySubsystem::with_chunks_only(); + let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(); + let subsystem = AvailabilityRecoverySubsystem::with_chunks_only(collation_req_receiver); let subsystem = subsystem.run(context); - let test_fut = test(virtual_overseer); + let test_fut = test(virtual_overseer, req_cfg); futures::pin_mut!(test_fut); futures::pin_mut!(subsystem); executor::block_on(future::join( async move { - let mut overseer = test_fut.await; + let (mut overseer, _req_cfg) = test_fut.await; overseer_signal(&mut overseer, OverseerSignal::Conclude).await; }, subsystem, @@ -432,7 +438,7 @@ impl Default for TestState { fn availability_is_recovered_from_chunks_if_no_group_provided() { let test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -510,7 +516,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() { // A request times out with `Unavailable` error. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -518,7 +524,7 @@ fn availability_is_recovered_from_chunks_if_no_group_provided() { fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunks_only() { let test_state = TestState::default(); - test_harness_chunks_only(|mut virtual_overseer| async move { + test_harness_chunks_only(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -596,7 +602,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk // A request times out with `Unavailable` error. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -604,7 +610,7 @@ fn availability_is_recovered_from_chunks_even_if_backing_group_supplied_if_chunk fn bad_merkle_path_leads_to_recovery_error() { let mut test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -654,7 +660,7 @@ fn bad_merkle_path_leads_to_recovery_error() { // A request times out with `Unavailable` error. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -662,7 +668,7 @@ fn bad_merkle_path_leads_to_recovery_error() { fn wrong_chunk_index_leads_to_recovery_error() { let mut test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -711,7 +717,7 @@ fn wrong_chunk_index_leads_to_recovery_error() { // A request times out with `Unavailable` error as there are no good peers. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -719,7 +725,7 @@ fn wrong_chunk_index_leads_to_recovery_error() { fn invalid_erasure_coding_leads_to_invalid_error() { let mut test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { let pov = PoV { block_data: BlockData(vec![69; 64]) }; let (bad_chunks, bad_erasure_root) = derive_erasure_chunks_with_proofs_and_root( @@ -776,7 +782,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() { // f+1 'valid' chunks can't produce correct data. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Invalid); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -784,7 +790,7 @@ fn invalid_erasure_coding_leads_to_invalid_error() { fn fast_path_backing_group_recovers() { let test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -826,7 +832,7 @@ fn fast_path_backing_group_recovers() { // Recovered data should match the original one. assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -834,7 +840,7 @@ fn fast_path_backing_group_recovers() { fn no_answers_in_fast_path_causes_chunk_requests() { let test_state = TestState::default(); - test_harness_fast_path(|mut virtual_overseer| async move { + test_harness_fast_path(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -888,7 +894,7 @@ fn no_answers_in_fast_path_causes_chunk_requests() { // Recovered data should match the original one. assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -896,7 +902,7 @@ fn no_answers_in_fast_path_causes_chunk_requests() { fn task_canceled_when_receivers_dropped() { let test_state = TestState::default(); - test_harness_chunks_only(|mut virtual_overseer| async move { + test_harness_chunks_only(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -925,7 +931,7 @@ fn task_canceled_when_receivers_dropped() { for _ in 0..test_state.validators.len() { match virtual_overseer.recv().timeout(TIMEOUT).await { - None => return virtual_overseer, + None => return (virtual_overseer, req_cfg), Some(_) => continue, } } @@ -938,7 +944,7 @@ fn task_canceled_when_receivers_dropped() { fn chunks_retry_until_all_nodes_respond() { let test_state = TestState::default(); - test_harness_chunks_only(|mut virtual_overseer| async move { + test_harness_chunks_only(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -992,7 +998,7 @@ fn chunks_retry_until_all_nodes_respond() { // Recovered data should match the original one. assert_eq!(rx.await.unwrap().unwrap_err(), RecoveryError::Unavailable); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -1000,7 +1006,7 @@ fn chunks_retry_until_all_nodes_respond() { fn returns_early_if_we_have_the_data() { let test_state = TestState::default(); - test_harness_chunks_only(|mut virtual_overseer| async move { + test_harness_chunks_only(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -1029,7 +1035,7 @@ fn returns_early_if_we_have_the_data() { test_state.respond_to_available_data_query(&mut virtual_overseer, true).await; assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data); - virtual_overseer + (virtual_overseer, req_cfg) }); } @@ -1037,7 +1043,7 @@ fn returns_early_if_we_have_the_data() { fn does_not_query_local_validator() { let test_state = TestState::default(); - test_harness_chunks_only(|mut virtual_overseer| async move { + test_harness_chunks_only(|mut virtual_overseer, req_cfg| async move { overseer_signal( &mut virtual_overseer, OverseerSignal::ActiveLeaves(ActiveLeavesUpdate::start_work(ActivatedLeaf { @@ -1088,6 +1094,6 @@ fn does_not_query_local_validator() { .await; assert_eq!(rx.await.unwrap().unwrap(), test_state.available_data); - virtual_overseer + (virtual_overseer, req_cfg) }); } diff --git a/node/network/bridge/Cargo.toml b/node/network/bridge/Cargo.toml index ee41be92e990..098b9aefe83e 100644 --- a/node/network/bridge/Cargo.toml +++ b/node/network/bridge/Cargo.toml @@ -16,7 +16,6 @@ polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsys polkadot-overseer = { path = "../../overseer" } polkadot-node-network-protocol = { path = "../protocol" } polkadot-node-subsystem-util = { path = "../../subsystem-util"} -strum = "0.20.0" parking_lot = "0.11.1" [dev-dependencies] diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index a02b85076a78..608a790d2931 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -22,7 +22,6 @@ use futures::{prelude::*, stream::BoxStream}; use parity_scale_codec::{Decode, Encode}; use parking_lot::Mutex; -use polkadot_subsystem::messages::DisputeDistributionMessage; use sc_network::Event as NetworkEvent; use sp_consensus::SyncOracle; @@ -35,10 +34,7 @@ use polkadot_overseer::gen::{OverseerError, Subsystem}; use polkadot_primitives::v1::{BlockNumber, Hash}; use polkadot_subsystem::{ errors::{SubsystemError, SubsystemResult}, - messages::{ - AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage, - StatementDistributionMessage, - }, + messages::{AllMessages, CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage}, overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, SubsystemSender, }; @@ -61,10 +57,6 @@ mod validator_discovery; mod network; use network::{send_message, Network}; -/// Request multiplexer for combining the multiple request sources into a single `Stream` of `AllMessages`. -mod multiplexer; -pub use multiplexer::RequestMultiplexer; - use crate::network::get_peer_id_by_authority_id; #[cfg(test)] @@ -276,7 +268,6 @@ pub struct NetworkBridge { /// `Network` trait implementing type. network_service: N, authority_discovery_service: AD, - request_multiplexer: RequestMultiplexer, sync_oracle: Box, metrics: Metrics, } @@ -289,17 +280,10 @@ impl NetworkBridge { pub fn new( network_service: N, authority_discovery_service: AD, - request_multiplexer: RequestMultiplexer, sync_oracle: Box, metrics: Metrics, ) -> Self { - NetworkBridge { - network_service, - authority_discovery_service, - request_multiplexer, - sync_oracle, - metrics, - } + NetworkBridge { network_service, authority_discovery_service, sync_oracle, metrics } } } @@ -335,8 +319,6 @@ enum UnexpectedAbort { SubsystemError(SubsystemError), /// The stream of incoming events concluded. EventStreamConcluded, - /// The stream of incoming requests concluded. - RequestStreamConcluded, } impl From for UnexpectedAbort { @@ -610,247 +592,226 @@ async fn handle_network_messages( mut network_service: impl Network, network_stream: BoxStream<'static, NetworkEvent>, mut authority_discovery_service: AD, - mut request_multiplexer: RequestMultiplexer, metrics: Metrics, shared: Shared, ) -> Result<(), UnexpectedAbort> { let mut network_stream = network_stream.fuse(); loop { - futures::select! { - network_event = network_stream.next() => match network_event { - None => return Err(UnexpectedAbort::EventStreamConcluded), - Some(NetworkEvent::Dht(_)) - | Some(NetworkEvent::SyncConnected { .. }) - | Some(NetworkEvent::SyncDisconnected { .. }) => {} - Some(NetworkEvent::NotificationStreamOpened { remote: peer, protocol, role, .. }) => { - let role = ObservedRole::from(role); - let peer_set = match PeerSet::try_from_protocol_name(&protocol) { - None => continue, - Some(peer_set) => peer_set, + match network_stream.next().await { + None => return Err(UnexpectedAbort::EventStreamConcluded), + Some(NetworkEvent::Dht(_)) | + Some(NetworkEvent::SyncConnected { .. }) | + Some(NetworkEvent::SyncDisconnected { .. }) => {}, + Some(NetworkEvent::NotificationStreamOpened { + remote: peer, protocol, role, .. + }) => { + let role = ObservedRole::from(role); + let peer_set = match PeerSet::try_from_protocol_name(&protocol) { + None => continue, + Some(peer_set) => peer_set, + }; + + tracing::debug!( + target: LOG_TARGET, + action = "PeerConnected", + peer_set = ?peer_set, + peer = ?peer, + role = ?role + ); + + let local_view = { + let mut shared = shared.0.lock(); + let peer_map = match peer_set { + PeerSet::Validation => &mut shared.validation_peers, + PeerSet::Collation => &mut shared.collation_peers, }; - tracing::debug!( - target: LOG_TARGET, - action = "PeerConnected", - peer_set = ?peer_set, - peer = ?peer, - role = ?role - ); - - let local_view = { - let mut shared = shared.0.lock(); - let peer_map = match peer_set { - PeerSet::Validation => &mut shared.validation_peers, - PeerSet::Collation => &mut shared.collation_peers, - }; - - match peer_map.entry(peer.clone()) { - hash_map::Entry::Occupied(_) => continue, - hash_map::Entry::Vacant(vacant) => { - vacant.insert(PeerData { view: View::default() }); - } - } - - metrics.on_peer_connected(peer_set); - metrics.note_peer_count(peer_set, peer_map.len()); + match peer_map.entry(peer.clone()) { + hash_map::Entry::Occupied(_) => continue, + hash_map::Entry::Vacant(vacant) => { + vacant.insert(PeerData { view: View::default() }); + }, + } - shared.local_view.clone().unwrap_or(View::default()) - }; + metrics.on_peer_connected(peer_set); + metrics.note_peer_count(peer_set, peer_map.len()); - let maybe_authority = - authority_discovery_service - .get_authority_id_by_peer_id(peer).await; + shared.local_view.clone().unwrap_or(View::default()) + }; - match peer_set { - PeerSet::Validation => { - dispatch_validation_events_to_all( - vec![ - NetworkBridgeEvent::PeerConnected(peer.clone(), role, maybe_authority), - NetworkBridgeEvent::PeerViewChange( - peer.clone(), - View::default(), - ), - ], - &mut sender, - ).await; + let maybe_authority = + authority_discovery_service.get_authority_id_by_peer_id(peer).await; - send_message( - &mut network_service, - vec![peer], - PeerSet::Validation, - WireMessage::::ViewUpdate( - local_view, + match peer_set { + PeerSet::Validation => { + dispatch_validation_events_to_all( + vec![ + NetworkBridgeEvent::PeerConnected( + peer.clone(), + role, + maybe_authority, ), - &metrics, - ); - } - PeerSet::Collation => { - dispatch_collation_events_to_all( - vec![ - NetworkBridgeEvent::PeerConnected(peer.clone(), role, maybe_authority), - NetworkBridgeEvent::PeerViewChange( - peer.clone(), - View::default(), - ), - ], - &mut sender, - ).await; + NetworkBridgeEvent::PeerViewChange(peer.clone(), View::default()), + ], + &mut sender, + ) + .await; - send_message( - &mut network_service, - vec![peer], - PeerSet::Collation, - WireMessage::::ViewUpdate( - local_view, + send_message( + &mut network_service, + vec![peer], + PeerSet::Validation, + WireMessage::::ViewUpdate(local_view), + &metrics, + ); + }, + PeerSet::Collation => { + dispatch_collation_events_to_all( + vec![ + NetworkBridgeEvent::PeerConnected( + peer.clone(), + role, + maybe_authority, ), - &metrics, - ); - } - } + NetworkBridgeEvent::PeerViewChange(peer.clone(), View::default()), + ], + &mut sender, + ) + .await; + + send_message( + &mut network_service, + vec![peer], + PeerSet::Collation, + WireMessage::::ViewUpdate(local_view), + &metrics, + ); + }, } - Some(NetworkEvent::NotificationStreamClosed { remote: peer, protocol }) => { - let peer_set = match PeerSet::try_from_protocol_name(&protocol) { - None => continue, - Some(peer_set) => peer_set, + }, + Some(NetworkEvent::NotificationStreamClosed { remote: peer, protocol }) => { + let peer_set = match PeerSet::try_from_protocol_name(&protocol) { + None => continue, + Some(peer_set) => peer_set, + }; + + tracing::debug!( + target: LOG_TARGET, + action = "PeerDisconnected", + peer_set = ?peer_set, + peer = ?peer + ); + + let was_connected = { + let mut shared = shared.0.lock(); + let peer_map = match peer_set { + PeerSet::Validation => &mut shared.validation_peers, + PeerSet::Collation => &mut shared.collation_peers, }; - tracing::debug!( - target: LOG_TARGET, - action = "PeerDisconnected", - peer_set = ?peer_set, - peer = ?peer - ); - - let was_connected = { - let mut shared = shared.0.lock(); - let peer_map = match peer_set { - PeerSet::Validation => &mut shared.validation_peers, - PeerSet::Collation => &mut shared.collation_peers, - }; - - let w = peer_map.remove(&peer).is_some(); + let w = peer_map.remove(&peer).is_some(); - metrics.on_peer_disconnected(peer_set); - metrics.note_peer_count(peer_set, peer_map.len()); + metrics.on_peer_disconnected(peer_set); + metrics.note_peer_count(peer_set, peer_map.len()); - w - }; + w + }; - if was_connected { - match peer_set { - PeerSet::Validation => dispatch_validation_event_to_all( + if was_connected { + match peer_set { + PeerSet::Validation => + dispatch_validation_event_to_all( NetworkBridgeEvent::PeerDisconnected(peer), &mut sender, - ).await, - PeerSet::Collation => dispatch_collation_event_to_all( + ) + .await, + PeerSet::Collation => + dispatch_collation_event_to_all( NetworkBridgeEvent::PeerDisconnected(peer), &mut sender, - ).await, - } + ) + .await, } } - Some(NetworkEvent::NotificationsReceived { remote, messages }) => { - let v_messages: Result, _> = messages - .iter() - .filter(|(protocol, _)| { - protocol == &PeerSet::Validation.into_protocol_name() - }) - .map(|(_, msg_bytes)| { - WireMessage::decode(&mut msg_bytes.as_ref()) - .map(|m| (m, msg_bytes.len())) - }) - .collect(); - - let v_messages = match v_messages { - Err(_) => { - tracing::debug!( - target: LOG_TARGET, - action = "ReportPeer" - ); - - network_service.report_peer(remote, MALFORMED_MESSAGE_COST); - continue; - } - Ok(v) => v, - }; - - let c_messages: Result, _> = messages - .iter() - .filter(|(protocol, _)| { - protocol == &PeerSet::Collation.into_protocol_name() - }) - .map(|(_, msg_bytes)| { - WireMessage::decode(&mut msg_bytes.as_ref()) - .map(|m| (m, msg_bytes.len())) - }) - .collect(); - - match c_messages { - Err(_) => { - tracing::debug!( + }, + Some(NetworkEvent::NotificationsReceived { remote, messages }) => { + let v_messages: Result, _> = messages + .iter() + .filter(|(protocol, _)| protocol == &PeerSet::Validation.into_protocol_name()) + .map(|(_, msg_bytes)| { + WireMessage::decode(&mut msg_bytes.as_ref()).map(|m| (m, msg_bytes.len())) + }) + .collect(); + + let v_messages = match v_messages { + Err(_) => { + tracing::debug!(target: LOG_TARGET, action = "ReportPeer"); + + network_service.report_peer(remote, MALFORMED_MESSAGE_COST); + continue + }, + Ok(v) => v, + }; + + let c_messages: Result, _> = messages + .iter() + .filter(|(protocol, _)| protocol == &PeerSet::Collation.into_protocol_name()) + .map(|(_, msg_bytes)| { + WireMessage::decode(&mut msg_bytes.as_ref()).map(|m| (m, msg_bytes.len())) + }) + .collect(); + + match c_messages { + Err(_) => { + tracing::debug!(target: LOG_TARGET, action = "ReportPeer"); + + network_service.report_peer(remote, MALFORMED_MESSAGE_COST); + continue + }, + Ok(c_messages) => + if v_messages.is_empty() && c_messages.is_empty() { + continue + } else { + tracing::trace!( target: LOG_TARGET, - action = "ReportPeer" + action = "PeerMessages", + peer = ?remote, + num_validation_messages = %v_messages.len(), + num_collation_messages = %c_messages.len() ); - network_service.report_peer(remote, MALFORMED_MESSAGE_COST); - continue; - } - Ok(c_messages) => { - if v_messages.is_empty() && c_messages.is_empty() { - continue; - } else { - tracing::trace!( - target: LOG_TARGET, - action = "PeerMessages", - peer = ?remote, - num_validation_messages = %v_messages.len(), - num_collation_messages = %c_messages.len() + if !v_messages.is_empty() { + let (events, reports) = handle_peer_messages( + remote.clone(), + PeerSet::Validation, + &mut shared.0.lock().validation_peers, + v_messages, + &metrics, ); - if !v_messages.is_empty() { - let (events, reports) = handle_peer_messages( - remote.clone(), - PeerSet::Validation, - &mut shared.0.lock().validation_peers, - v_messages, - &metrics, - ); - - for report in reports { - network_service.report_peer(remote.clone(), report); - } - - dispatch_validation_events_to_all(events, &mut sender).await; + for report in reports { + network_service.report_peer(remote.clone(), report); } - if !c_messages.is_empty() { - let (events, reports) = handle_peer_messages( - remote.clone(), - PeerSet::Collation, - &mut shared.0.lock().collation_peers, - c_messages, - &metrics, - ); - - for report in reports { - network_service.report_peer(remote.clone(), report); - } + dispatch_validation_events_to_all(events, &mut sender).await; + } + if !c_messages.is_empty() { + let (events, reports) = handle_peer_messages( + remote.clone(), + PeerSet::Collation, + &mut shared.0.lock().collation_peers, + c_messages, + &metrics, + ); - dispatch_collation_events_to_all(events, &mut sender).await; + for report in reports { + network_service.report_peer(remote.clone(), report); } + + dispatch_collation_events_to_all(events, &mut sender).await; } - } - } - } - }, - req_res_event = request_multiplexer.next() => match req_res_event { - None => return Err(UnexpectedAbort::RequestStreamConcluded), - Some(Err(err)) => { - network_service.report_peer(err.peer, MALFORMED_MESSAGE_COST); - } - Some(Ok(msg)) => { - sender.send_message(msg).await; + }, } }, } @@ -881,28 +842,14 @@ where { let shared = Shared::default(); - let NetworkBridge { - network_service, - mut request_multiplexer, - authority_discovery_service, - metrics, - sync_oracle, - } = bridge; - - let statement_receiver = request_multiplexer - .get_statement_fetching() - .expect("Gets initialized, must be `Some` on startup. qed."); - - let dispute_receiver = request_multiplexer - .get_dispute_sending() - .expect("Gets initialized, must be `Some` on startup. qed."); + let NetworkBridge { network_service, authority_discovery_service, metrics, sync_oracle } = + bridge; let (remote, network_event_handler) = handle_network_messages( ctx.sender().clone(), network_service.clone(), network_stream, authority_discovery_service.clone(), - request_multiplexer, metrics.clone(), shared.clone(), ) @@ -910,11 +857,6 @@ where ctx.spawn("network-bridge-network-worker", Box::pin(remote))?; - ctx.send_message(DisputeDistributionMessage::DisputeSendingReceiver(dispute_receiver)) - .await; - ctx.send_message(StatementDistributionMessage::StatementFetchingReceiver(statement_receiver)) - .await; - let subsystem_event_handler = handle_subsystem_messages( ctx, network_service, @@ -951,13 +893,6 @@ where ); Err(SubsystemError::Context("Incoming network event stream concluded.".to_string())) }, - Err(UnexpectedAbort::RequestStreamConcluded) => { - tracing::info!( - target: LOG_TARGET, - "Shutting down Network Bridge: underlying request stream concluded" - ); - Err(SubsystemError::Context("Incoming network request stream concluded".to_string())) - }, } } diff --git a/node/network/bridge/src/multiplexer.rs b/node/network/bridge/src/multiplexer.rs deleted file mode 100644 index 0d8c8b63595a..000000000000 --- a/node/network/bridge/src/multiplexer.rs +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright 2021 Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -use std::{pin::Pin, unreachable}; - -use futures::{ - channel::mpsc, - stream::{FusedStream, Stream}, - task::{Context, Poll}, -}; -use strum::IntoEnumIterator; - -use parity_scale_codec::{Decode, Error as DecodingError}; - -use sc_network::{config as network, PeerId}; - -use polkadot_node_network_protocol::request_response::{ - request::IncomingRequest, v1, Protocol, RequestResponseConfig, -}; -use polkadot_overseer::AllMessages; - -/// Multiplex incoming network requests. -/// -/// This multiplexer consumes all request streams and makes them a `Stream` of a single message -/// type, useful for the network bridge to send them via the `Overseer` to other subsystems. -/// -/// The resulting stream will end once any of its input ends. -/// -// TODO: Get rid of this: -pub struct RequestMultiplexer { - receivers: Vec<(Protocol, mpsc::Receiver)>, - statement_fetching: Option>, - dispute_sending: Option>, - next_poll: usize, -} - -/// Multiplexing can fail in case of invalid messages. -#[derive(Debug, PartialEq, Eq)] -pub struct RequestMultiplexError { - /// The peer that sent the invalid message. - pub peer: PeerId, - /// The error that occurred. - pub error: DecodingError, -} - -impl RequestMultiplexer { - /// Create a new `RequestMultiplexer`. - /// - /// This function uses `Protocol::get_config` for each available protocol and creates a - /// `RequestMultiplexer` from it. The returned `RequestResponseConfig`s must be passed to the - /// network implementation. - pub fn new() -> (Self, Vec) { - let (mut receivers, cfgs): (Vec<_>, Vec<_>) = Protocol::iter() - .map(|p| { - let (rx, cfg) = p.get_config(); - ((p, rx), cfg) - }) - .unzip(); - - // Ok this code is ugly as hell, it is also a hack, see https://github.com/paritytech/polkadot/issues/2842. - // But it works and is executed on startup so, if anything is wrong here it will be noticed immediately. - let index = receivers - .iter() - .enumerate() - .find_map( - |(i, (p, _))| if let Protocol::StatementFetching = p { Some(i) } else { None }, - ) - .expect("Statement fetching must be registered. qed."); - let statement_fetching = Some(receivers.remove(index).1); - - let index = receivers - .iter() - .enumerate() - .find_map(|(i, (p, _))| if let Protocol::DisputeSending = p { Some(i) } else { None }) - .expect("Dispute sending must be registered. qed."); - let dispute_sending = Some(receivers.remove(index).1); - - (Self { receivers, statement_fetching, dispute_sending, next_poll: 0 }, cfgs) - } - - /// Get the receiver for handling statement fetching requests. - /// - /// This function will only return `Some` once. - pub fn get_statement_fetching(&mut self) -> Option> { - std::mem::take(&mut self.statement_fetching) - } - - /// Get the receiver for handling dispute sending requests. - /// - /// This function will only return `Some` once. - pub fn get_dispute_sending(&mut self) -> Option> { - std::mem::take(&mut self.dispute_sending) - } -} - -impl Stream for RequestMultiplexer { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let len = self.receivers.len(); - let mut count = len; - let mut i = self.next_poll; - let mut result = Poll::Ready(None); - // Poll streams in round robin fashion: - while count > 0 { - // % safe, because count initialized to len, loop would not be entered if 0, also - // length of receivers is fixed. - let (p, rx): &mut (_, _) = &mut self.receivers[i % len]; - // Avoid panic: - if rx.is_terminated() { - // Early return, we don't want to update next_poll. - return Poll::Ready(None) - } - i += 1; - count -= 1; - match Pin::new(rx).poll_next(cx) { - Poll::Pending => result = Poll::Pending, - // We are done, once a single receiver is done. - Poll::Ready(None) => return Poll::Ready(None), - Poll::Ready(Some(v)) => { - result = Poll::Ready(Some(multiplex_single(*p, v))); - break - }, - } - } - self.next_poll = i; - result - } -} - -impl FusedStream for RequestMultiplexer { - fn is_terminated(&self) -> bool { - let len = self.receivers.len(); - if len == 0 { - return true - } - let (_, rx) = &self.receivers[self.next_poll % len]; - rx.is_terminated() - } -} - -/// Convert a single raw incoming request into a `MultiplexMessage`. -fn multiplex_single( - p: Protocol, - network::IncomingRequest { payload, peer, pending_response }: network::IncomingRequest, -) -> Result { - let r = match p { - Protocol::ChunkFetching => AllMessages::from(IncomingRequest::new( - peer, - decode_with_peer::(peer, payload)?, - pending_response, - )), - Protocol::CollationFetching => AllMessages::from(IncomingRequest::new( - peer, - decode_with_peer::(peer, payload)?, - pending_response, - )), - Protocol::PoVFetching => AllMessages::from(IncomingRequest::new( - peer, - decode_with_peer::(peer, payload)?, - pending_response, - )), - Protocol::AvailableDataFetching => AllMessages::from(IncomingRequest::new( - peer, - decode_with_peer::(peer, payload)?, - pending_response, - )), - Protocol::StatementFetching => { - unreachable!("Statement fetching requests are handled directly. qed."); - }, - Protocol::DisputeSending => { - unreachable!("Dispute sending request are handled directly. qed."); - }, - }; - Ok(r) -} - -fn decode_with_peer( - peer: PeerId, - payload: Vec, -) -> Result { - Req::decode(&mut payload.as_ref()).map_err(|error| RequestMultiplexError { peer, error }) -} - -#[cfg(test)] -mod tests { - use futures::{prelude::*, stream::FusedStream}; - - use super::RequestMultiplexer; - #[test] - fn check_exhaustion_safety() { - // Create and end streams: - fn drop_configs() -> RequestMultiplexer { - let (multiplexer, _) = RequestMultiplexer::new(); - multiplexer - } - let multiplexer = drop_configs(); - futures::executor::block_on(async move { - let mut f = multiplexer; - assert!(f.next().await.is_none()); - assert!(f.is_terminated()); - assert!(f.next().await.is_none()); - assert!(f.is_terminated()); - assert!(f.next().await.is_none()); - assert!(f.is_terminated()); - }); - } -} diff --git a/node/network/bridge/src/tests.rs b/node/network/bridge/src/tests.rs index f22108ba3a2f..89551de89c25 100644 --- a/node/network/bridge/src/tests.rs +++ b/node/network/bridge/src/tests.rs @@ -28,7 +28,7 @@ use std::{ use sc_network::{Event as NetworkEvent, IfDisconnected}; -use polkadot_node_network_protocol::{request_response::request::Requests, view, ObservedRole}; +use polkadot_node_network_protocol::{request_response::outgoing::Requests, view, ObservedRole}; use polkadot_node_subsystem_test_helpers::{ SingleItemSink, SingleItemStream, TestSubsystemContextHandle, }; @@ -41,7 +41,7 @@ use polkadot_subsystem::{ }, ActiveLeavesUpdate, FromOverseer, LeafStatus, OverseerSignal, }; -use sc_network::{config::RequestResponseConfig, Multiaddr}; +use sc_network::Multiaddr; use sp_keyring::Sr25519Keyring; use crate::{network::Network, validator_discovery::AuthorityDiscovery, Rep}; @@ -61,7 +61,6 @@ pub enum NetworkAction { struct TestNetwork { net_events: Arc>>>, action_tx: Arc>>, - _req_configs: Vec, } #[derive(Clone, Debug)] @@ -74,9 +73,7 @@ struct TestNetworkHandle { net_tx: SingleItemSink, } -fn new_test_network( - req_configs: Vec, -) -> (TestNetwork, TestNetworkHandle, TestAuthorityDiscovery) { +fn new_test_network() -> (TestNetwork, TestNetworkHandle, TestAuthorityDiscovery) { let (net_tx, net_rx) = polkadot_node_subsystem_test_helpers::single_item_sink(); let (action_tx, action_rx) = metered::unbounded(); @@ -84,7 +81,6 @@ fn new_test_network( TestNetwork { net_events: Arc::new(Mutex::new(Some(net_rx))), action_tx: Arc::new(Mutex::new(action_tx)), - _req_configs: req_configs, }, TestNetworkHandle { action_rx, net_tx }, TestAuthorityDiscovery, @@ -285,8 +281,7 @@ fn test_harness>( test: impl FnOnce(TestHarness) -> T, ) { let pool = sp_core::testing::TaskExecutor::new(); - let (request_multiplexer, req_configs) = RequestMultiplexer::new(); - let (mut network, network_handle, discovery) = new_test_network(req_configs); + let (mut network, network_handle, discovery) = new_test_network(); let (context, virtual_overseer) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool); let network_stream = network.event_stream(); @@ -294,7 +289,6 @@ fn test_harness>( let bridge = NetworkBridge { network_service: network, authority_discovery_service: discovery, - request_multiplexer, metrics: Metrics(None), sync_oracle, }; @@ -642,17 +636,6 @@ fn peer_view_updates_sent_via_overseer() { let view = view![Hash::repeat_byte(1)]; - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - // bridge will inform about all connected peers. { assert_sends_validation_event_to_all( @@ -696,17 +679,6 @@ fn peer_messages_sent_via_overseer() { .connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full) .await; - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - // bridge will inform about all connected peers. { assert_sends_validation_event_to_all( @@ -770,17 +742,6 @@ fn peer_disconnect_from_just_one_peerset() { let peer = PeerId::random(); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - network_handle .connect_peer(peer.clone(), PeerSet::Validation, ObservedRole::Full) .await; @@ -864,17 +825,6 @@ fn relays_collation_protocol_messages() { let peer_a = PeerId::random(); let peer_b = PeerId::random(); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - network_handle .connect_peer(peer_a.clone(), PeerSet::Validation, ObservedRole::Full) .await; @@ -975,17 +925,6 @@ fn different_views_on_different_peer_sets() { .connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full) .await; - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - // bridge will inform about all connected peers. { assert_sends_validation_event_to_all( @@ -1149,17 +1088,6 @@ fn send_messages_to_peers() { .connect_peer(peer.clone(), PeerSet::Collation, ObservedRole::Full) .await; - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - // bridge will inform about all connected peers. { assert_sends_validation_event_to_all( @@ -1328,17 +1256,6 @@ fn our_view_updates_decreasing_order_and_limited_to_max() { .await; } - assert_matches!( - virtual_overseer.recv().await, - AllMessages::DisputeDistribution(DisputeDistributionMessage::DisputeSendingReceiver(_)) - ); - assert_matches!( - virtual_overseer.recv().await, - AllMessages::StatementDistribution( - StatementDistributionMessage::StatementFetchingReceiver(_) - ) - ); - let our_views = (1..=MAX_VIEW_HEADS).rev().map(|start| { OurView::new( (start..=MAX_VIEW_HEADS) diff --git a/node/network/bridge/src/validator_discovery.rs b/node/network/bridge/src/validator_discovery.rs index 051aeed747b1..2d6d21668983 100644 --- a/node/network/bridge/src/validator_discovery.rs +++ b/node/network/bridge/src/validator_discovery.rs @@ -125,7 +125,7 @@ mod tests { use async_trait::async_trait; use futures::stream::BoxStream; - use polkadot_node_network_protocol::{request_response::request::Requests, PeerId}; + use polkadot_node_network_protocol::{request_response::outgoing::Requests, PeerId}; use sc_network::{Event as NetworkEvent, IfDisconnected}; use sp_keyring::Sr25519Keyring; use std::{borrow::Cow, collections::HashMap}; diff --git a/node/network/collator-protocol/Cargo.toml b/node/network/collator-protocol/Cargo.toml index bbf85edd77a0..e7fc3e025762 100644 --- a/node/network/collator-protocol/Cargo.toml +++ b/node/network/collator-protocol/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" [dependencies] always-assert = "0.1.2" +derive_more = "0.99.14" futures = "0.3.15" futures-timer = "3" thiserror = "1.0.26" @@ -29,5 +30,6 @@ assert_matches = "1.4.0" sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } +parity-scale-codec = { version = "2.0.0", features = ["std"] } polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpers", path = "../../subsystem-test-helpers" } diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index a95a1799c9a6..49eb2ed86391 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -20,15 +20,17 @@ use std::{ time::Duration, }; -use futures::{channel::oneshot, select, stream::FuturesUnordered, Future, FutureExt, StreamExt}; +use futures::{ + channel::oneshot, pin_mut, select, stream::FuturesUnordered, Future, FutureExt, StreamExt, +}; use sp_core::Pair; use polkadot_node_network_protocol::{ peer_set::PeerSet, request_response::{ - request::OutgoingResponse, - v1::{CollationFetchingRequest, CollationFetchingResponse}, - IncomingRequest, + incoming::{self, OutgoingResponse}, + v1::{self as request_v1, CollationFetchingRequest, CollationFetchingResponse}, + IncomingRequest, IncomingRequestReceiver, }, v1 as protocol_v1, OurView, PeerId, UnifiedReputationChange as Rep, View, }; @@ -49,11 +51,12 @@ use polkadot_subsystem::{ }; use super::{Result, LOG_TARGET}; -use crate::error::{log_error, Fatal, NonFatal}; +use crate::error::{log_error, Fatal, FatalResult, NonFatal}; #[cfg(test)] mod tests; +const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Peer sent unparsable request"); const COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message"); const COST_APPARENT_FLOOD: Rep = Rep::CostMinor("Message received when previous one was still being processed"); @@ -684,74 +687,6 @@ where ); } }, - CollationFetchingRequest(incoming) => { - let _span = state - .span_per_relay_parent - .get(&incoming.payload.relay_parent) - .map(|s| s.child("request-collation")); - match state.collating_on { - Some(our_para_id) => - if our_para_id == incoming.payload.para_id { - let (receipt, pov) = if let Some(collation) = - state.collations.get_mut(&incoming.payload.relay_parent) - { - collation.status.advance_to_requested(); - (collation.receipt.clone(), collation.pov.clone()) - } else { - tracing::warn!( - target: LOG_TARGET, - relay_parent = %incoming.payload.relay_parent, - "received a `RequestCollation` for a relay parent we don't have collation stored.", - ); - - return Ok(()) - }; - - state.metrics.on_collation_sent_requested(); - - let _span = _span.as_ref().map(|s| s.child("sending")); - - let waiting = state - .waiting_collation_fetches - .entry(incoming.payload.relay_parent) - .or_default(); - - if !waiting.waiting_peers.insert(incoming.peer) { - tracing::debug!( - target: LOG_TARGET, - "Dropping incoming request as peer has a request in flight already." - ); - ctx.send_message(NetworkBridgeMessage::ReportPeer( - incoming.peer, - COST_APPARENT_FLOOD, - )) - .await; - return Ok(()) - } - - if waiting.collation_fetch_active { - waiting.waiting.push_back(incoming); - } else { - waiting.collation_fetch_active = true; - send_collation(state, incoming, receipt, pov).await; - } - } else { - tracing::warn!( - target: LOG_TARGET, - for_para_id = %incoming.payload.para_id, - our_para_id = %our_para_id, - "received a `CollationFetchingRequest` for unexpected para_id", - ); - }, - None => { - tracing::warn!( - target: LOG_TARGET, - for_para_id = %incoming.payload.para_id, - "received a `RequestCollation` while not collating on any para", - ); - }, - } - }, _ => {}, } @@ -875,6 +810,80 @@ where Ok(()) } +/// Process an incoming network request for a collation. +async fn handle_incoming_request( + ctx: &mut Context, + state: &mut State, + req: IncomingRequest, +) -> Result<()> +where + Context: SubsystemContext, + Context: overseer::SubsystemContext, +{ + let _span = state + .span_per_relay_parent + .get(&req.payload.relay_parent) + .map(|s| s.child("request-collation")); + + match state.collating_on { + Some(our_para_id) if our_para_id == req.payload.para_id => { + let (receipt, pov) = + if let Some(collation) = state.collations.get_mut(&req.payload.relay_parent) { + collation.status.advance_to_requested(); + (collation.receipt.clone(), collation.pov.clone()) + } else { + tracing::warn!( + target: LOG_TARGET, + relay_parent = %req.payload.relay_parent, + "received a `RequestCollation` for a relay parent we don't have collation stored.", + ); + + return Ok(()) + }; + + state.metrics.on_collation_sent_requested(); + + let _span = _span.as_ref().map(|s| s.child("sending")); + + let waiting = + state.waiting_collation_fetches.entry(req.payload.relay_parent).or_default(); + + if !waiting.waiting_peers.insert(req.peer) { + tracing::debug!( + target: LOG_TARGET, + "Dropping incoming request as peer has a request in flight already." + ); + ctx.send_message(NetworkBridgeMessage::ReportPeer(req.peer, COST_APPARENT_FLOOD)) + .await; + return Ok(()) + } + + if waiting.collation_fetch_active { + waiting.waiting.push_back(req); + } else { + waiting.collation_fetch_active = true; + send_collation(state, req, receipt, pov).await; + } + }, + Some(our_para_id) => { + tracing::warn!( + target: LOG_TARGET, + for_para_id = %req.payload.para_id, + our_para_id = %our_para_id, + "received a `CollationFetchingRequest` for unexpected para_id", + ); + }, + None => { + tracing::warn!( + target: LOG_TARGET, + for_para_id = %req.payload.para_id, + "received a `RequestCollation` while not collating on any para", + ); + }, + } + Ok(()) +} + /// Our view has changed. async fn handle_peer_view_change( ctx: &mut Context, @@ -994,8 +1003,9 @@ pub(crate) async fn run( mut ctx: Context, local_peer_id: PeerId, collator_pair: CollatorPair, + mut req_receiver: IncomingRequestReceiver, metrics: Metrics, -) -> Result<()> +) -> FatalResult<()> where Context: SubsystemContext, Context: overseer::SubsystemContext, @@ -1006,6 +1016,8 @@ where let mut runtime = RuntimeInfo::new(None); loop { + let recv_req = req_receiver.recv(|| vec![COST_INVALID_REQUEST]).fuse(); + pin_mut!(recv_req); select! { msg = ctx.recv().fuse() => match msg.map_err(Fatal::SubsystemReceive)? { FromOverseer::Communication { msg } => { @@ -1039,6 +1051,25 @@ where send_collation(&mut state, next, receipt, pov).await; } } + in_req = recv_req => { + match in_req { + Ok(req) => { + log_error( + handle_incoming_request(&mut ctx, &mut state, req).await, + "Handling incoming request" + )?; + } + Err(incoming::Error::Fatal(f)) => return Err(f.into()), + Err(incoming::Error::NonFatal(err)) => { + tracing::debug!( + target: LOG_TARGET, + ?err, + "Decoding incoming request failed" + ); + continue + } + } + } } } } diff --git a/node/network/collator-protocol/src/collator_side/tests.rs b/node/network/collator-protocol/src/collator_side/tests.rs index acb96749e8b7..bef02c42cd55 100644 --- a/node/network/collator-protocol/src/collator_side/tests.rs +++ b/node/network/collator-protocol/src/collator_side/tests.rs @@ -19,14 +19,17 @@ use super::*; use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; -use futures::{executor, future, Future}; +use futures::{executor, future, Future, SinkExt}; use futures_timer::Delay; -use sp_core::{crypto::Pair, Decode}; +use parity_scale_codec::{Decode, Encode}; + +use sc_network::config::IncomingRequest as RawIncomingRequest; +use sp_core::crypto::Pair; use sp_keyring::Sr25519Keyring; use sp_runtime::traits::AppVerify; -use polkadot_node_network_protocol::{our_view, request_response::request::IncomingRequest, view}; +use polkadot_node_network_protocol::{our_view, request_response::IncomingRequest, view}; use polkadot_node_primitives::BlockData; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::v1::{ @@ -194,9 +197,10 @@ type VirtualOverseer = test_helpers::TestSubsystemContextHandle>( +fn test_harness>( local_peer_id: PeerId, collator_pair: CollatorPair, test: impl FnOnce(TestHarness) -> T, @@ -211,22 +215,26 @@ fn test_harness>( let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone()); - let subsystem = run(context, local_peer_id, collator_pair, Metrics::default()); + let (collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(); + let subsystem = async { + run(context, local_peer_id, collator_pair, collation_req_receiver, Default::default()) + .await + .unwrap(); + }; - let test_fut = test(TestHarness { virtual_overseer }); + let test_fut = test(TestHarness { virtual_overseer, req_cfg }); futures::pin_mut!(test_fut); futures::pin_mut!(subsystem); executor::block_on(future::join( async move { - let mut overseer = test_fut.await; - overseer_signal(&mut overseer, OverseerSignal::Conclude).await; + let mut test_harness = test_fut.await; + overseer_signal(&mut test_harness.virtual_overseer, OverseerSignal::Conclude).await; }, subsystem, )) .1 - .unwrap(); } const TIMEOUT: Duration = Duration::from_millis(100); @@ -506,6 +514,7 @@ fn advertise_and_send_collation() { test_harness(local_peer_id, collator_pair, |test_harness| async move { let mut virtual_overseer = test_harness.virtual_overseer; + let mut req_cfg = test_harness.req_cfg; setup_system(&mut virtual_overseer, &test_state).await; @@ -537,34 +546,41 @@ fn advertise_and_send_collation() { expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; // Request a collation. - let (tx, rx) = oneshot::channel(); - overseer_send( - &mut virtual_overseer, - CollatorProtocolMessage::CollationFetchingRequest(IncomingRequest::new( + let (pending_response, rx) = oneshot::channel(); + req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { peer, - CollationFetchingRequest { + payload: CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, - }, - tx, - )), - ) - .await; + } + .encode(), + pending_response, + }) + .await + .unwrap(); // Second request by same validator should get dropped and peer reported: { - let (tx, rx) = oneshot::channel(); - overseer_send( - &mut virtual_overseer, - CollatorProtocolMessage::CollationFetchingRequest(IncomingRequest::new( + let (pending_response, rx) = oneshot::channel(); + + req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { peer, - CollationFetchingRequest { + payload: CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, - }, - tx, - )), - ) - .await; + } + .encode(), + pending_response, + }) + .await + .unwrap(); assert_matches!( overseer_recv(&mut virtual_overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(bad_peer, _)) => { @@ -598,19 +614,23 @@ fn advertise_and_send_collation() { let peer = test_state.validator_peer_id[2].clone(); // Re-request a collation. - let (tx, rx) = oneshot::channel(); - overseer_send( - &mut virtual_overseer, - CollatorProtocolMessage::CollationFetchingRequest(IncomingRequest::new( + let (pending_response, rx) = oneshot::channel(); + + req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { peer, - CollationFetchingRequest { + payload: CollationFetchingRequest { relay_parent: old_relay_parent, para_id: test_state.para_id, - }, - tx, - )), - ) - .await; + } + .encode(), + pending_response, + }) + .await + .unwrap(); // Re-requesting collation should fail: rx.await.unwrap_err(); @@ -629,7 +649,7 @@ fn advertise_and_send_collation() { .await; expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; - virtual_overseer + TestHarness { virtual_overseer, req_cfg } }); } @@ -662,18 +682,16 @@ fn collators_declare_to_connected_peers() { let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; - + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { let peer = test_state.validator_peer_id[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(&mut test_harness.virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id)).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; - virtual_overseer + connect_peer(&mut test_harness.virtual_overseer, peer.clone(), Some(validator_id)).await; + expect_declare_msg(&mut test_harness.virtual_overseer, &test_state, &peer).await; + test_harness }) } @@ -683,8 +701,8 @@ fn collations_are_only_advertised_to_validators_with_correct_view() { let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; let peer = test_state.current_group_validator_peer_ids()[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); @@ -692,31 +710,30 @@ fn collations_are_only_advertised_to_validators_with_correct_view() { let peer2 = test_state.current_group_validator_peer_ids()[1].clone(); let validator_id2 = test_state.current_group_validator_authority_ids()[1].clone(); - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; // Connect the second validator - connect_peer(&mut virtual_overseer, peer2.clone(), Some(validator_id2)).await; + connect_peer(virtual_overseer, peer2.clone(), Some(validator_id2)).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer2).await; + expect_declare_msg(virtual_overseer, &test_state, &peer).await; + expect_declare_msg(virtual_overseer, &test_state, &peer2).await; // And let it tell us that it is has the same view. - send_peer_view_change(&mut virtual_overseer, &peer2, vec![test_state.relay_parent]).await; + send_peer_view_change(virtual_overseer, &peer2, vec![test_state.relay_parent]).await; - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation(virtual_overseer, &test_state, true).await; - expect_advertise_collation_msg(&mut virtual_overseer, &peer2, test_state.relay_parent) - .await; + expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent).await; // The other validator announces that it changed its view. - send_peer_view_change(&mut virtual_overseer, &peer, vec![test_state.relay_parent]).await; + send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; // After changing the view we should receive the advertisement - expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; - virtual_overseer + expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent).await; + test_harness }) } @@ -726,8 +743,8 @@ fn collate_on_two_different_relay_chain_blocks() { let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; let peer = test_state.current_group_validator_peer_ids()[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); @@ -735,34 +752,33 @@ fn collate_on_two_different_relay_chain_blocks() { let peer2 = test_state.current_group_validator_peer_ids()[1].clone(); let validator_id2 = test_state.current_group_validator_authority_ids()[1].clone(); - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; // Connect the second validator - connect_peer(&mut virtual_overseer, peer2.clone(), Some(validator_id2)).await; + connect_peer(virtual_overseer, peer2.clone(), Some(validator_id2)).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer2).await; + expect_declare_msg(virtual_overseer, &test_state, &peer).await; + expect_declare_msg(virtual_overseer, &test_state, &peer2).await; - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation(virtual_overseer, &test_state, true).await; let old_relay_parent = test_state.relay_parent; // Advance to a new round, while informing the subsystem that the old and the new relay parent are active. - test_state.advance_to_new_round(&mut virtual_overseer, true).await; + test_state.advance_to_new_round(virtual_overseer, true).await; - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation(virtual_overseer, &test_state, true).await; - send_peer_view_change(&mut virtual_overseer, &peer, vec![old_relay_parent]).await; - expect_advertise_collation_msg(&mut virtual_overseer, &peer, old_relay_parent).await; + send_peer_view_change(virtual_overseer, &peer, vec![old_relay_parent]).await; + expect_advertise_collation_msg(virtual_overseer, &peer, old_relay_parent).await; - send_peer_view_change(&mut virtual_overseer, &peer2, vec![test_state.relay_parent]).await; + send_peer_view_change(virtual_overseer, &peer2, vec![test_state.relay_parent]).await; - expect_advertise_collation_msg(&mut virtual_overseer, &peer2, test_state.relay_parent) - .await; - virtual_overseer + expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent).await; + test_harness }) } @@ -772,32 +788,32 @@ fn validator_reconnect_does_not_advertise_a_second_time() { let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; let peer = test_state.current_group_validator_peer_ids()[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id.clone())).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; + connect_peer(virtual_overseer, peer.clone(), Some(validator_id.clone())).await; + expect_declare_msg(virtual_overseer, &test_state, &peer).await; - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation(virtual_overseer, &test_state, true).await; - send_peer_view_change(&mut virtual_overseer, &peer, vec![test_state.relay_parent]).await; - expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; + send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; + expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent).await; // Disconnect and reconnect directly - disconnect_peer(&mut virtual_overseer, peer.clone()).await; - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id)).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; + disconnect_peer(virtual_overseer, peer.clone()).await; + connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + expect_declare_msg(virtual_overseer, &test_state, &peer).await; - send_peer_view_change(&mut virtual_overseer, &peer, vec![test_state.relay_parent]).await; + send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; - assert!(overseer_recv_with_timeout(&mut virtual_overseer, TIMEOUT).await.is_none()); - virtual_overseer + assert!(overseer_recv_with_timeout(virtual_overseer, TIMEOUT).await.is_none()); + test_harness }) } @@ -808,20 +824,20 @@ fn collators_reject_declare_messages() { let collator_pair = test_state.collator_pair.clone(); let collator_pair2 = CollatorPair::generate().0; - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; let peer = test_state.current_group_validator_peer_ids()[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id)).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; + connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + expect_declare_msg(virtual_overseer, &test_state, &peer).await; overseer_send( - &mut virtual_overseer, + virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdateV1(NetworkBridgeEvent::PeerMessage( peer.clone(), protocol_v1::CollatorProtocolMessage::Declare( @@ -834,13 +850,13 @@ fn collators_reject_declare_messages() { .await; assert_matches!( - overseer_recv(&mut virtual_overseer).await, + overseer_recv(virtual_overseer).await, AllMessages::NetworkBridge(NetworkBridgeMessage::DisconnectPeer( p, PeerSet::Collation, )) if p == peer ); - virtual_overseer + test_harness }) } @@ -862,67 +878,61 @@ where let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); - test_harness(local_peer_id, collator_pair, |test_harness| async move { - let mut virtual_overseer = test_harness.virtual_overseer; + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; + let req_cfg = &mut test_harness.req_cfg; - setup_system(&mut virtual_overseer, &test_state).await; + setup_system(virtual_overseer, &test_state).await; let DistributeCollation { candidate, pov_block } = - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation(virtual_overseer, &test_state, true).await; for (val, peer) in test_state .current_group_validator_authority_ids() .into_iter() .zip(test_state.current_group_validator_peer_ids()) { - connect_peer(&mut virtual_overseer, peer.clone(), Some(val.clone())).await; + connect_peer(virtual_overseer, peer.clone(), Some(val.clone())).await; } // We declare to the connected validators that we are a collator. // We need to catch all `Declare` messages to the validators we've // previosly connected to. for peer_id in test_state.current_group_validator_peer_ids() { - expect_declare_msg(&mut virtual_overseer, &test_state, &peer_id).await; + expect_declare_msg(virtual_overseer, &test_state, &peer_id).await; } let validator_0 = test_state.current_group_validator_peer_ids()[0].clone(); let validator_1 = test_state.current_group_validator_peer_ids()[1].clone(); // Send info about peer's view. - send_peer_view_change(&mut virtual_overseer, &validator_0, vec![test_state.relay_parent]) - .await; - send_peer_view_change(&mut virtual_overseer, &validator_1, vec![test_state.relay_parent]) - .await; + send_peer_view_change(virtual_overseer, &validator_0, vec![test_state.relay_parent]).await; + send_peer_view_change(virtual_overseer, &validator_1, vec![test_state.relay_parent]).await; // The peer is interested in a leaf that we have a collation for; // advertise it. - expect_advertise_collation_msg( - &mut virtual_overseer, - &validator_0, - test_state.relay_parent, - ) - .await; - expect_advertise_collation_msg( - &mut virtual_overseer, - &validator_1, - test_state.relay_parent, - ) - .await; + expect_advertise_collation_msg(virtual_overseer, &validator_0, test_state.relay_parent) + .await; + expect_advertise_collation_msg(virtual_overseer, &validator_1, test_state.relay_parent) + .await; // Request a collation. - let (tx, rx) = oneshot::channel(); - overseer_send( - &mut virtual_overseer, - CollatorProtocolMessage::CollationFetchingRequest(IncomingRequest::new( - validator_0, - CollationFetchingRequest { + let (pending_response, rx) = oneshot::channel(); + req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { + peer: validator_0, + payload: CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, - }, - tx, - )), - ) - .await; + } + .encode(), + pending_response, + }) + .await + .unwrap(); // Keep the feedback channel alive because we need to use it to inform about the finished transfer. let feedback_tx = assert_matches!( @@ -942,19 +952,22 @@ where ); // Let the second validator request the collation. - let (tx, rx) = oneshot::channel(); - overseer_send( - &mut virtual_overseer, - CollatorProtocolMessage::CollationFetchingRequest(IncomingRequest::new( - validator_1, - CollationFetchingRequest { + let (pending_response, rx) = oneshot::channel(); + req_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { + peer: validator_1, + payload: CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, - }, - tx, - )), - ) - .await; + } + .encode(), + pending_response, + }) + .await + .unwrap(); let rx = handle_first_response(rx, feedback_tx).await; @@ -975,6 +988,6 @@ where } ); - virtual_overseer + test_harness }); } diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index aded43d74d1d..ff02d87dbaff 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -17,40 +17,45 @@ //! Error handling related code and Error/Result definitions. -use polkadot_node_primitives::UncheckedSignedFullStatement; -use polkadot_subsystem::errors::SubsystemError; use thiserror::Error; -use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault}; +use polkadot_node_network_protocol::request_response::incoming; +use polkadot_node_primitives::UncheckedSignedFullStatement; +use polkadot_node_subsystem_util::runtime; +use polkadot_subsystem::errors::SubsystemError; use crate::LOG_TARGET; /// General result. pub type Result = std::result::Result; - -/// Result for fatal only failures. +/// Result with only fatal errors. pub type FatalResult = std::result::Result; /// Errors for statement distribution. -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), } -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) +impl From for Error { + fn from(o: runtime::Error) -> Self { + match o { + runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)), + runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)), + } } } -impl From for Error { - fn from(o: runtime::Error) -> Self { - Self(Fault::from_other(o)) +impl From for Error { + fn from(o: incoming::Error) -> Self { + match o { + incoming::Error::Fatal(f) => Self::Fatal(Fatal::IncomingRequest(f)), + incoming::Error::NonFatal(f) => Self::NonFatal(NonFatal::IncomingRequest(f)), + } } } @@ -64,18 +69,26 @@ pub enum Fatal { /// Errors coming from runtime::Runtime. #[error("Error while accessing runtime information")] Runtime(#[from] runtime::Fatal), + + /// Errors coming from receiving incoming requests. + #[error("Retrieving next incoming request failed")] + IncomingRequest(#[from] incoming::Fatal), } /// Errors for fetching of runtime information. #[derive(Debug, Error)] pub enum NonFatal { /// Signature was invalid on received statement. - #[error("CollationSeconded contained statement with invalid signature.")] + #[error("CollationSeconded contained statement with invalid signature")] InvalidStatementSignature(UncheckedSignedFullStatement), /// Errors coming from runtime::Runtime. #[error("Error while accessing runtime information")] Runtime(#[from] runtime::NonFatal), + + /// Errors coming from receiving incoming requests. + #[error("Retrieving next incoming request failed")] + IncomingRequest(#[from] incoming::NonFatal), } /// Utility for eating top level errors and log them. @@ -83,8 +96,12 @@ pub enum NonFatal { /// We basically always want to try and continue on error. This utility function is meant to /// consume top-level errors by simply logging them. pub fn log_error(result: Result<()>, ctx: &'static str) -> FatalResult<()> { - if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? { - tracing::warn!(target: LOG_TARGET, error = ?error, ctx) + match result { + Err(Error::Fatal(f)) => Err(f), + Err(Error::NonFatal(error)) => { + tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + Ok(()) + }, + Ok(()) => Ok(()), } - Ok(()) } diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index 12305fd0957e..0aa53156e759 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -26,7 +26,10 @@ use futures::{FutureExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; -use polkadot_node_network_protocol::{PeerId, UnifiedReputationChange as Rep}; +use polkadot_node_network_protocol::{ + request_response::{v1 as request_v1, IncomingRequestReceiver}, + PeerId, UnifiedReputationChange as Rep, +}; use polkadot_primitives::v1::CollatorPair; use polkadot_subsystem::{ @@ -36,7 +39,7 @@ use polkadot_subsystem::{ }; mod error; -use error::Result; +use error::{FatalResult, Result}; mod collator_side; mod validator_side; @@ -73,7 +76,12 @@ pub enum ProtocolSide { metrics: validator_side::Metrics, }, /// Collators operate on a parachain. - Collator(PeerId, CollatorPair, collator_side::Metrics), + Collator( + PeerId, + CollatorPair, + IncomingRequestReceiver, + collator_side::Metrics, + ), } /// The collator protocol subsystem. @@ -90,7 +98,7 @@ impl CollatorProtocolSubsystem { Self { protocol_side } } - async fn run(self, ctx: Context) -> Result<()> + async fn run(self, ctx: Context) -> FatalResult<()> where Context: overseer::SubsystemContext, Context: SubsystemContext, @@ -98,8 +106,8 @@ impl CollatorProtocolSubsystem { match self.protocol_side { ProtocolSide::Validator { keystore, eviction_policy, metrics } => validator_side::run(ctx, keystore, eviction_policy, metrics).await, - ProtocolSide::Collator(local_peer_id, collator_pair, metrics) => - collator_side::run(ctx, local_peer_id, collator_pair, metrics).await, + ProtocolSide::Collator(local_peer_id, collator_pair, req_receiver, metrics) => + collator_side::run(ctx, local_peer_id, collator_pair, req_receiver, metrics).await, } } } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 0f75593f59d4..3f77e499de08 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -36,7 +36,7 @@ use polkadot_node_network_protocol::{ peer_set::PeerSet, request_response as req_res, request_response::{ - request::{Recipient, RequestError}, + outgoing::{Recipient, RequestError}, v1::{CollationFetchingRequest, CollationFetchingResponse}, OutgoingRequest, Requests, }, @@ -54,6 +54,8 @@ use polkadot_subsystem::{ overseer, FromOverseer, OverseerSignal, PerLeafSpan, SubsystemContext, SubsystemSender, }; +use crate::error::FatalResult; + use super::{modify_reputation, Result, LOG_TARGET}; #[cfg(test)] @@ -1079,12 +1081,6 @@ async fn process_msg( ); } }, - CollationFetchingRequest(_) => { - tracing::warn!( - target: LOG_TARGET, - "CollationFetchingRequest message is not expected on the validator side of the protocol", - ); - }, Seconded(parent, stmt) => { if let Some(collation_event) = state.pending_candidates.remove(&parent) { let (collator_id, pending_collation) = collation_event; @@ -1146,7 +1142,7 @@ pub(crate) async fn run( keystore: SyncCryptoStorePtr, eviction_policy: crate::CollatorEvictionPolicy, metrics: Metrics, -) -> Result<()> +) -> FatalResult<()> where Context: overseer::SubsystemContext, Context: SubsystemContext, diff --git a/node/network/dispute-distribution/Cargo.toml b/node/network/dispute-distribution/Cargo.toml index 2818c5e900e3..12e3dc8d72d6 100644 --- a/node/network/dispute-distribution/Cargo.toml +++ b/node/network/dispute-distribution/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] futures = "0.3.15" tracing = "0.1.26" +derive_more = "0.99.14" parity-scale-codec = { version = "2.0.0", features = ["std"] } polkadot-primitives = { path = "../../../primitives" } polkadot-erasure-coding = { path = "../../../erasure-coding" } diff --git a/node/network/dispute-distribution/src/error.rs b/node/network/dispute-distribution/src/error.rs index 8d236ced4312..7b7d7a64238f 100644 --- a/node/network/dispute-distribution/src/error.rs +++ b/node/network/dispute-distribution/src/error.rs @@ -19,32 +19,25 @@ use thiserror::Error; -use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault}; +use polkadot_node_subsystem_util::runtime; use polkadot_subsystem::SubsystemError; use crate::{sender, LOG_TARGET}; -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } -} - -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) - } +pub enum Error { + /// Fatal errors of dispute distribution. + Fatal(Fatal), + /// Non fatal errors of dispute distribution. + NonFatal(NonFatal), } impl From for Error { - fn from(e: sender::Error) -> Self { - match e.0 { - Fault::Fatal(f) => Self(Fault::Fatal(Fatal::Sender(f))), - Fault::Err(nf) => Self(Fault::Err(NonFatal::Sender(nf))), + fn from(o: sender::Error) -> Self { + match o { + sender::Error::Fatal(f) => Self::Fatal(Fatal::Sender(f)), + sender::Error::NonFatal(f) => Self::NonFatal(NonFatal::Sender(f)), } } } @@ -90,8 +83,12 @@ pub type FatalResult = std::result::Result; /// We basically always want to try and continue on error. This utility function is meant to /// consume top-level errors by simply logging them pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> { - if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? { - tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + match result { + Err(Error::Fatal(f)) => Err(f), + Err(Error::NonFatal(error)) => { + tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + Ok(()) + }, + Ok(()) => Ok(()), } - Ok(()) } diff --git a/node/network/dispute-distribution/src/lib.rs b/node/network/dispute-distribution/src/lib.rs index 702639ff6359..d1890f4c6b20 100644 --- a/node/network/dispute-distribution/src/lib.rs +++ b/node/network/dispute-distribution/src/lib.rs @@ -29,6 +29,7 @@ use futures::{channel::mpsc, FutureExt, StreamExt, TryFutureExt}; use polkadot_node_network_protocol::authority_discovery::AuthorityDiscovery; use sp_keystore::SyncCryptoStorePtr; +use polkadot_node_network_protocol::request_response::{incoming::IncomingRequestReceiver, v1}; use polkadot_node_primitives::DISPUTE_WINDOW; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; use polkadot_subsystem::{ @@ -103,6 +104,9 @@ pub struct DisputeDistributionSubsystem { /// Receive messages from `SendTask`. sender_rx: mpsc::Receiver, + /// Receiver for incoming requests. + req_receiver: Option>, + /// Authority discovery service. authority_discovery: AD, @@ -133,14 +137,26 @@ where AD: AuthorityDiscovery + Clone, { /// Create a new instance of the availability distribution. - pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD, metrics: Metrics) -> Self { + pub fn new( + keystore: SyncCryptoStorePtr, + req_receiver: IncomingRequestReceiver, + authority_discovery: AD, + metrics: Metrics, + ) -> Self { let runtime = RuntimeInfo::new_with_config(runtime::Config { keystore: Some(keystore), session_cache_lru_size: DISPUTE_WINDOW as usize, }); let (tx, sender_rx) = mpsc::channel(1); let disputes_sender = DisputeSender::new(tx, metrics.clone()); - Self { runtime, disputes_sender, sender_rx, authority_discovery, metrics } + Self { + runtime, + disputes_sender, + sender_rx, + req_receiver: Some(req_receiver), + authority_discovery, + metrics, + } } /// Start processing work as passed on from the Overseer. @@ -151,6 +167,17 @@ where + Sync + Send, { + let receiver = DisputesReceiver::new( + ctx.sender().clone(), + self.req_receiver + .take() + .expect("Must be provided on `new` and we take ownership here. qed."), + self.authority_discovery.clone(), + self.metrics.clone(), + ); + ctx.spawn("disputes-receiver", receiver.run().boxed()) + .map_err(Fatal::SpawnTask)?; + loop { let message = MuxedMessage::receive(&mut ctx, &mut self.sender_rx).await; match message { @@ -202,18 +229,6 @@ where match msg { DisputeDistributionMessage::SendDispute(dispute_msg) => self.disputes_sender.start_sender(ctx, &mut self.runtime, dispute_msg).await?, - // This message will only arrive once: - DisputeDistributionMessage::DisputeSendingReceiver(receiver) => { - let receiver = DisputesReceiver::new( - ctx.sender().clone(), - receiver, - self.authority_discovery.clone(), - self.metrics.clone(), - ); - - ctx.spawn("disputes-receiver", receiver.run().boxed()) - .map_err(Fatal::SpawnTask)?; - }, } Ok(()) } diff --git a/node/network/dispute-distribution/src/receiver/error.rs b/node/network/dispute-distribution/src/receiver/error.rs index 9134adf95769..e9c92f171dd3 100644 --- a/node/network/dispute-distribution/src/receiver/error.rs +++ b/node/network/dispute-distribution/src/receiver/error.rs @@ -19,43 +19,48 @@ use thiserror::Error; -use polkadot_node_network_protocol::{request_response::request::ReceiveError, PeerId}; -use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault}; +use polkadot_node_network_protocol::{request_response::incoming, PeerId}; +use polkadot_node_subsystem_util::runtime; use crate::LOG_TARGET; -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), } -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) +impl From for Error { + fn from(o: runtime::Error) -> Self { + match o { + runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)), + runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)), + } } } -impl From for Error { - fn from(o: runtime::Error) -> Self { - Self(Fault::from_other(o)) +impl From for Error { + fn from(o: incoming::Error) -> Self { + match o { + incoming::Error::Fatal(f) => Self::Fatal(Fatal::IncomingRequest(f)), + incoming::Error::NonFatal(f) => Self::NonFatal(NonFatal::IncomingRequest(f)), + } } } /// Fatal errors of this subsystem. #[derive(Debug, Error)] pub enum Fatal { - /// Request channel returned `None`. Likely a system shutdown. - #[error("Request channel stream finished.")] - RequestChannelFinished, - /// Errors coming from runtime::Runtime. #[error("Error while accessing runtime information")] Runtime(#[from] runtime::Fatal), + + /// Errors coming from receiving incoming requests. + #[error("Retrieving next incoming request failed.")] + IncomingRequest(#[from] incoming::Fatal), } /// Non-fatal errors of this subsystem. @@ -65,10 +70,6 @@ pub enum NonFatal { #[error("Sending back response to peer {0} failed.")] SendResponse(PeerId), - /// Getting request from raw request failed. - #[error("Decoding request failed.")] - FromRawRequest(#[source] ReceiveError), - /// Setting reputation for peer failed. #[error("Changing peer's ({0}) reputation failed.")] SetPeerReputation(PeerId), @@ -88,20 +89,27 @@ pub enum NonFatal { /// Errors coming from runtime::Runtime. #[error("Error while accessing runtime information")] Runtime(#[from] runtime::NonFatal), + + /// Errors coming from receiving incoming requests. + #[error("Retrieving next incoming request failed.")] + IncomingRequest(#[from] incoming::NonFatal), } pub type Result = std::result::Result; -pub type FatalResult = std::result::Result; pub type NonFatalResult = std::result::Result; /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. This utility function is meant to -/// consume top-level errors by simply logging them +/// consume top-level errors by simply logging them. pub fn log_error(result: Result<()>) -> std::result::Result<(), Fatal> { - if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? { - tracing::warn!(target: LOG_TARGET, error = ?error); + match result { + Err(Error::Fatal(f)) => Err(f), + Err(Error::NonFatal(error)) => { + tracing::warn!(target: LOG_TARGET, error = ?error); + Ok(()) + }, + Ok(()) => Ok(()), } - Ok(()) } diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 1d7c61164acf..5bd8b6712ba8 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -21,19 +21,20 @@ use std::{ }; use futures::{ - channel::{mpsc, oneshot}, + channel::oneshot, future::{poll_fn, BoxFuture}, + pin_mut, stream::{FusedStream, FuturesUnordered, StreamExt}, - FutureExt, Stream, + Future, FutureExt, Stream, }; use lru::LruCache; use polkadot_node_network_protocol::{ authority_discovery::AuthorityDiscovery, request_response::{ - request::{OutgoingResponse, OutgoingResponseSender}, + incoming::{OutgoingResponse, OutgoingResponseSender}, v1::{DisputeRequest, DisputeResponse}, - IncomingRequest, + IncomingRequest, IncomingRequestReceiver, }, PeerId, UnifiedReputationChange as Rep, }; @@ -50,7 +51,7 @@ use crate::{ }; mod error; -use self::error::{log_error, Fatal, FatalResult, NonFatal, NonFatalResult, Result}; +use self::error::{log_error, NonFatal, NonFatalResult, Result}; const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded."); const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid."); @@ -72,7 +73,7 @@ pub struct DisputesReceiver { sender: Sender, /// Channel to retrieve incoming requests from. - receiver: mpsc::Receiver, + receiver: IncomingRequestReceiver, /// Authority discovery service: authority_discovery: AD, @@ -103,26 +104,27 @@ enum MuxedMessage { ConfirmedImport(NonFatalResult<(PeerId, ImportStatementsResult)>), /// A new request has arrived and should be handled. - NewRequest(sc_network::config::IncomingRequest), + NewRequest(IncomingRequest), } impl MuxedMessage { async fn receive( pending_imports: &mut PendingImports, - pending_requests: &mut mpsc::Receiver, - ) -> FatalResult { + pending_requests: &mut IncomingRequestReceiver, + ) -> Result { poll_fn(|ctx| { - if let Poll::Ready(v) = pending_requests.poll_next_unpin(ctx) { - let r = match v { - None => Err(Fatal::RequestChannelFinished), - Some(msg) => Ok(MuxedMessage::NewRequest(msg)), - }; - return Poll::Ready(r) + let next_req = pending_requests.recv(|| vec![COST_INVALID_REQUEST]); + pin_mut!(next_req); + if let Poll::Ready(r) = next_req.poll(ctx) { + return match r { + Err(e) => Poll::Ready(Err(e.into())), + Ok(v) => Poll::Ready(Ok(Self::NewRequest(v))), + } } // In case of Ready(None) return `Pending` below - we want to wait for the next request // in that case. if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) { - return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v))) + return Poll::Ready(Ok(Self::ConfirmedImport(v))) } Poll::Pending }) @@ -137,7 +139,7 @@ where /// Create a new receiver which can be `run`. pub fn new( sender: Sender, - receiver: mpsc::Receiver, + receiver: IncomingRequestReceiver, authority_discovery: AD, metrics: Metrics, ) -> Self { @@ -165,17 +167,14 @@ where loop { match log_error(self.run_inner().await) { Ok(()) => {}, - Err(Fatal::RequestChannelFinished) => { + Err(fatal) => { tracing::debug!( target: LOG_TARGET, - "Incoming request stream exhausted - shutting down?" + error = ?fatal, + "Shutting down" ); return }, - Err(err) => { - tracing::warn!(target: LOG_TARGET, ?err, "Dispute receiver died."); - return - }, } } } @@ -184,7 +183,7 @@ where async fn run_inner(&mut self) -> Result<()> { let msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).await?; - let raw = match msg { + let incoming = match msg { // We need to clean up futures, to make sure responses are sent: MuxedMessage::ConfirmedImport(m_bad) => { self.ban_bad_peer(m_bad)?; @@ -195,14 +194,14 @@ where self.metrics.on_received_request(); - let peer = raw.peer; + let peer = incoming.peer; // Only accept messages from validators: - if self.authority_discovery.get_authority_id_by_peer_id(raw.peer).await.is_none() { - raw.pending_response - .send(sc_network::config::OutgoingResponse { + if self.authority_discovery.get_authority_id_by_peer_id(peer).await.is_none() { + incoming + .send_outgoing_response(OutgoingResponse { result: Err(()), - reputation_changes: vec![COST_NOT_A_VALIDATOR.into_base_rep()], + reputation_changes: vec![COST_NOT_A_VALIDATOR], sent_feedback: None, }) .map_err(|_| NonFatal::SendResponse(peer))?; @@ -210,10 +209,6 @@ where return Err(NonFatal::NotAValidator(peer).into()) } - let incoming = - IncomingRequest::::try_from_raw(raw, vec![COST_INVALID_REQUEST]) - .map_err(NonFatal::FromRawRequest)?; - // Immediately drop requests from peers that already have requests in flight or have // been banned recently (flood protection): if self.pending_imports.peer_is_pending(&peer) || self.banned_peers.contains(&peer) { diff --git a/node/network/dispute-distribution/src/sender/error.rs b/node/network/dispute-distribution/src/sender/error.rs index 72bba74b6001..4961fc5685b1 100644 --- a/node/network/dispute-distribution/src/sender/error.rs +++ b/node/network/dispute-distribution/src/sender/error.rs @@ -20,33 +20,30 @@ use thiserror::Error; use polkadot_node_primitives::disputes::DisputeMessageCheckError; -use polkadot_node_subsystem_util::{runtime, Fault}; +use polkadot_node_subsystem_util::runtime; use polkadot_subsystem::SubsystemError; -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } -} - -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) - } +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), } impl From for Error { fn from(o: runtime::Error) -> Self { - Self(Fault::from_other(o)) + match o { + runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)), + runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)), + } } } /// Fatal errors of this subsystem. #[derive(Debug, Error)] +#[error(transparent)] pub enum Fatal { /// Spawning a running task failed. #[error("Spawning subsystem task failed")] diff --git a/node/network/dispute-distribution/src/tests/mod.rs b/node/network/dispute-distribution/src/tests/mod.rs index 9eb6363c3b95..3a8c742f7ef4 100644 --- a/node/network/dispute-distribution/src/tests/mod.rs +++ b/node/network/dispute-distribution/src/tests/mod.rs @@ -28,7 +28,12 @@ use futures::{ use futures_timer::Delay; use parity_scale_codec::{Decode, Encode}; -use polkadot_node_network_protocol::{request_response::v1::DisputeRequest, PeerId}; +use sc_network::config::RequestResponseConfig; + +use polkadot_node_network_protocol::{ + request_response::{v1::DisputeRequest, IncomingRequest}, + PeerId, +}; use sp_keyring::Sr25519Keyring; use polkadot_node_network_protocol::{ @@ -62,8 +67,8 @@ pub mod mock; #[test] fn send_dispute_sends_dispute() { - let test = |mut handle: TestSubsystemContextHandle| async move { - let (_, _) = handle_subsystem_startup(&mut handle, None).await; + let test = |mut handle: TestSubsystemContextHandle, _req_cfg| async move { + let _ = handle_subsystem_startup(&mut handle, None).await; let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); @@ -110,8 +115,10 @@ fn send_dispute_sends_dispute() { #[test] fn received_request_triggers_import() { - let test = |mut handle: TestSubsystemContextHandle| async move { - let (_, mut req_tx) = handle_subsystem_startup(&mut handle, None).await; + let test = |mut handle: TestSubsystemContextHandle, + mut req_cfg: RequestResponseConfig| async move { + let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); + let _ = handle_subsystem_startup(&mut handle, None).await; let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); @@ -119,8 +126,7 @@ fn received_request_triggers_import() { // Non validator request should get dropped: let rx_response = - send_network_dispute_request(&mut req_tx, PeerId::random(), message.clone().into()) - .await; + send_network_dispute_request(req_tx, PeerId::random(), message.clone().into()).await; assert_matches!( rx_response.await, @@ -141,7 +147,7 @@ fn received_request_triggers_import() { // subsequent requests should get dropped. nested_network_dispute_request( &mut handle, - &mut req_tx, + req_tx, MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), message.clone().into(), ImportStatementsResult::InvalidImport, @@ -208,7 +214,7 @@ fn received_request_triggers_import() { // Subsequent sends from Alice should fail (peer is banned): { let rx_response = send_network_dispute_request( - &mut req_tx, + req_tx, MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), message.clone().into(), ) @@ -229,7 +235,7 @@ fn received_request_triggers_import() { // But should work fine for Bob: nested_network_dispute_request( &mut handle, - &mut req_tx, + req_tx, MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob), message.clone().into(), ImportStatementsResult::ValidImport, @@ -246,11 +252,11 @@ fn received_request_triggers_import() { #[test] fn disputes_are_recovered_at_startup() { - let test = |mut handle: TestSubsystemContextHandle| async move { + let test = |mut handle: TestSubsystemContextHandle, _| async move { let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); - let (_, _) = handle_subsystem_startup(&mut handle, Some(candidate.hash())).await; + let _ = handle_subsystem_startup(&mut handle, Some(candidate.hash())).await; let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; // Requests needed session info: @@ -302,8 +308,8 @@ fn disputes_are_recovered_at_startup() { #[test] fn send_dispute_gets_cleaned_up() { - let test = |mut handle: TestSubsystemContextHandle| async move { - let (old_head, _) = handle_subsystem_startup(&mut handle, None).await; + let test = |mut handle: TestSubsystemContextHandle, _| async move { + let old_head = handle_subsystem_startup(&mut handle, None).await; let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); @@ -367,8 +373,8 @@ fn send_dispute_gets_cleaned_up() { #[test] fn dispute_retries_and_works_across_session_boundaries() { - let test = |mut handle: TestSubsystemContextHandle| async move { - let (old_head, _) = handle_subsystem_startup(&mut handle, None).await; + let test = |mut handle: TestSubsystemContextHandle, _| async move { + let old_head = handle_subsystem_startup(&mut handle, None).await; let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); @@ -689,14 +695,7 @@ async fn check_sent_requests( async fn handle_subsystem_startup( handle: &mut TestSubsystemContextHandle, ongoing_dispute: Option, -) -> (Hash, mpsc::Sender) { - let (request_tx, request_rx) = mpsc::channel(5); - handle - .send(FromOverseer::Communication { - msg: DisputeDistributionMessage::DisputeSendingReceiver(request_rx), - }) - .await; - +) -> Hash { let relay_parent = Hash::random(); activate_leaf( handle, @@ -707,7 +706,7 @@ async fn handle_subsystem_startup( ongoing_dispute.into_iter().map(|c| (MOCK_SESSION_INDEX, c)).collect(), ) .await; - (relay_parent, request_tx) + relay_parent } /// Launch subsystem and provided test function @@ -715,14 +714,19 @@ async fn handle_subsystem_startup( /// which simulates the overseer. fn test_harness(test: TestFn) where - TestFn: FnOnce(TestSubsystemContextHandle) -> Fut, + TestFn: FnOnce( + TestSubsystemContextHandle, + RequestResponseConfig, + ) -> Fut, Fut: Future, { sp_tracing::try_init_simple(); let keystore = make_ferdie_keystore(); + let (req_receiver, req_cfg) = IncomingRequest::get_config_receiver(); let subsystem = DisputeDistributionSubsystem::new( keystore, + req_receiver, MOCK_AUTHORITY_DISCOVERY.clone(), Metrics::new_dummy(), ); @@ -739,5 +743,5 @@ where }, } }; - subsystem_test_harness(test, subsystem); + subsystem_test_harness(|handle| test(handle, req_cfg), subsystem); } diff --git a/node/network/protocol/Cargo.toml b/node/network/protocol/Cargo.toml index 4b6292aeefcc..6fbdc4268ee6 100644 --- a/node/network/protocol/Cargo.toml +++ b/node/network/protocol/Cargo.toml @@ -14,5 +14,6 @@ parity-scale-codec = { version = "2.0.0", default-features = false, features = [ sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-authority-discovery = { git = "https://github.com/paritytech/substrate", branch = "master" } strum = { version = "0.20", features = ["derive"] } +derive_more = "0.99.11" futures = "0.3.15" thiserror = "1.0.26" diff --git a/node/network/protocol/src/request_response/incoming/error.rs b/node/network/protocol/src/request_response/incoming/error.rs new file mode 100644 index 000000000000..d7ffe6b1fd4c --- /dev/null +++ b/node/network/protocol/src/request_response/incoming/error.rs @@ -0,0 +1,55 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Error handling related code and Error/Result definitions. + +use sc_network::PeerId; +use thiserror::Error; + +use parity_scale_codec::Error as DecodingError; + +/// Errors that happen during reception/decoding of incoming requests. +#[derive(Debug, Error, derive_more::From)] +#[error(transparent)] +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), +} + +/// Fatal errors when receiving incoming requests. +#[derive(Debug, Error)] +pub enum Fatal { + /// Incoming request stream exhausted. Should only happen on shutdown. + #[error("Incoming request channel got closed.")] + RequestChannelExhausted, +} + +/// Non-fatal errors when receiving incoming requests. +#[derive(Debug, Error)] +pub enum NonFatal { + /// Decoding failed, we were able to change the peer's reputation accordingly. + #[error("Decoding request failed for peer {0}.")] + DecodingError(PeerId, #[source] DecodingError), + + /// Decoding failed, but sending reputation change failed. + #[error("Decoding request failed for peer {0}, and changing reputation failed.")] + DecodingErrorNoReputationChange(PeerId, #[source] DecodingError), +} + +/// General result based on above `Error`. +pub type Result = std::result::Result; diff --git a/node/network/protocol/src/request_response/incoming/mod.rs b/node/network/protocol/src/request_response/incoming/mod.rs new file mode 100644 index 000000000000..efc24babd23e --- /dev/null +++ b/node/network/protocol/src/request_response/incoming/mod.rs @@ -0,0 +1,232 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::marker::PhantomData; + +use futures::{ + channel::{mpsc, oneshot}, + StreamExt, +}; + +use parity_scale_codec::{Decode, Encode}; + +use sc_network::{config as netconfig, config::RequestResponseConfig, PeerId}; + +use super::IsRequest; +use crate::UnifiedReputationChange; + +mod error; +pub use error::{Error, Fatal, NonFatal, Result}; + +/// A request coming in, including a sender for sending responses. +/// +/// Typed `IncomingRequest`s, see `IncomingRequest::get_config_receiver` and substrate +/// `NetworkConfiguration` for more information. +#[derive(Debug)] +pub struct IncomingRequest { + /// `PeerId` of sending peer. + pub peer: PeerId, + /// The sent request. + pub payload: Req, + /// Sender for sending response back. + pub pending_response: OutgoingResponseSender, +} + +impl IncomingRequest +where + Req: IsRequest + Decode + Encode, + Req::Response: Encode, +{ + /// Create configuration for `NetworkConfiguration::request_response_porotocols` and a + /// corresponding typed receiver. + /// + /// This Register that config with substrate networking and receive incoming requests via the + /// returned `IncomingRequestReceiver`. + pub fn get_config_receiver() -> (IncomingRequestReceiver, RequestResponseConfig) { + let (raw, cfg) = Req::PROTOCOL.get_config(); + (IncomingRequestReceiver { raw, phantom: PhantomData {} }, cfg) + } + + /// Create new `IncomingRequest`. + pub fn new( + peer: PeerId, + payload: Req, + pending_response: oneshot::Sender, + ) -> Self { + Self { + peer, + payload, + pending_response: OutgoingResponseSender { pending_response, phantom: PhantomData {} }, + } + } + + /// Try building from raw substrate request. + /// + /// This function will fail if the request cannot be decoded and will apply passed in + /// reputation changes in that case. + /// + /// Params: + /// - The raw request to decode + /// - Reputation changes to apply for the peer in case decoding fails. + fn try_from_raw( + raw: sc_network::config::IncomingRequest, + reputation_changes: Vec, + ) -> std::result::Result { + let sc_network::config::IncomingRequest { payload, peer, pending_response } = raw; + let payload = match Req::decode(&mut payload.as_ref()) { + Ok(payload) => payload, + Err(err) => { + let reputation_changes = + reputation_changes.into_iter().map(|r| r.into_base_rep()).collect(); + let response = sc_network::config::OutgoingResponse { + result: Err(()), + reputation_changes, + sent_feedback: None, + }; + + if let Err(_) = pending_response.send(response) { + return Err(NonFatal::DecodingErrorNoReputationChange(peer, err)) + } + return Err(NonFatal::DecodingError(peer, err)) + }, + }; + Ok(Self::new(peer, payload, pending_response)) + } + + /// Convert into raw untyped substrate `IncomingRequest`. + /// + /// This is mostly useful for testing. + pub fn into_raw(self) -> sc_network::config::IncomingRequest { + sc_network::config::IncomingRequest { + peer: self.peer, + payload: self.payload.encode(), + pending_response: self.pending_response.pending_response, + } + } + + /// Send the response back. + /// + /// Calls [`OutgoingResponseSender::send_response`]. + pub fn send_response(self, resp: Req::Response) -> std::result::Result<(), Req::Response> { + self.pending_response.send_response(resp) + } + + /// Send response with additional options. + /// + /// Calls [`OutgoingResponseSender::send_outgoing_response`]. + pub fn send_outgoing_response( + self, + resp: OutgoingResponse<::Response>, + ) -> std::result::Result<(), ()> { + self.pending_response.send_outgoing_response(resp) + } +} + +/// Sender for sending back responses on an `IncomingRequest`. +#[derive(Debug)] +pub struct OutgoingResponseSender { + pending_response: oneshot::Sender, + phantom: PhantomData, +} + +impl OutgoingResponseSender +where + Req: IsRequest + Decode, + Req::Response: Encode, +{ + /// Send the response back. + /// + /// On success we return `Ok(())`, on error we return the not sent `Response`. + /// + /// `netconfig::OutgoingResponse` exposes a way of modifying the peer's reputation. If needed we + /// can change this function to expose this feature as well. + pub fn send_response(self, resp: Req::Response) -> std::result::Result<(), Req::Response> { + self.pending_response + .send(netconfig::OutgoingResponse { + result: Ok(resp.encode()), + reputation_changes: Vec::new(), + sent_feedback: None, + }) + .map_err(|_| resp) + } + + /// Send response with additional options. + /// + /// This variant allows for waiting for the response to be sent out, allows for changing peer's + /// reputation and allows for not sending a response at all (for only changing the peer's + /// reputation). + pub fn send_outgoing_response( + self, + resp: OutgoingResponse<::Response>, + ) -> std::result::Result<(), ()> { + let OutgoingResponse { result, reputation_changes, sent_feedback } = resp; + + let response = netconfig::OutgoingResponse { + result: result.map(|v| v.encode()), + reputation_changes: reputation_changes.into_iter().map(|c| c.into_base_rep()).collect(), + sent_feedback, + }; + + self.pending_response.send(response).map_err(|_| ()) + } +} + +/// Typed variant of [`netconfig::OutgoingResponse`]. +/// +/// Responses to `IncomingRequest`s. +pub struct OutgoingResponse { + /// The payload of the response. + /// + /// `Err(())` if none is available e.g. due to an error while handling the request. + pub result: std::result::Result, + + /// Reputation changes accrued while handling the request. To be applied to the reputation of + /// the peer sending the request. + pub reputation_changes: Vec, + + /// If provided, the `oneshot::Sender` will be notified when the request has been sent to the + /// peer. + pub sent_feedback: Option>, +} + +/// Receiver for incoming requests. +/// +/// Takes care of decoding and handling of invalid encoded requests. +pub struct IncomingRequestReceiver { + raw: mpsc::Receiver, + phantom: PhantomData, +} + +impl IncomingRequestReceiver +where + Req: IsRequest + Decode + Encode, + Req::Response: Encode, +{ + /// Try to receive the next incoming request. + /// + /// Any received request will be decoded, on decoding errors the provided reputation changes + /// will be applied and an error will be reported. + pub async fn recv(&mut self, reputation_changes: F) -> Result> + where + F: FnOnce() -> Vec, + { + let req = match self.raw.next().await { + None => return Err(Fatal::RequestChannelExhausted.into()), + Some(raw) => IncomingRequest::::try_from_raw(raw, reputation_changes())?, + }; + Ok(req) + } +} diff --git a/node/network/protocol/src/request_response/mod.rs b/node/network/protocol/src/request_response/mod.rs index e2b2464f5d54..4f922f553993 100644 --- a/node/network/protocol/src/request_response/mod.rs +++ b/node/network/protocol/src/request_response/mod.rs @@ -40,11 +40,14 @@ use strum::EnumIter; pub use sc_network::{config as network, config::RequestResponseConfig}; -/// All requests that can be sent to the network bridge. -pub mod request; -pub use request::{ - IncomingRequest, OutgoingRequest, OutgoingResult, Recipient, Requests, ResponseSender, -}; +/// Everything related to handling of incoming requests. +pub mod incoming; +/// Everything related to handling of outgoing requests. +pub mod outgoing; + +pub use incoming::{IncomingRequest, IncomingRequestReceiver}; + +pub use outgoing::{OutgoingRequest, OutgoingResult, Recipient, Requests, ResponseSender}; ///// Multiplexer for incoming requests. // pub mod multiplexer; @@ -248,3 +251,12 @@ impl Protocol { } } } + +/// Common properties of any `Request`. +pub trait IsRequest { + /// Each request has a corresponding `Response`. + type Response; + + /// What protocol this `Request` implements. + const PROTOCOL: Protocol; +} diff --git a/node/network/protocol/src/request_response/request.rs b/node/network/protocol/src/request_response/outgoing.rs similarity index 54% rename from node/network/protocol/src/request_response/request.rs rename to node/network/protocol/src/request_response/outgoing.rs index 8e39c7204ebd..38e3c44c7dae 100644 --- a/node/network/protocol/src/request_response/request.rs +++ b/node/network/protocol/src/request_response/outgoing.rs @@ -14,32 +14,17 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::marker::PhantomData; - use futures::{channel::oneshot, prelude::Future}; +use thiserror::Error; use parity_scale_codec::{Decode, Encode, Error as DecodingError}; + use sc_network as network; -use sc_network::{config as netconfig, PeerId}; -use thiserror::Error; +use sc_network::PeerId; use polkadot_primitives::v1::AuthorityDiscoveryId; -use crate::UnifiedReputationChange; - -use super::{v1, Protocol}; - -/// Used by the network to send us a response to a request. -pub type ResponseSender = oneshot::Sender, network::RequestFailure>>; - -/// Common properties of any `Request`. -pub trait IsRequest { - /// Each request has a corresponding `Response`. - type Response; - - /// What protocol this `Request` implements. - const PROTOCOL: Protocol; -} +use super::{v1, IsRequest, Protocol}; /// All requests that can be sent to the network bridge via `NetworkBridgeMessage::SendRequest`. #[derive(Debug)] @@ -90,13 +75,23 @@ impl Requests { } } -/// Potential recipients of an outgoing request. -#[derive(Debug, Eq, Hash, PartialEq, Clone)] -pub enum Recipient { - /// Recipient is a regular peer and we know its peer id. - Peer(PeerId), - /// Recipient is a validator, we address it via this `AuthorityDiscoveryId`. - Authority(AuthorityDiscoveryId), +/// Used by the network to send us a response to a request. +pub type ResponseSender = oneshot::Sender, network::RequestFailure>>; + +/// Any error that can occur when sending a request. +#[derive(Debug, Error)] +pub enum RequestError { + /// Response could not be decoded. + #[error("Response could not be decoded")] + InvalidResponse(#[source] DecodingError), + + /// Some error in substrate/libp2p happened. + #[error("Some network error occurred")] + NetworkError(#[source] network::RequestFailure), + + /// Response got canceled by networking. + #[error("Response channel got canceled")] + Canceled(#[source] oneshot::Canceled), } /// A request to be sent to the network bridge, including a sender for sending responses/failures. @@ -119,32 +114,13 @@ pub struct OutgoingRequest { pub pending_response: ResponseSender, } -/// Any error that can occur when sending a request. -#[derive(Debug, Error)] -pub enum RequestError { - /// Response could not be decoded. - #[error("Response could not be decoded")] - InvalidResponse(#[source] DecodingError), - - /// Some error in substrate/libp2p happened. - #[error("Some network error occurred")] - NetworkError(#[source] network::RequestFailure), - - /// Response got canceled by networking. - #[error("Response channel got canceled")] - Canceled(#[source] oneshot::Canceled), -} - -/// Things that can go wrong when decoding an incoming request. -#[derive(Debug, Error)] -pub enum ReceiveError { - /// Decoding failed, we were able to change the peer's reputation accordingly. - #[error("Decoding request failed for peer {0}.")] - DecodingError(PeerId, #[source] DecodingError), - - /// Decoding failed, but sending reputation change failed. - #[error("Decoding request failed for peer {0}, and changing reputation failed.")] - DecodingErrorNoReputationChange(PeerId, #[source] DecodingError), +/// Potential recipients of an outgoing request. +#[derive(Debug, Eq, Hash, PartialEq, Clone)] +pub enum Recipient { + /// Recipient is a regular peer and we know its peer id. + Peer(PeerId), + /// Recipient is a validator, we address it via this `AuthorityDiscoveryId`. + Authority(AuthorityDiscoveryId), } /// Responses received for an `OutgoingRequest`. @@ -179,6 +155,18 @@ where } } +/// Future for actually receiving a typed response for an `OutgoingRequest`. +async fn receive_response( + rec: oneshot::Receiver, network::RequestFailure>>, +) -> OutgoingResult +where + Req: IsRequest, + Req::Response: Decode, +{ + let raw = rec.await??; + Ok(Decode::decode(&mut raw.as_ref())?) +} + impl From for RequestError { fn from(err: DecodingError) -> Self { Self::InvalidResponse(err) @@ -196,164 +184,3 @@ impl From for RequestError { Self::Canceled(err) } } - -/// A request coming in, including a sender for sending responses. -/// -/// `IncomingRequest`s are produced by `RequestMultiplexer` on behalf of the network bridge. -#[derive(Debug)] -pub struct IncomingRequest { - /// `PeerId` of sending peer. - pub peer: PeerId, - /// The sent request. - pub payload: Req, - /// Sender for sending response back. - pub pending_response: OutgoingResponseSender, -} - -/// Sender for sending back responses on an `IncomingRequest`. -#[derive(Debug)] -pub struct OutgoingResponseSender { - pending_response: oneshot::Sender, - phantom: PhantomData, -} - -impl OutgoingResponseSender -where - Req: IsRequest + Decode, - Req::Response: Encode, -{ - /// Send the response back. - /// - /// On success we return `Ok(())`, on error we return the not sent `Response`. - /// - /// `netconfig::OutgoingResponse` exposes a way of modifying the peer's reputation. If needed we - /// can change this function to expose this feature as well. - pub fn send_response(self, resp: Req::Response) -> Result<(), Req::Response> { - self.pending_response - .send(netconfig::OutgoingResponse { - result: Ok(resp.encode()), - reputation_changes: Vec::new(), - sent_feedback: None, - }) - .map_err(|_| resp) - } - - /// Send response with additional options. - /// - /// This variant allows for waiting for the response to be sent out, allows for changing peer's - /// reputation and allows for not sending a response at all (for only changing the peer's - /// reputation). - pub fn send_outgoing_response( - self, - resp: OutgoingResponse<::Response>, - ) -> Result<(), ()> { - let OutgoingResponse { result, reputation_changes, sent_feedback } = resp; - - let response = netconfig::OutgoingResponse { - result: result.map(|v| v.encode()), - reputation_changes: reputation_changes.into_iter().map(|c| c.into_base_rep()).collect(), - sent_feedback, - }; - - self.pending_response.send(response).map_err(|_| ()) - } -} - -/// Typed variant of [`netconfig::OutgoingResponse`]. -/// -/// Responses to `IncomingRequest`s. -pub struct OutgoingResponse { - /// The payload of the response. - /// - /// `Err(())` if none is available e.g. due an error while handling the request. - pub result: Result, - - /// Reputation changes accrued while handling the request. To be applied to the reputation of - /// the peer sending the request. - pub reputation_changes: Vec, - - /// If provided, the `oneshot::Sender` will be notified when the request has been sent to the - /// peer. - pub sent_feedback: Option>, -} - -impl IncomingRequest -where - Req: IsRequest + Decode, - Req::Response: Encode, -{ - /// Create new `IncomingRequest`. - pub fn new( - peer: PeerId, - payload: Req, - pending_response: oneshot::Sender, - ) -> Self { - Self { - peer, - payload, - pending_response: OutgoingResponseSender { pending_response, phantom: PhantomData {} }, - } - } - - /// Try building from raw substrate request. - /// - /// This function will fail if the request cannot be decoded and will apply passed in - /// reputation changes in that case. - /// - /// Params: - /// - The raw request to decode - /// - Reputation changes to apply for the peer in case decoding fails. - pub fn try_from_raw( - raw: sc_network::config::IncomingRequest, - reputation_changes: Vec, - ) -> Result { - let sc_network::config::IncomingRequest { payload, peer, pending_response } = raw; - let payload = match Req::decode(&mut payload.as_ref()) { - Ok(payload) => payload, - Err(err) => { - let reputation_changes = - reputation_changes.into_iter().map(|r| r.into_base_rep()).collect(); - let response = sc_network::config::OutgoingResponse { - result: Err(()), - reputation_changes, - sent_feedback: None, - }; - - if let Err(_) = pending_response.send(response) { - return Err(ReceiveError::DecodingErrorNoReputationChange(peer, err)) - } - return Err(ReceiveError::DecodingError(peer, err)) - }, - }; - Ok(Self::new(peer, payload, pending_response)) - } - - /// Send the response back. - /// - /// Calls [`OutgoingResponseSender::send_response`]. - pub fn send_response(self, resp: Req::Response) -> Result<(), Req::Response> { - self.pending_response.send_response(resp) - } - - /// Send response with additional options. - /// - /// Calls [`OutgoingResponseSender::send_outgoing_response`]. - pub fn send_outgoing_response( - self, - resp: OutgoingResponse<::Response>, - ) -> Result<(), ()> { - self.pending_response.send_outgoing_response(resp) - } -} - -/// Future for actually receiving a typed response for an `OutgoingRequest`. -async fn receive_response( - rec: oneshot::Receiver, network::RequestFailure>>, -) -> OutgoingResult -where - Req: IsRequest, - Req::Response: Decode, -{ - let raw = rec.await??; - Ok(Decode::decode(&mut raw.as_ref())?) -} diff --git a/node/network/protocol/src/request_response/v1.rs b/node/network/protocol/src/request_response/v1.rs index c94fcc79d6e5..184bcf5f030e 100644 --- a/node/network/protocol/src/request_response/v1.rs +++ b/node/network/protocol/src/request_response/v1.rs @@ -25,7 +25,7 @@ use polkadot_primitives::v1::{ CandidateHash, CandidateReceipt, CommittedCandidateReceipt, Hash, Id as ParaId, ValidatorIndex, }; -use super::{request::IsRequest, Protocol}; +use super::{IsRequest, Protocol}; /// Request an availability chunk. #[derive(Debug, Copy, Clone, Encode, Decode)] diff --git a/node/network/statement-distribution/Cargo.toml b/node/network/statement-distribution/Cargo.toml index 14ec199a7fc8..e2fe56ca2dc6 100644 --- a/node/network/statement-distribution/Cargo.toml +++ b/node/network/statement-distribution/Cargo.toml @@ -11,7 +11,6 @@ tracing = "0.1.26" polkadot-primitives = { path = "../../../primitives" } sp-staking = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } -sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../../subsystem" } polkadot-node-primitives = { path = "../../primitives" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } @@ -19,6 +18,7 @@ polkadot-node-network-protocol = { path = "../../network/protocol" } arrayvec = "0.5.2" indexmap = "1.7.0" parity-scale-codec = { version = "2.0.0", default-features = false, features = ["derive"] } +derive_more = "0.99.11" thiserror = "1.0.26" [dev-dependencies] @@ -30,4 +30,5 @@ sp-application-crypto = { git = "https://github.com/paritytech/substrate", branc sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } +sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } futures-timer = "3.0.2" diff --git a/node/network/statement-distribution/src/error.rs b/node/network/statement-distribution/src/error.rs index 32adecf24f8b..4eb28eedc0ed 100644 --- a/node/network/statement-distribution/src/error.rs +++ b/node/network/statement-distribution/src/error.rs @@ -18,11 +18,11 @@ //! Error handling related code and Error/Result definitions. use polkadot_node_network_protocol::PeerId; +use polkadot_node_subsystem_util::runtime; use polkadot_primitives::v1::{CandidateHash, Hash}; use polkadot_subsystem::SubsystemError; -use thiserror::Error; -use polkadot_node_subsystem_util::{runtime, unwrap_non_fatal, Fault}; +use thiserror::Error; use crate::LOG_TARGET; @@ -34,29 +34,25 @@ pub type NonFatalResult = std::result::Result; pub type FatalResult = std::result::Result; /// Errors for statement distribution. -#[derive(Debug, Error)] +#[derive(Debug, Error, derive_more::From)] #[error(transparent)] -pub struct Error(pub Fault); - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self(Fault::from_non_fatal(e)) - } -} - -impl From for Error { - fn from(f: Fatal) -> Self { - Self(Fault::from_fatal(f)) - } +pub enum Error { + /// Fatal errors of dispute distribution. + Fatal(Fatal), + /// Non fatal errors of dispute distribution. + NonFatal(NonFatal), } impl From for Error { fn from(o: runtime::Error) -> Self { - Self(Fault::from_other(o)) + match o { + runtime::Error::Fatal(f) => Self::Fatal(Fatal::Runtime(f)), + runtime::Error::NonFatal(f) => Self::NonFatal(NonFatal::Runtime(f)), + } } } -/// Fatal runtime errors. +/// Fatal errors. #[derive(Debug, Error)] pub enum Fatal { /// Requester channel is never closed. @@ -112,9 +108,13 @@ pub enum NonFatal { /// /// We basically always want to try and continue on error. This utility function is meant to /// consume top-level errors by simply logging them. -pub fn log_error(result: Result<()>, ctx: &'static str) -> FatalResult<()> { - if let Some(error) = unwrap_non_fatal(result.map_err(|e| e.0))? { - tracing::debug!(target: LOG_TARGET, error = ?error, ctx) +pub fn log_error(result: Result<()>, ctx: &'static str) -> std::result::Result<(), Fatal> { + match result { + Err(Error::Fatal(f)) => Err(f), + Err(Error::NonFatal(error)) => { + tracing::warn!(target: LOG_TARGET, error = ?error, ctx); + Ok(()) + }, + Ok(()) => Ok(()), } - Ok(()) } diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 0aa8ea999f03..ffe54342a13b 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -27,6 +27,7 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::{ peer_set::{IsAuthority, PeerSet}, + request_response::{v1 as request_v1, IncomingRequestReceiver}, v1::{self as protocol_v1, StatementMetadata}, IfDisconnected, PeerId, UnifiedReputationChange as Rep, View, }; @@ -57,7 +58,7 @@ use futures::{ }; use indexmap::{map::Entry as IEntry, IndexMap}; use sp_keystore::SyncCryptoStorePtr; -use util::{runtime::RuntimeInfo, Fault}; +use util::runtime::RuntimeInfo; use std::collections::{hash_map::Entry, HashMap, HashSet}; @@ -106,6 +107,8 @@ const MAX_LARGE_STATEMENTS_PER_SENDER: usize = 20; pub struct StatementDistribution { /// Pointer to a keystore, which is required for determining this nodes validator index. keystore: SyncCryptoStorePtr, + /// Receiver for incoming large statement requests. + req_receiver: Option>, // Prometheus metrics metrics: Metrics, } @@ -130,8 +133,12 @@ where impl StatementDistribution { /// Create a new Statement Distribution Subsystem - pub fn new(keystore: SyncCryptoStorePtr, metrics: Metrics) -> StatementDistribution { - StatementDistribution { keystore, metrics } + pub fn new( + keystore: SyncCryptoStorePtr, + req_receiver: IncomingRequestReceiver, + metrics: Metrics, + ) -> StatementDistribution { + StatementDistribution { keystore, req_receiver: Some(req_receiver), metrics } } } @@ -1526,7 +1533,7 @@ async fn handle_network_update( impl StatementDistribution { async fn run( - self, + mut self, mut ctx: (impl SubsystemContext + overseer::SubsystemContext), ) -> std::result::Result<(), Fatal> { @@ -1542,6 +1549,16 @@ impl StatementDistribution { // Sender/Receiver for getting news from our responder task. let (res_sender, mut res_receiver) = mpsc::channel(1); + ctx.spawn( + "large-statement-responder", + respond( + self.req_receiver.take().expect("Mandatory argument to new. qed"), + res_sender.clone(), + ) + .boxed(), + ) + .map_err(Fatal::SpawnTask)?; + loop { let message = MuxedMessage::receive(&mut ctx, &mut req_receiver, &mut res_receiver).await; @@ -1556,16 +1573,14 @@ impl StatementDistribution { &mut authorities, &mut active_heads, &req_sender, - &res_sender, result?, ) .await; match result { Ok(true) => break, Ok(false) => {}, - Err(Error(Fault::Fatal(f))) => return Err(f), - Err(Error(Fault::Err(error))) => - tracing::debug!(target: LOG_TARGET, ?error), + Err(Error::Fatal(f)) => return Err(f), + Err(Error::NonFatal(error)) => tracing::debug!(target: LOG_TARGET, ?error), } }, MuxedMessage::Requester(result) => { @@ -1749,7 +1764,6 @@ impl StatementDistribution { authorities: &mut HashMap, active_heads: &mut HashMap, req_sender: &mpsc::Sender, - res_sender: &mpsc::Sender, message: FromOverseer, ) -> Result { let metrics = &self.metrics; @@ -1868,13 +1882,6 @@ impl StatementDistribution { ) .await; }, - StatementDistributionMessage::StatementFetchingReceiver(receiver) => { - ctx.spawn( - "large-statement-responder", - respond(receiver, res_sender.clone()).boxed(), - ) - .map_err(Fatal::SpawnTask)?; - }, }, } Ok(false) diff --git a/node/network/statement-distribution/src/responder.rs b/node/network/statement-distribution/src/responder.rs index e42302cd82a4..409e8a4d274c 100644 --- a/node/network/statement-distribution/src/responder.rs +++ b/node/network/statement-distribution/src/responder.rs @@ -22,9 +22,9 @@ use futures::{ use polkadot_node_network_protocol::{ request_response::{ - request::OutgoingResponse, + incoming::{self, OutgoingResponse}, v1::{StatementFetchingRequest, StatementFetchingResponse}, - IncomingRequest, MAX_PARALLEL_STATEMENT_REQUESTS, + IncomingRequestReceiver, MAX_PARALLEL_STATEMENT_REQUESTS, }, PeerId, UnifiedReputationChange as Rep, }; @@ -51,7 +51,7 @@ pub enum ResponderMessage { /// `CommittedCandidateReceipt` from peers, whether this can be used to re-assemble one ore /// many `SignedFullStatement`s needs to be verified by the caller. pub async fn respond( - mut receiver: mpsc::Receiver, + mut receiver: IncomingRequestReceiver, mut sender: mpsc::Sender, ) { let mut pending_out = FuturesUnordered::new(); @@ -74,23 +74,16 @@ pub async fn respond( pending_out.next().await; } - let raw = match receiver.next().await { - None => { - tracing::debug!(target: LOG_TARGET, "Shutting down request responder"); + let req = match receiver.recv(|| vec![COST_INVALID_REQUEST]).await { + Err(incoming::Error::Fatal(f)) => { + tracing::debug!(target: LOG_TARGET, error = ?f, "Shutting down request responder"); return }, - Some(v) => v, - }; - - let req = match IncomingRequest::::try_from_raw( - raw, - vec![COST_INVALID_REQUEST], - ) { - Err(err) => { + Err(incoming::Error::NonFatal(err)) => { tracing::debug!(target: LOG_TARGET, ?err, "Decoding request failed"); continue }, - Ok(payload) => payload, + Ok(v) => v, }; let (tx, rx) = oneshot::channel(); diff --git a/node/network/statement-distribution/src/tests.rs b/node/network/statement-distribution/src/tests.rs index 5984f97be92b..2c41d5e7ddf0 100644 --- a/node/network/statement-distribution/src/tests.rs +++ b/node/network/statement-distribution/src/tests.rs @@ -22,7 +22,7 @@ use parity_scale_codec::{Decode, Encode}; use polkadot_node_network_protocol::{ request_response::{ v1::{StatementFetchingRequest, StatementFetchingResponse}, - Recipient, Requests, + IncomingRequest, Recipient, Requests, }, view, ObservedRole, }; @@ -699,11 +699,14 @@ fn receiving_from_one_sends_to_another_and_to_candidate_backing() { let pool = sp_core::testing::TaskExecutor::new(); let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool); + let (statement_req_receiver, _) = IncomingRequest::get_config_receiver(); + let bg = async move { - let s = StatementDistribution { - metrics: Default::default(), - keystore: Arc::new(LocalKeystore::in_memory()), - }; + let s = StatementDistribution::new( + Arc::new(LocalKeystore::in_memory()), + statement_req_receiver, + Default::default(), + ); s.run(ctx).await.unwrap(); }; @@ -888,21 +891,18 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing( let pool = sp_core::testing::TaskExecutor::new(); let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool); + let (statement_req_receiver, mut req_cfg) = IncomingRequest::get_config_receiver(); + let bg = async move { - let s = - StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() }; + let s = StatementDistribution::new( + make_ferdie_keystore(), + statement_req_receiver, + Default::default(), + ); s.run(ctx).await.unwrap(); }; - let (mut tx_reqs, rx_reqs) = mpsc::channel(1); - let test_fut = async move { - handle - .send(FromOverseer::Communication { - msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs), - }) - .await; - // register our active heads. handle .send(FromOverseer::Signal(OverseerSignal::ActiveLeaves( @@ -1290,7 +1290,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing( payload: inner_req.encode(), pending_response, }; - tx_reqs.send(req).await.unwrap(); + req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap(); assert_matches!( response_rx.await.unwrap().result, Err(()) => {} @@ -1308,7 +1308,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing( payload: inner_req.encode(), pending_response, }; - tx_reqs.send(req).await.unwrap(); + req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap(); assert_matches!( response_rx.await.unwrap().result, Err(()) => {} @@ -1325,7 +1325,7 @@ fn receiving_large_statement_from_one_sends_to_another_and_to_candidate_backing( payload: inner_req.encode(), pending_response, }; - tx_reqs.send(req).await.unwrap(); + req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap(); let StatementFetchingResponse::Statement(committed) = Decode::decode(&mut response_rx.await.unwrap().result.unwrap().as_ref()).unwrap(); assert_eq!(committed, candidate); @@ -1390,21 +1390,18 @@ fn share_prioritizes_backing_group() { let pool = sp_core::testing::TaskExecutor::new(); let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool); + let (statement_req_receiver, mut req_cfg) = IncomingRequest::get_config_receiver(); + let bg = async move { - let s = - StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() }; + let s = StatementDistribution::new( + make_ferdie_keystore(), + statement_req_receiver, + Default::default(), + ); s.run(ctx).await.unwrap(); }; - let (mut tx_reqs, rx_reqs) = mpsc::channel(1); - let test_fut = async move { - handle - .send(FromOverseer::Communication { - msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs), - }) - .await; - // register our active heads. handle .send(FromOverseer::Signal(OverseerSignal::ActiveLeaves( @@ -1632,7 +1629,7 @@ fn share_prioritizes_backing_group() { payload: inner_req.encode(), pending_response, }; - tx_reqs.send(req).await.unwrap(); + req_cfg.inbound_queue.as_mut().unwrap().send(req).await.unwrap(); let StatementFetchingResponse::Statement(committed) = Decode::decode(&mut response_rx.await.unwrap().result.unwrap().as_ref()).unwrap(); assert_eq!(committed, candidate); @@ -1679,21 +1676,17 @@ fn peer_cant_flood_with_large_statements() { let pool = sp_core::testing::TaskExecutor::new(); let (ctx, mut handle) = polkadot_node_subsystem_test_helpers::make_subsystem_context(pool); + let (statement_req_receiver, _) = IncomingRequest::get_config_receiver(); let bg = async move { - let s = - StatementDistribution { metrics: Default::default(), keystore: make_ferdie_keystore() }; + let s = StatementDistribution::new( + make_ferdie_keystore(), + statement_req_receiver, + Default::default(), + ); s.run(ctx).await.unwrap(); }; - let (_, rx_reqs) = mpsc::channel(1); - let test_fut = async move { - handle - .send(FromOverseer::Communication { - msg: StatementDistributionMessage::StatementFetchingReceiver(rx_reqs), - }) - .await; - // register our active heads. handle .send(FromOverseer::Signal(OverseerSignal::ActiveLeaves( diff --git a/node/overseer/all-subsystems-gen/src/lib.rs b/node/overseer/all-subsystems-gen/src/lib.rs index 48228c75b27a..b210f94f16a6 100644 --- a/node/overseer/all-subsystems-gen/src/lib.rs +++ b/node/overseer/all-subsystems-gen/src/lib.rs @@ -106,7 +106,7 @@ fn impl_subsystems_gen(item: TokenStream) -> Result { if generic_types.contains(&ty_ident) { if let Some(previous) = duplicate_generic_detection.replace(ty_ident) { - return Err(Error::new(previous.span(), "Generic type parameters may only be used for exactly one field, but is used more than once.")) + return Err(Error::new(previous.span(), "Generic type parameters may only be used for exactly one field, but is used more than once.")); } } diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index 2e2ddfd2d90a..e8de058863c6 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -920,32 +920,3 @@ where self.spawner.spawn_blocking(name, j); } } - -// Additional `From` implementations, in order to deal with incoming network messages. -// Kept out of the proc macro, for sake of simplicity reduce the need to make even -// more types to the proc macro logic. - -use polkadot_node_network_protocol::request_response::{ - request::IncomingRequest, v1 as req_res_v1, -}; - -impl From> for AllMessages { - fn from(req: IncomingRequest) -> Self { - From::::from(From::from(req)) - } -} -impl From> for AllMessages { - fn from(req: IncomingRequest) -> Self { - From::::from(From::from(req)) - } -} -impl From> for AllMessages { - fn from(req: IncomingRequest) -> Self { - From::::from(From::from(req)) - } -} -impl From> for AllMessages { - fn from(req: IncomingRequest) -> Self { - From::::from(From::from(req)) - } -} diff --git a/node/overseer/src/tests.rs b/node/overseer/src/tests.rs index d0a79c086e6e..4d438ea5030f 100644 --- a/node/overseer/src/tests.rs +++ b/node/overseer/src/tests.rs @@ -14,17 +14,23 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use futures::{channel::mpsc, executor, pending, pin_mut, poll, select, stream, FutureExt}; +use futures::{executor, pending, pin_mut, poll, select, stream, FutureExt}; use std::{collections::HashMap, sync::atomic, task::Poll}; use polkadot_node_network_protocol::{PeerId, UnifiedReputationChange}; -use polkadot_node_primitives::{BlockData, CollationGenerationConfig, CollationResult, PoV}; +use polkadot_node_primitives::{ + BlockData, CollationGenerationConfig, CollationResult, DisputeMessage, InvalidDisputeVote, PoV, + UncheckedDisputeMessage, ValidDisputeVote, +}; use polkadot_node_subsystem_types::{ jaeger, messages::{NetworkBridgeEvent, RuntimeApiRequest}, ActivatedLeaf, LeafStatus, }; -use polkadot_primitives::v1::{CandidateHash, CollatorPair}; +use polkadot_primitives::v1::{ + CandidateHash, CollatorPair, InvalidDisputeStatementKind, ValidDisputeStatementKind, + ValidatorIndex, +}; use crate::{self as overseer, gen::Delay, HeadSupportsParachains, Overseer}; use metered_channel as metered; @@ -772,8 +778,27 @@ fn test_dispute_participation_msg() -> DisputeParticipationMessage { } fn test_dispute_distribution_msg() -> DisputeDistributionMessage { - let (_, receiver) = mpsc::channel(1); - DisputeDistributionMessage::DisputeSendingReceiver(receiver) + let dummy_dispute_message = UncheckedDisputeMessage { + candidate_receipt: Default::default(), + session_index: 0, + invalid_vote: InvalidDisputeVote { + validator_index: ValidatorIndex(0), + signature: Default::default(), + kind: InvalidDisputeStatementKind::Explicit, + }, + valid_vote: ValidDisputeVote { + validator_index: ValidatorIndex(0), + signature: Default::default(), + kind: ValidDisputeStatementKind::Explicit, + }, + }; + + DisputeDistributionMessage::SendDispute( + // We just need dummy data here: + unsafe { + std::mem::transmute::(dummy_dispute_message) + }, + ) } fn test_chain_selection_msg() -> ChainSelectionMessage { diff --git a/node/service/Cargo.toml b/node/service/Cargo.toml index 286b392fae98..36bdd89862c9 100644 --- a/node/service/Cargo.toml +++ b/node/service/Cargo.toml @@ -80,6 +80,7 @@ polkadot-rpc = { path = "../../rpc" } polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../subsystem" } polkadot-node-subsystem-util = { path = "../subsystem-util" } polkadot-runtime-parachains = { path = "../../runtime/parachains" } +polkadot-node-network-protocol = { path = "../network/protocol" } # Polkadot Runtimes polkadot-runtime = { path = "../../runtime/polkadot" } diff --git a/node/service/src/lib.rs b/node/service/src/lib.rs index 7217d6a41e99..5011549cd70d 100644 --- a/node/service/src/lib.rs +++ b/node/service/src/lib.rs @@ -37,7 +37,6 @@ mod tests; #[cfg(feature = "full-node")] use { grandpa::{self, FinalityProofProvider as GrandpaFinalityProofProvider}, - polkadot_network_bridge::RequestMultiplexer, polkadot_node_core_approval_voting::Config as ApprovalVotingConfig, polkadot_node_core_av_store::Config as AvailabilityConfig, polkadot_node_core_av_store::Error as AvailabilityError, @@ -632,6 +631,8 @@ where Executor: NativeExecutionDispatch + 'static, OverseerGenerator: OverseerGen, { + use polkadot_node_network_protocol::request_response::IncomingRequest; + let role = config.role.clone(); let force_authoring = config.force_authoring; let backoff_authoring_blocks = { @@ -681,11 +682,18 @@ where config.network.extra_sets.extend(peer_sets_info(is_authority)); } - let request_multiplexer = { - let (multiplexer, configs) = RequestMultiplexer::new(); - config.network.request_response_protocols.extend(configs); - multiplexer - }; + let (pov_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (chunk_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (collation_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (available_data_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (statement_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (dispute_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); let warp_sync = Arc::new(grandpa::warp_proof::NetworkProvider::new( backend.clone(), @@ -824,7 +832,12 @@ where parachains_db, network_service: network.clone(), authority_discovery_service, - request_multiplexer, + pov_req_receiver, + chunk_req_receiver, + collation_req_receiver, + available_data_req_receiver, + statement_req_receiver, + dispute_req_receiver, registry: prometheus_registry.as_ref(), spawner, is_collator, diff --git a/node/service/src/overseer.rs b/node/service/src/overseer.rs index 971deb455729..79585135cfae 100644 --- a/node/service/src/overseer.rs +++ b/node/service/src/overseer.rs @@ -15,12 +15,13 @@ // along with Polkadot. If not, see . use super::{AuthorityDiscoveryApi, Block, Error, Hash, IsCollator, Registry, SpawnNamed}; -use polkadot_network_bridge::RequestMultiplexer; +use polkadot_availability_distribution::IncomingRequestReceivers; use polkadot_node_core_approval_voting::Config as ApprovalVotingConfig; use polkadot_node_core_av_store::Config as AvailabilityConfig; use polkadot_node_core_candidate_validation::Config as CandidateValidationConfig; use polkadot_node_core_chain_selection::Config as ChainSelectionConfig; use polkadot_node_core_dispute_coordinator::Config as DisputeCoordinatorConfig; +use polkadot_node_network_protocol::request_response::{v1 as request_v1, IncomingRequestReceiver}; use polkadot_overseer::{AllSubsystems, BlockInfo, Overseer, OverseerHandle}; use polkadot_primitives::v1::ParachainHost; use sc_authority_discovery::Service as AuthorityDiscoveryService; @@ -72,8 +73,14 @@ where pub network_service: Arc>, /// Underlying authority discovery service. pub authority_discovery_service: AuthorityDiscoveryService, - /// A multiplexer to arbitrate incoming `IncomingRequest`s from the network. - pub request_multiplexer: RequestMultiplexer, + /// POV request receiver + pub pov_req_receiver: IncomingRequestReceiver, + pub chunk_req_receiver: IncomingRequestReceiver, + pub collation_req_receiver: IncomingRequestReceiver, + pub available_data_req_receiver: + IncomingRequestReceiver, + pub statement_req_receiver: IncomingRequestReceiver, + pub dispute_req_receiver: IncomingRequestReceiver, /// Prometheus registry, commonly used for production systems, less so for test. pub registry: Option<&'a Registry>, /// Task spawner to be used throughout the overseer and the APIs it provides. @@ -103,7 +110,12 @@ pub fn create_default_subsystems<'a, Spawner, RuntimeClient>( parachains_db, network_service, authority_discovery_service, - request_multiplexer, + pov_req_receiver, + chunk_req_receiver, + collation_req_receiver, + available_data_req_receiver, + statement_req_receiver, + dispute_req_receiver, registry, spawner, is_collator, @@ -153,9 +165,12 @@ where let all_subsystems = AllSubsystems { availability_distribution: AvailabilityDistributionSubsystem::new( keystore.clone(), + IncomingRequestReceivers { pov_req_receiver, chunk_req_receiver }, Metrics::register(registry)?, ), - availability_recovery: AvailabilityRecoverySubsystem::with_chunks_only(), + availability_recovery: AvailabilityRecoverySubsystem::with_chunks_only( + available_data_req_receiver, + ), availability_store: AvailabilityStoreSubsystem::new( parachains_db.clone(), availability_config, @@ -183,6 +198,7 @@ where IsCollator::Yes(collator_pair) => ProtocolSide::Collator( network_service.local_peer_id().clone(), collator_pair, + collation_req_receiver, Metrics::register(registry)?, ), IsCollator::No => ProtocolSide::Validator { @@ -196,7 +212,6 @@ where network_bridge: NetworkBridgeSubsystem::new( network_service.clone(), authority_discovery_service.clone(), - request_multiplexer, Box::new(network_service.clone()), Metrics::register(registry)?, ), @@ -208,6 +223,7 @@ where ), statement_distribution: StatementDistributionSubsystem::new( keystore.clone(), + statement_req_receiver, Metrics::register(registry)?, ), approval_distribution: ApprovalDistributionSubsystem::new(Metrics::register(registry)?), @@ -227,6 +243,7 @@ where dispute_participation: DisputeParticipationSubsystem::new(), dispute_distribution: DisputeDistributionSubsystem::new( keystore.clone(), + dispute_req_receiver, authority_discovery_service.clone(), Metrics::register(registry)?, ), diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 7ffc84b3a999..147074fa1b1d 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -22,15 +22,14 @@ //! //! Subsystems' APIs are defined separately from their implementation, leading to easier mocking. -use futures::channel::{mpsc, oneshot}; +use futures::channel::oneshot; use thiserror::Error; pub use sc_network::IfDisconnected; use polkadot_node_network_protocol::{ - peer_set::PeerSet, - request_response::{request::IncomingRequest, v1 as req_res_v1, Requests}, - v1 as protocol_v1, PeerId, UnifiedReputationChange, + peer_set::PeerSet, request_response::Requests, v1 as protocol_v1, PeerId, + UnifiedReputationChange, }; use polkadot_node_primitives::{ approval::{BlockApprovalMeta, IndirectAssignmentCert, IndirectSignedApprovalVote}, @@ -165,8 +164,6 @@ pub enum CollatorProtocolMessage { /// Get a network bridge update. #[from] NetworkBridgeUpdateV1(NetworkBridgeEvent), - /// Incoming network request for a collation. - CollationFetchingRequest(IncomingRequest), /// We recommended a particular candidate to be seconded, but it was invalid; penalize the collator. /// /// The hash is the relay parent. @@ -297,9 +294,6 @@ pub enum DisputeDistributionMessage { /// Tell dispute distribution to distribute an explicit dispute statement to /// validators. SendDispute(DisputeMessage), - - /// Get receiver for receiving incoming network requests for dispute sending. - DisputeSendingReceiver(mpsc::Receiver), } /// Messages received by the network bridge subsystem. @@ -380,10 +374,6 @@ impl NetworkBridgeMessage { /// Availability Distribution Message. #[derive(Debug)] pub enum AvailabilityDistributionMessage { - /// Incoming network request for an availability chunk. - ChunkFetchingRequest(IncomingRequest), - /// Incoming network request for a seconded PoV. - PoVFetchingRequest(IncomingRequest), /// Instruct availability distribution to fetch a remote PoV. /// /// NOTE: The result of this fetch is not yet locally validated and could be bogus. @@ -413,9 +403,6 @@ pub enum AvailabilityRecoveryMessage { Option, // Optional backing group to request from first. oneshot::Sender>, ), - /// Incoming network request for available data. - #[from] - AvailableDataFetchingRequest(IncomingRequest), } /// Bitfield distribution message. @@ -666,8 +653,6 @@ pub enum StatementDistributionMessage { /// Event from the network bridge. #[from] NetworkBridgeUpdateV1(NetworkBridgeEvent), - /// Get receiver for receiving incoming network requests for statement fetching. - StatementFetchingReceiver(mpsc::Receiver), } /// This data becomes intrinsics or extrinsics which should be included in a future relay chain block. @@ -867,19 +852,3 @@ pub enum ApprovalDistributionMessage { /// Message to the Gossip Support subsystem. #[derive(Debug)] pub enum GossipSupportMessage {} - -impl From> for AvailabilityDistributionMessage { - fn from(req: IncomingRequest) -> Self { - Self::PoVFetchingRequest(req) - } -} -impl From> for AvailabilityDistributionMessage { - fn from(req: IncomingRequest) -> Self { - Self::ChunkFetchingRequest(req) - } -} -impl From> for CollatorProtocolMessage { - fn from(req: IncomingRequest) -> Self { - Self::CollationFetchingRequest(req) - } -} diff --git a/node/subsystem-util/Cargo.toml b/node/subsystem-util/Cargo.toml index 8fc268d73ce7..bc5f35e2e8e5 100644 --- a/node/subsystem-util/Cargo.toml +++ b/node/subsystem-util/Cargo.toml @@ -16,6 +16,7 @@ pin-project = "1.0.7" rand = "0.8.3" thiserror = "1.0.26" tracing = "0.1.26" +derive_more = "0.99.11" lru = "0.6.6" polkadot-node-primitives = { path = "../primitives" } diff --git a/node/subsystem-util/src/error_handling.rs b/node/subsystem-util/src/error_handling.rs deleted file mode 100644 index 3660b0454130..000000000000 --- a/node/subsystem-util/src/error_handling.rs +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2021 Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -//! Utilities for general error handling in Polkadot. -//! -//! Goals: -//! -//! - Ergonomic API with little repetition. -//! - Still explicitness where it matters - fatal errors should be visible and justified. -//! - Easy recovering from non-fatal errors. -//! - Errors start as non-fatal and can be made fatal at the level where it is really clear they -//! are fatal. E.g. cancellation of a oneshot might be fatal in one case, but absolutely expected -//! in another. -//! - Good error messages. Fatal errors don't need to be properly structured (as we won't handle -//! them), but should provide good error messages of what is going on. -//! - Encourage many error types. One per module or even per function is totally fine - it makes -//! error handling robust, if you only need to handle errors that can actually happen, also error -//! messages will get better. - -use thiserror::Error; - -/// Error abstraction. -/// -/// Errors might either be fatal and should bring the subsystem down or are at least at the point -/// of occurrence deemed potentially recoverable. -/// -/// Upper layers might have a better view and might make a non-fatal error of a called function a -/// fatal one. The opposite should not happen, therefore don't make an error fatal if you don't -/// know it is in all cases. -/// -/// Usage pattern: -/// -/// ``` -/// use thiserror::Error; -/// use polkadot_node_subsystem::errors::RuntimeApiError; -/// use polkadot_primitives::v1::SessionIndex; -/// use futures::channel::oneshot; -/// use polkadot_node_subsystem_util::{Fault, runtime}; -/// -/// #[derive(Debug, Error)] -/// #[error(transparent)] -/// pub struct Error(pub Fault); -/// -/// pub type Result = std::result::Result; -/// pub type NonFatalResult = std::result::Result; -/// pub type FatalResult = std::result::Result; -/// -/// // Make an error from a `NonFatal` one. -/// impl From for Error { -/// fn from(e: NonFatal) -> Self { -/// Self(Fault::from_non_fatal(e)) -/// } -/// } -/// -/// // Make an Error from a `Fatal` one. -/// impl From for Error { -/// fn from(f: Fatal) -> Self { -/// Self(Fault::from_fatal(f)) -/// } -/// } -/// -/// // Easy conversion from sub error types from other modules: -/// impl From for Error { -/// fn from(o: runtime::Error) -> Self { -/// Self(Fault::from_other(o)) -/// } -/// } -/// -/// #[derive(Debug, Error)] -/// pub enum Fatal { -/// /// Really fatal stuff. -/// #[error("Something fatal happened.")] -/// SomeFatalError, -/// /// Errors coming from runtime::Runtime. -/// #[error("Error while accessing runtime information")] -/// Runtime(#[from] runtime::Fatal), -/// } -/// -/// #[derive(Debug, Error)] -/// pub enum NonFatal { -/// /// Some non fatal error. -/// /// For example if we prune a block we're requesting info about. -/// #[error("Non fatal error happened.")] -/// SomeNonFatalError, -/// -/// /// Errors coming from runtime::Runtime. -/// #[error("Error while accessing runtime information")] -/// Runtime(#[from] runtime::NonFatal), -/// } -/// ``` -/// Then mostly use `Error` in functions, you may also use `NonFatal` and `Fatal` directly in -/// functions that strictly only fail non-fatal or fatal respectively, as `Fatal` and `NonFatal` -/// can automatically converted into the above defined `Error`. -/// ``` -#[derive(Debug, Error)] -pub enum Fault -where - E: std::fmt::Debug + std::error::Error + 'static, - F: std::fmt::Debug + std::error::Error + 'static, -{ - /// Error is fatal and should be escalated up. - /// - /// While we usually won't want to pattern match on those, a concrete descriptive enum might - /// still be a good idea for easy auditing of what can go wrong in a module and also makes for - /// good error messages thanks to `thiserror`. - #[error("Fatal error occurred.")] - Fatal(#[source] F), - /// Error that is not fatal, at least not yet at this level of execution. - #[error("Non fatal error occurred.")] - Err(#[source] E), -} - -/// Due to typesystem constraints we cannot implement the following methods as standard -/// `From::from` implementations. So no auto conversions by default, a simple `Result::map_err` is -/// not too bad though. -impl Fault -where - E: std::fmt::Debug + std::error::Error + 'static, - F: std::fmt::Debug + std::error::Error + 'static, -{ - /// Build an `Fault` from compatible fatal error. - pub fn from_fatal>(f: F1) -> Self { - Self::Fatal(f.into()) - } - - /// Build an `Fault` from compatible non-fatal error. - pub fn from_non_fatal>(e: E1) -> Self { - Self::Err(e.into()) - } - - /// Build an `Fault` from a compatible other `Fault`. - pub fn from_other(e: Fault) -> Self - where - E1: Into + std::fmt::Debug + std::error::Error + 'static, - F1: Into + std::fmt::Debug + std::error::Error + 'static, - { - match e { - Fault::Fatal(f) => Self::from_fatal(f), - Fault::Err(e) => Self::from_non_fatal(e), - } - } -} - -/// Unwrap non-fatal error and report fatal one. -/// -/// This function is useful for top level error handling. Fatal errors will be extracted, -/// non-fatal error will be returned for handling. -/// -/// Usage: -/// -/// ```no_run -/// # use thiserror::Error; -/// # use polkadot_node_subsystem_util::{Fault, unwrap_non_fatal}; -/// # use polkadot_node_subsystem::SubsystemError; -/// # #[derive(Error, Debug)] -/// # enum Fatal { -/// # } -/// # #[derive(Error, Debug)] -/// # enum NonFatal { -/// # } -/// # fn computation() -> Result<(), Fault> { -/// # panic!(); -/// # } -/// # -/// // Use run like so: -/// // run(ctx) -/// // .map_err(|e| SubsystemError::with_origin("subsystem-name", e)) -/// fn run() -> std::result::Result<(), Fatal> { -/// loop { -/// // .... -/// if let Some(err) = unwrap_non_fatal(computation())? { -/// println!("Something bad happened: {}", err); -/// continue -/// } -/// } -/// } -/// -/// ``` -pub fn unwrap_non_fatal(result: Result<(), Fault>) -> Result, F> -where - E: std::fmt::Debug + std::error::Error + 'static, - F: std::fmt::Debug + std::error::Error + Send + Sync + 'static, -{ - match result { - Ok(()) => Ok(None), - Err(Fault::Fatal(f)) => Err(f), - Err(Fault::Err(e)) => Ok(Some(e)), - } -} diff --git a/node/subsystem-util/src/lib.rs b/node/subsystem-util/src/lib.rs index b8fbd9c4694c..0c5e35d1abc8 100644 --- a/node/subsystem-util/src/lib.rs +++ b/node/subsystem-util/src/lib.rs @@ -74,8 +74,6 @@ pub use metered_channel as metered; pub use polkadot_node_network_protocol::MIN_GOSSIP_PEERS; pub use determine_new_blocks::determine_new_blocks; -/// Error classification. -pub use error_handling::{unwrap_non_fatal, Fault}; /// These reexports are required so that external crates can use the `delegated_subsystem` macro properly. pub mod reexports { @@ -88,7 +86,6 @@ pub mod rolling_session_window; pub mod runtime; mod determine_new_blocks; -mod error_handling; #[cfg(test)] mod tests; diff --git a/node/subsystem-util/src/runtime/error.rs b/node/subsystem-util/src/runtime/error.rs index ff4584340826..af61438f5ed2 100644 --- a/node/subsystem-util/src/runtime/error.rs +++ b/node/subsystem-util/src/runtime/error.rs @@ -23,23 +23,16 @@ use thiserror::Error; use polkadot_node_subsystem::errors::RuntimeApiError; use polkadot_primitives::v1::SessionIndex; -use crate::Fault; - pub type Result = std::result::Result; /// Errors for `Runtime` cache. -pub type Error = Fault; - -impl From for Error { - fn from(e: NonFatal) -> Self { - Self::from_non_fatal(e) - } -} - -impl From for Error { - fn from(f: Fatal) -> Self { - Self::from_fatal(f) - } +#[derive(Debug, Error, derive_more::From)] +#[error(transparent)] +pub enum Error { + /// All fatal errors. + Fatal(Fatal), + /// All nonfatal/potentially recoverable errors. + NonFatal(NonFatal), } /// Fatal runtime errors.