paritytech · sandreim · Jan 25, 2024 · Oct 25, 2023 · Nov 6, 2023 · Nov 6, 2023
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -159,6 +159,7 @@ members = [
 	"polkadot/node/gum/proc-macro",
 	"polkadot/node/jaeger",
 	"polkadot/node/malus",
+	"polkadot/node/subsystem-bench",
 	"polkadot/node/metrics",
 	"polkadot/node/network/approval-distribution",
 	"polkadot/node/network/availability-distribution",

diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml
@@ -22,24 +22,31 @@ polkadot-node-subsystem-types = { path = "../subsystem-types" }
 polkadot-node-primitives = { path = "../primitives" }
 polkadot-primitives = { path = "../../primitives" }
 polkadot-node-network-protocol = { path = "../network/protocol" }
-polkadot-availability-recovery = { path = "../network/availability-recovery", features = ["subsystem-benchmarks"] }
+polkadot-availability-recovery = { path = "../network/availability-recovery", features=["subsystem-benchmarks"]}
+polkadot-availability-distribution = { path = "../network/availability-distribution"}
+polkadot-node-core-av-store = { path = "../core/av-store"}
+polkadot-node-core-chain-api = { path = "../core/chain-api"}
+polkadot-availability-bitfield-distribution = { path = "../network/bitfield-distribution"}
 color-eyre = { version = "0.6.1", default-features = false }
-polkadot-overseer = { path = "../overseer" }
+polkadot-overseer =  { path = "../overseer" }
 colored = "2.0.4"
 assert_matches = "1.5"
-async-trait = "0.1.74"
+async-trait = "0.1.57"
 sp-keystore = { path = "../../../substrate/primitives/keystore" }
 sc-keystore = { path = "../../../substrate/client/keystore" }
 sp-core = { path = "../../../substrate/primitives/core" }
-clap = { version = "4.4.16", features = ["derive"] }
+clap = { version = "4.4.6", features = ["derive"] }
 futures = "0.3.21"
 futures-timer = "3.0.2"
 gum = { package = "tracing-gum", path = "../gum" }
 polkadot-erasure-coding = { package = "polkadot-erasure-coding", path = "../../erasure-coding" }
 log = "0.4.17"
 env_logger = "0.9.0"
 rand = "0.8.5"
-parity-scale-codec = { version = "3.6.1", features = ["derive", "std"] }
+# `rand` only supports uniform distribution, we need normal distribution for latency.
+rand_distr = "0.4.3"
+
+parity-scale-codec = { version = "3.6.1", features = ["std", "derive"] }
 tokio = "1.24.2"
 clap-num = "1.0.2"
 polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" }
@@ -55,7 +62,10 @@ prometheus = { version = "0.13.0", default-features = false }
 serde = "1.0.195"
 serde_yaml = "0.9"
 paste = "1.0.14"
-orchestra = { version = "0.3.3", default-features = false, features = ["futures_channel"] }
+orchestra = { version = "0.3.3", default-features = false, features=["futures_channel"] }
+kvdb-memorydb = "0.13.0"
+sp-consensus = { path = "../../../substrate/primitives/consensus/common", default-features = false }
+bitvec = "1.0.0"
 pyroscope = "0.5.7"
 pyroscope_pprofrs = "0.2.7"
 

diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md
@@ -1,14 +1,14 @@
 # Subsystem benchmark client
 
-Run parachain consensus stress and performance tests on your development machine.
+Run parachain consensus stress and performance tests on your development machine.  
 
 ## Motivation
 
-The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is
-responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and
-performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or
-`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of
-the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard
+The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is 
+responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and 
+performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or 
+`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of 
+the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard 
 to orchestrate and is a huge development time sink.
 
 This tool aims to solve the problem by making it easy to:
@@ -111,30 +111,28 @@ Commands:
 ```
 
 Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is typically
-used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml).
+used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml).
 
 ### Standard test options
-
+  
 ```
-Options:
-    --network <NETWORK>                              The type of network to be emulated [default: ideal] [possible
-                                                     values: ideal, healthy, degraded]
-    --n-cores <N_CORES>                              Number of cores to fetch availability for [default: 100]
-    --n-validators <N_VALIDATORS>                    Number of validators to fetch chunks from [default: 500]
-    --min-pov-size <MIN_POV_SIZE>                    The minimum pov size in KiB [default: 5120]
-    --max-pov-size <MAX_POV_SIZE>                    The maximum pov size bytes [default: 5120]
--n, --num-blocks <NUM_BLOCKS>                        The number of blocks the test is going to run [default: 1]
--p, --peer-bandwidth <PEER_BANDWIDTH>                The bandwidth of simulated remote peers in KiB
--b, --bandwidth <BANDWIDTH>                          The bandwidth of our simulated node in KiB
-    --peer-error <PEER_ERROR>                        Simulated conection error ratio [0-100]
-    --peer-min-latency <PEER_MIN_LATENCY>            Minimum remote peer latency in milliseconds [0-5000]
-    --peer-max-latency <PEER_MAX_LATENCY>            Maximum remote peer latency in milliseconds [0-5000]
-    --profile                                        Enable CPU Profiling with Pyroscope
-    --pyroscope-url <PYROSCOPE_URL>                  Pyroscope Server URL [default: http://localhost:4040]
-    --pyroscope-sample-rate <PYROSCOPE_SAMPLE_RATE>  Pyroscope Sample Rate [default: 113]
-    --cache-misses                                   Enable Cache Misses Profiling with Valgrind. Linux only, Valgrind
-                                                     must be in the PATH
--h, --help                                           Print help
+      --network <NETWORK>                              The type of network to be emulated [default: ideal] [possible values: ideal, healthy,
+                                                       degraded]
+      --n-cores <N_CORES>                              Number of cores to fetch availability for [default: 100]
+      --n-validators <N_VALIDATORS>                    Number of validators to fetch chunks from [default: 500]
+      --min-pov-size <MIN_POV_SIZE>                    The minimum pov size in KiB [default: 5120]
+      --max-pov-size <MAX_POV_SIZE>                    The maximum pov size in KiB [default: 5120]
+  -n, --num-blocks <NUM_BLOCKS>                        The number of blocks the test is going to run [default: 1]
+  -p, --peer-bandwidth <PEER_BANDWIDTH>                The bandwidth of emulated remote peers in KiB
+  -b, --bandwidth <BANDWIDTH>                          The bandwidth of our node in KiB
+      --connectivity <CONNECTIVITY>                    Emulated peer connection ratio [0-100]
+      --peer-mean-latency <PEER_MEAN_LATENCY>          Mean remote peer latency in milliseconds [0-5000]
+      --peer-latency-std-dev <PEER_LATENCY_STD_DEV>    Remote peer latency standard deviation
+      --profile                                        Enable CPU Profiling with Pyroscope
+      --pyroscope-url <PYROSCOPE_URL>                  Pyroscope Server URL [default: http://localhost:4040]
+      --pyroscope-sample-rate <PYROSCOPE_SAMPLE_RATE>  Pyroscope Sample Rate [default: 113]
+      --cache-misses                                   Enable Cache Misses Profiling with Valgrind. Linux only, Valgrind must be in the PATH
+  -h, --help                                           Print help
 ```
 
 These apply to all test objectives, except `test-sequence` which relies on the values being specified in a file.
@@ -152,8 +150,8 @@ Benchmark availability recovery strategies
 Usage: subsystem-bench data-availability-read [OPTIONS]
 
 Options:
-  -f, --fetch-from-backers  Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU
-                            as we don't need to re-construct from chunks. Tipically this is only faster if nodes
+  -f, --fetch-from-backers  Turbo boost AD Read by fetching the full availability data from backers first. Saves CPU
+                            as we don't need to re-construct from chunks. Typically this is only faster if nodes
                             have enough bandwidth
   -h, --help                Print help
 ```
@@ -170,9 +168,9 @@ usage:
 - for how many blocks the test should run (`num_blocks`)
 
 From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal
-followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally
-test payloads pre-generated before the test run, or constructed on pre-genereated payloads. For example the
-`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before
+followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally 
+test payloads pre-generated before the test run, or constructed on pre-generated payloads. For example the
+`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before 
 the test is started.
 
 ### Example run
@@ -181,9 +179,9 @@ Let's run an availabilty read test which will recover availability for 10 cores
 node validator network.
 
 ```
- target/testnet/subsystem-bench --n-cores 10 data-availability-read
-[2023-11-28T09:01:59Z INFO  subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120,
-                                                            error = 0, latency = None
+ target/testnet/subsystem-bench --n-cores 10 data-availability-read 
+[2023-11-28T09:01:59Z INFO  subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120, 
+                                                            latency = None
 [2023-11-28T09:01:59Z INFO  subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880
 [2023-11-28T09:01:59Z INFO  subsystem-bench::availability] Created test environment.
 [2023-11-28T09:01:59Z INFO  subsystem-bench::availability] Pre-generating 10 candidates.
@@ -196,8 +194,8 @@ node validator network.
 [2023-11-28T09:02:07Z INFO  subsystem_bench::availability] All blocks processed in 6001ms
 [2023-11-28T09:02:07Z INFO  subsystem_bench::availability] Throughput: 51200 KiB/block
 [2023-11-28T09:02:07Z INFO  subsystem_bench::availability] Block time: 6001 ms
-[2023-11-28T09:02:07Z INFO  subsystem_bench::availability]
-
+[2023-11-28T09:02:07Z INFO  subsystem_bench::availability] 
+    
     Total received from network: 66 MiB
     Total sent to network: 58 KiB
     Total subsystem CPU usage 4.16s
@@ -206,12 +204,12 @@ node validator network.
     CPU usage per block 0.00s
 ```
 
-`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it
+`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it 
 took the subsystem to finish processing all of the messages sent in the context of the current test block.
 
 ### Test logs
 
-You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting
+You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting 
 `RUST_LOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test.
 
 ### View test metrics
@@ -270,17 +268,17 @@ This tool is intended to make it easy to write new test objectives that focus in
 or even multiple subsystems (for example `approval-distribution` and `approval-voting`).
 
 A special kind of test objectives are performance regression tests for the CI pipeline. These should be sequences
-of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both
+of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both 
 happy and negative scenarios (low bandwidth, network errors and low connectivity).
 
 ### Reusable test components
 
-To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment`,
+To write a new test objective faster, you need to use some higher-level wrappers and logic: `TestEnvironment`,
 `TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will
 need to also build an `Overseer`, but that should be easy using the mockups for subsystems in `core::mock`.
 
 ### Mocking
 
 Ideally we want to have a single mock implementation for subsystems that can be minimally configured to
-be used in different tests. A good example is `runtime-api` which currently only responds to session information
+be used in different tests. A good example is `runtime-api` which currently only responds to session information 
 requests based on static data. It can be easily extended to service other requests.
diff --git a/polkadot/node/subsystem-bench/examples/availability_read.yaml b/polkadot/node/subsystem-bench/examples/availability_read.yaml
@@ -1,57 +1,45 @@
 TestConfiguration:
 # Test 1
 - objective: !DataAvailabilityRead
-    fetch_from_backers: false
+    fetch_from_backers: true
   n_validators: 300
   n_cores: 20
   min_pov_size: 5120
   max_pov_size: 5120
   peer_bandwidth: 52428800
   bandwidth: 52428800
   latency:
-    min_latency:
-      secs: 0
-      nanos: 1000000
-    max_latency:
-      secs: 0
-      nanos: 100000000
-  error: 3
+    mean_latency_ms: 100
+    std_dev: 1
   num_blocks: 3
+  connectivity: 90
 
 # Test 2
 - objective: !DataAvailabilityRead
-    fetch_from_backers: false
+    fetch_from_backers: true
   n_validators: 500
   n_cores: 20
   min_pov_size: 5120
   max_pov_size: 5120
   peer_bandwidth: 52428800
   bandwidth: 52428800
   latency:
-    min_latency:
-      secs: 0
-      nanos: 1000000
-    max_latency:
-      secs: 0
-      nanos: 100000000
-  error: 3
+    mean_latency_ms: 100
+    std_dev: 1
   num_blocks: 3
+  connectivity: 90
 
 # Test 3
 - objective: !DataAvailabilityRead
-    fetch_from_backers: false
+    fetch_from_backers: true
   n_validators: 1000
   n_cores: 20
   min_pov_size: 5120
   max_pov_size: 5120
   peer_bandwidth: 52428800
   bandwidth: 52428800
   latency:
-    min_latency:
-      secs: 0
-      nanos: 1000000
-    max_latency:
-      secs: 0
-      nanos: 100000000
-  error: 3
+    mean_latency_ms: 100
+    std_dev: 1
   num_blocks: 3
+  connectivity: 90
diff --git a/polkadot/node/subsystem-bench/examples/availability_write.yaml b/polkadot/node/subsystem-bench/examples/availability_write.yaml
@@ -0,0 +1,15 @@
+TestConfiguration:
+# Test 1kV, 200 cores, max Pov
+- objective: DataAvailabilityWrite
+  n_validators: 1000
+  n_cores: 200
+  max_validators_per_core: 5
+  min_pov_size: 5120
+  max_pov_size: 5120
+  peer_bandwidth: 52428800
+  bandwidth: 52428800
+  latency:
+    mean_latency_ms: 30
+    std_dev: 2.0
+  connectivity: 75
+  num_blocks: 3