[Benchmark] fix writing benchmark output in workflow; adjust latency calculations (MystenLabs#6987)

Avoid truncating the benchmark output: the results now contain more rows than before, and the values we care about are extracted with a regex anyway.

Example failure:
https://github.com/MystenLabs/sui/actions/runs/3753653913/jobs/6377112297#step:9:26

Example new output:

MystenLabs@c14f11c#commitcomment-93874326

Write the per-header and per-certificate latency log lines at debug level outside of benchmark builds as well.

Increase the number of histogram buckets across the latency range we are interested in.
mwtian authored Dec 22, 2022
1 parent bb14ffd commit cebf8ff
Showing 6 changed files with 30 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
@@ -72,7 +72,7 @@ jobs:
set -o pipefail
cargo run --release --package sui-benchmark --bin stress -- --log-path /tmp/stress.log --num-client-threads 10 --num-server-threads 24 --num-transfer-accounts 2 bench --target-qps 100 --num-workers 10 --transfer-object 100 --run-duration 60s 2>&1 | huniq | tee -a artifacts/owned.txt
cargo run --release --package sui-benchmark --bin stress -- --log-path /tmp/stress.log --num-client-threads 10 --num-server-threads 24 --num-transfer-accounts 2 bench --target-qps 100 --num-workers 10 --shared-counter 100 --run-duration 60s 2>&1 | huniq | tee -a artifacts/shared.txt
-pushd narwhal/benchmark && fab local | tail -n 31 | tee -a ../../artifacts/narwhal.txt && popd
+pushd narwhal/benchmark && fab local | tee -a ../../artifacts/narwhal.txt && popd
- name: Retrieve benchmark results
id: get-comment-body
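
For reference, the extraction the commit message alludes to ("we extract the values with a regex anyway") can look roughly like the sketch below. This is a minimal Rust example using the `regex` crate; the `Label: value` line format and the labels in `main` are assumptions for illustration, not the exact lines the workflow parses.

```rust
use regex::Regex;

/// Pull a named numeric metric out of the raw benchmark log.
/// The `label: value` line format here is an assumption for illustration.
fn extract_metric(log: &str, label: &str) -> Option<f64> {
    // e.g. " Consensus TPS: 1,234 tx/s" -> 1234.0
    let re = Regex::new(&format!(r"{label}:\s*([\d,\.]+)")).ok()?;
    let raw = re.captures(log)?.get(1)?.as_str().replace(',', "");
    raw.parse().ok()
}

fn main() {
    let log = " Consensus TPS: 1,234 tx/s\n End-to-end latency: 543 ms\n";
    assert_eq!(extract_metric(log, "Consensus TPS"), Some(1_234.0));
    assert_eq!(extract_metric(log, "End-to-end latency"), Some(543.0));
}
```

Because the values are located by pattern rather than by line position, there is no need to keep only the last 31 lines of the `fab local` output.
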
3 changes: 1 addition & 2 deletions narwhal/consensus/src/consensus.rs
@@ -183,9 +183,8 @@ impl ConsensusState {
.with_label_values(&[])
.set(last_committed_round as i64);

-#[cfg(feature = "benchmark")]
// NOTE: This log entry is used to compute performance.
-tracing::info!(
+tracing::debug!(
"Certificate {:?} took {} seconds to be committed at round {}",
certificate.digest(),
certificate.metadata.created_at.elapsed().as_secs_f64(),
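
Sketched in isolation, the new shape of this log site: the per-certificate latency line is no longer gated behind the `benchmark` feature and is emitted at `debug` level. The `Certificate` struct below is a trimmed stand-in for illustration; the real code logs the certificate digest and the commit round in the same way.

```rust
use std::time::Instant;

// Stand-in for the real narwhal Certificate; only the fields used here.
struct Certificate {
    digest: [u8; 32],
    created_at: Instant,
}

// NOTE: This log entry is used to compute performance.
// Previously gated behind #[cfg(feature = "benchmark")] at info level;
// now compiled unconditionally and emitted at debug level.
fn log_commit_latency(certificate: &Certificate, round: u64) {
    tracing::debug!(
        "Certificate {:?} took {} seconds to be committed at round {}",
        certificate.digest,
        certificate.created_at.elapsed().as_secs_f64(),
        round
    );
}
```
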
3 changes: 1 addition & 2 deletions narwhal/primary/src/core.rs
@@ -483,9 +483,8 @@ impl Core {
.with_label_values(&[&epoch.to_string()])
.observe(header_to_certificate_duration);

-#[cfg(feature = "benchmark")]
// NOTE: This log entry is used to compute performance.
-tracing::info!(
+debug!(
"Header {:?} took {} seconds to be materialized to a certificate {:?}",
certificate.header.digest(),
header_to_certificate_duration,
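
Since these lines now log at debug level, they only show up if the subscriber's filter allows debug output for the relevant targets. A generic `tracing-subscriber` sketch (requires the crate's `env-filter` feature); the target names are assumptions, not Narwhal's actual logging configuration:

```rust
use tracing_subscriber::EnvFilter;

fn main() {
    // Keep everything at info, but surface the per-header / per-certificate
    // latency lines that now log at debug level. The target names below are
    // assumptions; substitute the actual module paths.
    let filter = EnvFilter::new("info,narwhal_primary=debug,narwhal_consensus=debug");
    tracing_subscriber::fmt().with_env_filter(filter).init();

    tracing::debug!(target: "narwhal_primary", "debug-level latency lines are now visible");
}
```
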
5 changes: 3 additions & 2 deletions narwhal/primary/src/metrics.rs
@@ -14,8 +14,9 @@ use std::time::Duration;
use tonic::Code;

const LATENCY_SEC_BUCKETS: &[f64] = &[
-0.05, 0.1, 0.25, 0.5, 1., 2.5, 5., 7.5, 10., 12.5, 15., 20., 25., 30., 60., 90., 120., 180.,
-300.,
+0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4,
+1.6, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.,
+12.5, 15., 17.5, 20., 25., 30., 60., 90., 120., 180., 300.,
];

#[derive(Clone)]
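
Buckets like these typically feed a Prometheus histogram: the denser sub-10s buckets resolve the latencies we actually expect, while the coarse tail still covers pathological cases up to 300s. Below is a sketch of how such a constant is wired into a `HistogramVec` with the `prometheus` crate's registration macro; the metric name and label are illustrative, not the exact ones in `metrics.rs`.

```rust
use prometheus::{register_histogram_vec_with_registry, HistogramVec, Registry};

// The new LATENCY_SEC_BUCKETS from the diff above.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4,
    1.6, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.,
    12.5, 15., 17.5, 20., 25., 30., 60., 90., 120., 180., 300.,
];

fn example_latency_histogram(registry: &Registry) -> HistogramVec {
    // Metric name and label are placeholders for illustration only.
    register_histogram_vec_with_registry!(
        "example_header_to_certificate_latency",
        "Seconds from header creation to certificate formation",
        &["epoch"],
        LATENCY_SEC_BUCKETS.to_vec(),
        registry
    )
    .unwrap()
}
```
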
25 changes: 18 additions & 7 deletions narwhal/primary/src/proposer.rs
@@ -241,9 +241,11 @@ impl Proposer {
self.proposed_headers.insert(this_round, header.clone());

// Update metrics related to latency
+let mut total_inclusion_secs = 0.0;
for (_digest, _worker_id, created_at_timestamp) in digests.clone() {
-let batch_inclusion_duration =
+let batch_inclusion_secs =
Duration::from_millis(header.created_at - created_at_timestamp).as_secs_f64();
+total_inclusion_secs += batch_inclusion_secs;

#[cfg(feature = "benchmark")]
{
@@ -252,21 +254,30 @@
"Batch {:?} from worker {} took {} seconds from creation to be included in a proposed header",
_digest,
_worker_id,
-batch_inclusion_duration
+batch_inclusion_secs
);
}

self.metrics
.proposer_batch_latency
-.observe(batch_inclusion_duration);
+.observe(batch_inclusion_secs);
}

-#[cfg(feature = "benchmark")]
// NOTE: This log entry is used to compute performance.
-tracing::info!(
-"Header {:?} was created in {} seconds",
+let (header_creation_secs, avg_inclusion_secs) = if let Some(digest) = digests.first() {
+(
+Duration::from_millis(header.created_at - digest.2).as_secs_f64(),
+total_inclusion_secs / digests.len() as f64,
+)
+} else {
+(self.max_header_delay.as_secs_f64(), 0.0)
+};
+debug!(
+"Header {:?} was created in {} seconds. Contains {} batches, with average delay {} seconds.",
header.digest(),
-Duration::from_millis(header.created_at - digests.first().unwrap().2).as_secs_f64()
+header_creation_secs,
+digests.len(),
+avg_inclusion_secs,
);

Ok(header)
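
The new latency bookkeeping, restated as a standalone function for readability (field and type names are simplified; timestamps are unix-epoch milliseconds as in the diff, and the header is assumed to be created no earlier than its batches):

```rust
use std::time::Duration;

/// Simplified restatement of the proposer's new latency math.
/// Returns (header creation time, average batch inclusion delay) in seconds.
fn header_latency_stats(
    header_created_at: u64,
    batch_created_at: &[u64],
    max_header_delay: Duration,
) -> (f64, f64) {
    // Sum of per-batch "creation to inclusion in a proposed header" times.
    let total_inclusion_secs: f64 = batch_created_at
        .iter()
        .map(|ts| Duration::from_millis(header_created_at - *ts).as_secs_f64())
        .sum();

    if let Some(first) = batch_created_at.first() {
        // Header creation time measured from its first batch, plus the
        // average inclusion delay reported in the new debug! line.
        (
            Duration::from_millis(header_created_at - *first).as_secs_f64(),
            total_inclusion_secs / batch_created_at.len() as f64,
        )
    } else {
        // An empty header is only proposed after the max header delay fires.
        (max_header_delay.as_secs_f64(), 0.0)
    }
}
```
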
18 changes: 6 additions & 12 deletions narwhal/worker/src/batch_maker.rs
@@ -56,8 +56,8 @@ pub struct BatchMaker {
tx_message: Sender<(Batch, Option<tokio::sync::oneshot::Sender<()>>)>,
/// Metrics handler
node_metrics: Arc<WorkerMetrics>,
-/// The timestamp of the first transaction received
-/// to be included on the next batch
+/// The timestamp of the batch creation.
+/// Average resident time in the batch would be ~ (batch seal time - creation time) / 2
batch_start_timestamp: Instant,
/// The batch store to store our own batches.
store: Store<BatchDigest, Batch>,
@@ -119,15 +119,6 @@ impl BatchMaker {
// 'in-flight' are below a certain number (MAX_PARALLEL_BATCH). This
// condition will be met eventually if the store and network are functioning.
Some((transaction, response_sender)) = self.rx_batch_maker.recv(), if batch_pipeline.len() < MAX_PARALLEL_BATCH => {

-if current_batch.transactions.is_empty() {
-// We are interested to measure the time to seal a batch
-// only when we do have transactions to include. Thus we reset
-// the timer on the first transaction we receive to include on
-// an empty batch.
-self.batch_start_timestamp = Instant::now();
-}

current_batch_size += transaction.len();
current_batch.transactions.push(transaction);
current_responses.push(response_sender);
@@ -137,10 +128,12 @@
}
self.node_metrics.parallel_worker_batches.set(batch_pipeline.len() as i64);

-timer.as_mut().reset(Instant::now() + self.max_batch_delay);
current_batch = Batch::default();
current_responses = Vec::new();
current_batch_size = 0;

+timer.as_mut().reset(Instant::now() + self.max_batch_delay);
+self.batch_start_timestamp = Instant::now();
}
},

@@ -157,6 +150,7 @@
current_batch_size = 0;
}
timer.as_mut().reset(Instant::now() + self.max_batch_delay);
+self.batch_start_timestamp = Instant::now();
}

// TODO: duplicated code in quorum_waiter.rs
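
The net effect of the `batch_maker.rs` change, sketched with a stand-in struct: `batch_start_timestamp` now marks the start of the batch window and is reset on both seal paths (size threshold and max-delay timer) rather than on the first transaction of an empty batch, so the average residency of a transaction in a batch is roughly half the measured window.

```rust
use std::time::{Duration, Instant};

/// Sketch of the adjusted bookkeeping; the real BatchMaker also manages the
/// batch contents, responses, and the max-delay timer shown in the diff.
struct BatchTiming {
    batch_start_timestamp: Instant,
}

impl BatchTiming {
    /// Called on either seal path. Returns how long the batch stayed open;
    /// average transaction residency is roughly half of this value.
    fn on_seal(&mut self) -> Duration {
        let open_for = self.batch_start_timestamp.elapsed();
        self.batch_start_timestamp = Instant::now();
        open_for
    }
}
```
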
