Skip to content

Commit

Permalink
Expose metrics to compute stdev (MystenLabs#8814)
Browse files Browse the repository at this point in the history
## Description 

This PR exposes two extra metrics on the stress client:
* The benchmark duration, used to compute measurement snapshots (this helps to
determine when the system has finished warming up).
* The sum of the squares of the latencies (to compute stdev)
  • Loading branch information
asonnino authored Mar 3, 2023
1 parent c4cb84f commit a4bdf88
Showing 1 changed file with 37 additions and 4 deletions.
41 changes: 37 additions & 4 deletions crates/sui-benchmark/src/drivers/bench_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ use futures::FutureExt;
use futures::{stream::FuturesUnordered, StreamExt};
use indicatif::ProgressBar;
use indicatif::ProgressStyle;
use prometheus::register_gauge_vec_with_registry;
use prometheus::register_histogram_vec_with_registry;
use prometheus::register_int_counter_vec_with_registry;
use prometheus::GaugeVec;
use prometheus::HistogramVec;
use prometheus::IntCounterVec;
use prometheus::Registry;
use prometheus::{register_counter_vec_with_registry, register_gauge_vec_with_registry};
use prometheus::{register_histogram_vec_with_registry, register_int_counter_with_registry};
use prometheus::{register_int_counter_vec_with_registry, CounterVec};
use prometheus::{GaugeVec, IntCounter};
use tokio::sync::mpsc::Sender;
use tokio::sync::OnceCell;
use tokio_util::sync::CancellationToken;
Expand All @@ -38,11 +38,13 @@ use tracing::{debug, error, info};
use super::Interval;
use super::{BenchmarkStats, StressStats};
pub struct BenchMetrics {
pub benchmark_duration: IntCounter,
pub num_success: IntCounterVec,
pub num_error: IntCounterVec,
pub num_submitted: IntCounterVec,
pub num_in_flight: GaugeVec,
pub latency_s: HistogramVec,
pub latency_squared_s: CounterVec,
pub validators_in_tx_cert: IntCounterVec,
pub validators_in_effects_cert: IntCounterVec,
pub cpu_usage: GaugeVec,
Expand All @@ -55,6 +57,12 @@ const LATENCY_SEC_BUCKETS: &[f64] = &[
impl BenchMetrics {
fn new(registry: &Registry) -> Self {
BenchMetrics {
benchmark_duration: register_int_counter_with_registry!(
"benchmark_duration",
"Duration of the benchmark",
registry,
)
.unwrap(),
num_success: register_int_counter_vec_with_registry!(
"num_success",
"Total number of transaction success",
Expand Down Expand Up @@ -91,6 +99,13 @@ impl BenchMetrics {
registry,
)
.unwrap(),
latency_squared_s: register_counter_vec_with_registry!(
"latency_squared_s",
"Square of total time in seconds to return a response",
&["workload"],
registry,
)
.unwrap(),
validators_in_tx_cert: register_int_counter_vec_with_registry!(
"validators_in_tx_cert",
"Number of times a validator was included in tx cert",
Expand Down Expand Up @@ -357,7 +372,16 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
match res {
Ok(effects) => {
let latency = start.elapsed();
let time_from_start = start_time.elapsed();

if let Some(delta) = time_from_start.as_secs().checked_sub(metrics_cloned.benchmark_duration.get()) {
metrics_cloned.benchmark_duration.inc_by(delta);
}

let square_latency_ms = latency.as_secs_f64().powf(2.0);
metrics_cloned.latency_s.with_label_values(&[&b.1.get_workload_type().to_string()]).observe(latency.as_secs_f64());
metrics_cloned.latency_squared_s.with_label_values(&[&b.1.get_workload_type().to_string()]).inc_by(square_latency_ms);

metrics_cloned.num_success.with_label_values(&[&b.1.get_workload_type().to_string()]).inc();
metrics_cloned.num_in_flight.with_label_values(&[&b.1.get_workload_type().to_string()]).dec();
// let auth_sign_info = AuthorityStrongQuorumSignInfo::try_from(&cert.auth_sign_info).unwrap();
Expand Down Expand Up @@ -402,7 +426,16 @@ impl Driver<(BenchmarkStats, StressStats)> for BenchDriver {
match res {
Ok(effects) => {
let latency = start.elapsed();
let time_from_start = start_time.elapsed();

if let Some(delta) = time_from_start.as_secs().checked_sub(metrics_cloned.benchmark_duration.get()) {
metrics_cloned.benchmark_duration.inc_by(delta);
}

let square_latency_ms = latency.as_secs_f64().powf(2.0);
metrics_cloned.latency_s.with_label_values(&[&payload.get_workload_type().to_string()]).observe(latency.as_secs_f64());
metrics_cloned.latency_squared_s.with_label_values(&[&payload.get_workload_type().to_string()]).inc_by(square_latency_ms);

metrics_cloned.num_success.with_label_values(&[&payload.get_workload_type().to_string()]).inc();
metrics_cloned.num_in_flight.with_label_values(&[&payload.get_workload_type().to_string()]).dec();
// let auth_sign_info = AuthorityStrongQuorumSignInfo::try_from(&cert.auth_sign_info).unwrap();
Expand Down

0 comments on commit a4bdf88

Please sign in to comment.