Skip to content

Commit

Permalink
Cleanup PoH speed check error (anza-xyz#2400)
Browse files Browse the repository at this point in the history
Cleanup PoH speed check logs and error

The current logging and error message from the Poh speed check are
confusing. If the node fails, the error message states that the node is
too slow. But, the reported numbers are slot durations in nanoseconds
where a slower node will have a larger number. Lastly, the reported
numbers aren't labeled with a unit so it is hard to make sense of this
without looking at the actual code.

The check now computes and reports hashes per second.
  • Loading branch information
steviez authored Aug 4, 2024
1 parent ccabfcf commit ecc05c5
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 23 deletions.
44 changes: 24 additions & 20 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use {
utils::{move_and_async_delete_path, move_and_async_delete_path_contents},
},
solana_client::connection_cache::{ConnectionCache, Protocol},
solana_entry::poh::compute_hash_time_ns,
solana_entry::poh::compute_hash_time,
solana_geyser_plugin_manager::{
geyser_plugin_service::GeyserPluginService, GeyserPluginManagerRequest,
},
Expand Down Expand Up @@ -1680,29 +1680,28 @@ fn active_vote_account_exists_in_bank(bank: &Bank, vote_account: &Pubkey) -> boo
fn check_poh_speed(
genesis_config: &GenesisConfig,
maybe_hash_samples: Option<u64>,
) -> Result<(), String> {
) -> Result<(), ValidatorError> {
if let Some(hashes_per_tick) = genesis_config.hashes_per_tick() {
let ticks_per_slot = genesis_config.ticks_per_slot();
let hashes_per_slot = hashes_per_tick * ticks_per_slot;

let hash_samples = maybe_hash_samples.unwrap_or(hashes_per_slot);
let hash_time_ns = compute_hash_time_ns(hash_samples);

let my_ns_per_slot = (hash_time_ns * hashes_per_slot) / hash_samples;
debug!("computed: ns_per_slot: {}", my_ns_per_slot);
let target_ns_per_slot = genesis_config.ns_per_slot() as u64;
debug!(
"cluster ns_per_hash: {}ns ns_per_slot: {}",
target_ns_per_slot / hashes_per_slot,
target_ns_per_slot

let hash_time = compute_hash_time(hash_samples);
let my_hashes_per_second = (hash_samples as f64 / hash_time.as_secs_f64()) as u64;
let target_slot_duration = Duration::from_nanos(genesis_config.ns_per_slot() as u64);
let target_hashes_per_second =
(hashes_per_slot as f64 / target_slot_duration.as_secs_f64()) as u64;

info!(
"PoH speed check: \
computed hashes per second {my_hashes_per_second}, \
target hashes per second {target_hashes_per_second}"
);
if my_ns_per_slot < target_ns_per_slot {
let extra_ns = target_ns_per_slot - my_ns_per_slot;
info!("PoH speed check: Will sleep {}ns per slot.", extra_ns);
} else {
return Err(format!(
"PoH is slower than cluster target tick rate! mine: {my_ns_per_slot} cluster: {target_ns_per_slot}.",
));
if my_hashes_per_second < target_hashes_per_second {
return Err(ValidatorError::PohTooSlow {
mine: my_hashes_per_second,
target: target_hashes_per_second,
});
}
}
Ok(())
Expand Down Expand Up @@ -1861,7 +1860,7 @@ fn load_blockstore(
}

if !config.no_poh_speed_test {
check_poh_speed(&genesis_config, None)?;
check_poh_speed(&genesis_config, None).map_err(|err| format!("{err}"))?;
}

let mut blockstore =
Expand Down Expand Up @@ -2345,6 +2344,11 @@ pub enum ValidatorError {
#[error("{0}")]
Other(String),

#[error(
"PoH hashes/second rate is slower than the cluster target: mine {mine}, cluster {target}"
)]
PohTooSlow { mine: u64, target: u64 },

#[error(transparent)]
TraceError(#[from] TraceError),

Expand Down
6 changes: 3 additions & 3 deletions entry/src/poh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,18 @@ impl Poh {
}
}

pub fn compute_hash_time_ns(hashes_sample_size: u64) -> u64 {
pub fn compute_hash_time(hashes_sample_size: u64) -> Duration {
info!("Running {} hashes...", hashes_sample_size);
let mut v = Hash::default();
let start = Instant::now();
for _ in 0..hashes_sample_size {
v = hash(v.as_ref());
}
start.elapsed().as_nanos() as u64
start.elapsed()
}

pub fn compute_hashes_per_tick(duration: Duration, hashes_sample_size: u64) -> u64 {
let elapsed_ms = compute_hash_time_ns(hashes_sample_size) / (1000 * 1000);
let elapsed_ms = compute_hash_time(hashes_sample_size).as_millis() as u64;
duration.as_millis() as u64 * hashes_sample_size / elapsed_ms
}

Expand Down

0 comments on commit ecc05c5

Please sign in to comment.