Skip to content

Commit

Permalink
feat(collator): added anchor import and block sync metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
SmaGMan authored and Rexagon committed Nov 29, 2024
1 parent 0132576 commit 3e2bb2f
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 19 deletions.
16 changes: 12 additions & 4 deletions collator/src/collator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ impl CollatorStdImpl {
mc_data: Arc<McData>,
working_state_tx: oneshot::Sender<Result<Box<WorkingState>>>,
) -> Result<()> {
let labels = [("workchain", self.shard_id.workchain().to_string())];

tracing::info!(target: tracing_targets::COLLATOR, "initializing...");

// init working state
Expand Down Expand Up @@ -384,6 +386,7 @@ impl CollatorStdImpl {
tracing::info!(target: tracing_targets::COLLATOR,
"collation was cancelled by manager on init",
);
metrics::counter!("tycho_collator_anchor_import_cancelled_count", &labels).increment(1);
self.listener
.on_cancelled(
working_state.mc_data.block_id,
Expand Down Expand Up @@ -640,6 +643,7 @@ impl CollatorStdImpl {
tracing::info!(target: tracing_targets::COLLATOR,
"collation was cancelled by manager on resume",
);
metrics::counter!("tycho_collator_anchor_import_cancelled_count", &labels).increment(1);
self.listener
.on_cancelled(
working_state.mc_data.block_id,
Expand Down Expand Up @@ -989,6 +993,7 @@ impl CollatorStdImpl {
// do not import the next anchor while the mempool may be paused:
// the collator needs to process more of the already imported anchors first
if prev_anchor_id.saturating_sub(top_processed_to_anchor) > max_consensus_lag_rounds / 2 {
metrics::counter!("tycho_collator_anchor_import_skipped_count", &labels).increment(1);
return Ok(ImportNextAnchor::Skipped);
}

Expand Down Expand Up @@ -1258,6 +1263,7 @@ impl CollatorStdImpl {
next_chain_time: u64,
) -> Result<()> {
let working_state = self.delayed_working_state.wait().await?;

self.do_collate(working_state, Some(top_shard_blocks_info), next_chain_time)
.await
}
Expand Down Expand Up @@ -1347,6 +1353,7 @@ impl CollatorStdImpl {
last_imported_chain_time,
"collation was cancelled by manager on try_collate_next_master_block",
);
metrics::counter!("tycho_collator_anchor_import_cancelled_count", &labels).increment(1);
self.listener
.on_cancelled(
working_state.mc_data.block_id,
Expand Down Expand Up @@ -1386,7 +1393,7 @@ impl CollatorStdImpl {
// time elapsed from prev anchor
let elapsed_from_prev_anchor = self.anchor_timer.elapsed();
self.anchor_timer = std::time::Instant::now();
metrics::histogram!("tycho_do_collate_from_prev_anchor_time", &labels)
metrics::histogram!("tycho_collator_from_prev_anchor_time", &labels)
.record(elapsed_from_prev_anchor);

working_state.wu_used_from_last_anchor = 0;
Expand Down Expand Up @@ -1573,6 +1580,7 @@ impl CollatorStdImpl {
last_imported_chain_time,
"collation was cancelled by manager on try_collate_next_shard_block",
);
metrics::counter!("tycho_collator_anchor_import_cancelled_count", &labels).increment(1);
self.listener
.on_cancelled(
working_state.mc_data.block_id,
Expand Down Expand Up @@ -1616,12 +1624,12 @@ impl CollatorStdImpl {
let elapsed_from_prev_anchor = self.anchor_timer.elapsed();
self.anchor_timer = std::time::Instant::now();
metrics::histogram!(
"tycho_do_collate_from_prev_anchor_time",
"tycho_collator_from_prev_anchor_time",
&labels
)
.record(elapsed_from_prev_anchor);

metrics::gauge!("tycho_do_collate_shard_blocks_count_btw_anchors")
metrics::gauge!("tycho_collator_shard_blocks_count_btw_anchors")
.set(self.shard_blocks_count_from_last_anchor);
self.shard_blocks_count_from_last_anchor = 0;

Expand Down Expand Up @@ -1651,7 +1659,7 @@ impl CollatorStdImpl {
}
}

metrics::gauge!("tycho_do_collate_import_next_anchor_count")
metrics::gauge!("tycho_collator_import_next_anchor_count")
.set(imported_anchors_count);

if imported_anchors_has_externals {
Expand Down
14 changes: 11 additions & 3 deletions collator/src/manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,9 @@ where
.store_collated(collation_result.candidate, collation_result.mc_data.clone())?;

if store_res.block_mismatch {
let labels = [("workchain", block_id.shard.workchain().to_string())];
metrics::counter!("tycho_collator_block_mismatch_count", &labels).increment(1);

self.set_collator_state(&block_id.shard, |ac| ac.state = CollatorState::Cancelled);

// when master block mismatched then should cancel shard collators as well
Expand Down Expand Up @@ -955,6 +958,9 @@ where
};

if store_res.block_mismatch {
let labels = [("workchain", block_id.shard.workchain().to_string())];
metrics::counter!("tycho_collator_block_mismatch_count", &labels).increment(1);

self.set_collator_state(&block_id.shard, |ac| {
ac.state = match ac.state {
CollatorState::Waiting | CollatorState::Cancelled => CollatorState::Cancelled,
Expand Down Expand Up @@ -1145,6 +1151,7 @@ where
);

let _histogram = HistogramGuard::begin("tycho_collator_sync_to_applied_mc_block_time");
metrics::counter!("tycho_collator_sync_to_applied_mc_block_count").increment(1);

let first_applied_mc_block_key = BlockIdShort {
shard: ShardIdent::MASTERCHAIN,
Expand Down Expand Up @@ -1626,6 +1633,9 @@ where
"Current node was not authorized to collate shard {}",
shard_id,
);
metrics::gauge!("tycho_node_in_current_vset").set(0);
} else {
metrics::gauge!("tycho_node_in_current_vset").set(1);
}

match active_collation_sessions_guard.entry(shard_id) {
Expand Down Expand Up @@ -1891,9 +1901,7 @@ where
force_mc_block: ForceMasterCollation,
mc_block_min_interval_ms: u64,
) -> NextCollationStep {
let _histogram = HistogramGuard::begin(
"tycho_collator_update_last_collated_chain_time_and_check_should_collate_mc_block_time",
);
let _histogram = HistogramGuard::begin("detect_next_collation_step_time");

let mc_block_latest_chain_time = guard.mc_block_latest_chain_time;

Expand Down
42 changes: 30 additions & 12 deletions scripts/gen-dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,26 +974,34 @@ def collator_params_metrics() -> RowPanel:
return create_row("collator: Parameters", metrics)


def block_metrics() -> RowPanel:
def collation_metrics() -> RowPanel:
metrics = [
create_gauge_panel(
"tycho_node_in_current_vset",
"Node is in current validator set",
),
create_counter_panel(
"tycho_do_collate_blocks_count",
"Blocks rate",
labels_selectors=['workchain=~"$workchain"'],
"tycho_collator_sync_to_applied_mc_block_count",
"Number of syncs to applied mc block",
),
create_counter_panel(
"tycho_do_collate_blocks_with_limits_reached_count",
"Number of blocks with limits reached",
labels_selectors=['workchain=~"$workchain"'],
),
create_counter_panel(
"tycho_collator_block_mismatch_count",
"Number of mismatched blocks",
labels_selectors=['workchain=~"$workchain"'],
),
create_counter_panel(
"tycho_do_collate_tx_total",
"Number of transactions over time",
labels_selectors=['workchain=~"$workchain"'],
),
create_gauge_panel(
"tycho_do_collate_block_seqno",
"Block seqno",
"Collated block seqno",
labels=['workchain=~"$workchain"'],
),
create_gauge_panel(
Expand All @@ -1007,15 +1015,25 @@ def block_metrics() -> RowPanel:
labels=['workchain=~"$workchain"'],
),
create_gauge_panel(
"tycho_do_collate_shard_blocks_count_btw_anchors",
"tycho_collator_shard_blocks_count_btw_anchors",
"Number of Shard Blocks before import next anchor",
),
create_gauge_panel(
"tycho_do_collate_import_next_anchor_count",
"tycho_collator_import_next_anchor_count",
"Number of imported anchors per tick",
),
create_counter_panel(
"tycho_collator_anchor_import_cancelled_count",
"Number of anchor import cancelled",
labels_selectors=['workchain=~"$workchain"'],
),
create_counter_panel(
"tycho_collator_anchor_import_skipped_count",
"Number of anchor import skipped",
labels_selectors=['workchain=~"$workchain"'],
),
]
return create_row("collator: Block Metrics", metrics)
return create_row("collator: Collation Metrics", metrics)


def collator_execution_metrics() -> RowPanel:
Expand Down Expand Up @@ -1187,7 +1205,7 @@ def collator_time_metrics() -> RowPanel:
labels=['workchain=~"$workchain"'],
),
create_heatmap_panel(
"tycho_do_collate_from_prev_anchor_time",
"tycho_collator_from_prev_anchor_time",
"Time elapsed from prev anchor",
labels=['workchain=~"$workchain"'],
),
Expand Down Expand Up @@ -1395,8 +1413,8 @@ def collator_misc_operations_metrics() -> RowPanel:
"Refresh collation sessions",
),
create_heatmap_panel(
"tycho_collator_update_last_collated_chain_time_and_check_should_collate_mc_block_time",
"Update last collated chain time and check should collate mc block",
"detect_next_collation_step_time",
"Detect next collation step",
),
create_heatmap_panel(
"tycho_collator_enqueue_mc_block_collation_time",
Expand Down Expand Up @@ -1996,7 +2014,7 @@ def templates() -> Templating:
core_blockchain_rpc(),
storage(),
collator_params_metrics(),
block_metrics(),
collation_metrics(),
collator_execution_metrics(),
collator_message_metrics(),
collator_queue_metrics(),
Expand Down

0 comments on commit 3e2bb2f

Please sign in to comment.