Skip to content

Add metric for duration between block timestamp and acceptance time #2366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 27 additions & 16 deletions snow/consensus/snowman/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,22 @@ type metrics struct {
// numProcessing keeps track of the number of processing blocks
numProcessing prometheus.Gauge

blockSizeAcceptedSum prometheus.Gauge
// pollsAccepted tracks the number of polls that a block was in processing
// for before being accepted
pollsAccepted metric.Averager
// latAccepted tracks the number of nanoseconds that a block was processing
// before being accepted
latAccepted metric.Averager
blockSizeAcceptedSum prometheus.Gauge
buildLatencyAccepted prometheus.Gauge

blockSizeRejectedSum prometheus.Gauge
// pollsRejected tracks the number of polls that a block was in processing
// for before being rejected
pollsRejected metric.Averager
// latRejected tracks the number of nanoseconds that a block was processing
// before being rejected
latRejected metric.Averager
blockSizeRejectedSum prometheus.Gauge
latRejected metric.Averager

// numFailedPolls keeps track of the number of polls that failed
numFailedPolls prometheus.Counter
Expand Down Expand Up @@ -101,6 +102,11 @@ func newMetrics(
Help: "number of currently processing blocks",
}),

blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "blks_accepted_container_size_sum",
Help: "cumulative size of all accepted blocks",
}),
pollsAccepted: metric.NewAveragerWithErrs(
namespace,
"blks_polls_accepted",
Expand All @@ -121,12 +127,17 @@ func newMetrics(
reg,
&errs,
),
blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{
buildLatencyAccepted: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "blks_accepted_container_size_sum",
Help: "cumulative size of all accepted blocks",
Name: "blks_build_accept_latency",
Help: "time (in ns) from the timestamp of a block to the time it was accepted",
}),

blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "blks_rejected_container_size_sum",
Help: "cumulative size of all rejected blocks",
}),
pollsRejected: metric.NewAveragerWithErrs(
namespace,
"blks_polls_rejected",
Expand All @@ -147,11 +158,6 @@ func newMetrics(
reg,
&errs,
),
blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "blks_rejected_container_size_sum",
Help: "cumulative size of all rejected blocks",
}),

numSuccessfulPolls: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Expand All @@ -176,6 +182,7 @@ func newMetrics(
reg.Register(m.lastAcceptedTimestamp),
reg.Register(m.numProcessing),
reg.Register(m.blockSizeAcceptedSum),
reg.Register(m.buildLatencyAccepted),
reg.Register(m.blockSizeRejectedSum),
reg.Register(m.numSuccessfulPolls),
reg.Register(m.numFailedPolls),
Expand Down Expand Up @@ -216,12 +223,16 @@ func (m *metrics) Accepted(
m.processingBlocks.Delete(blkID)
m.numProcessing.Dec()

m.blockSizeAcceptedSum.Add(float64(blockSize))

m.pollsAccepted.Observe(float64(pollNumber - start.pollNumber))

duration := time.Since(start.time)
m.latAccepted.Observe(float64(duration))
now := time.Now()
processingDuration := now.Sub(start.time)
m.latAccepted.Observe(float64(processingDuration))

m.blockSizeAcceptedSum.Add(float64(blockSize))
builtDuration := now.Sub(timestamp)
m.buildLatencyAccepted.Add(float64(builtDuration))
}

func (m *metrics) Rejected(blkID ids.ID, pollNumber uint64, blockSize int) {
Expand All @@ -236,12 +247,12 @@ func (m *metrics) Rejected(blkID ids.ID, pollNumber uint64, blockSize int) {
m.processingBlocks.Delete(blkID)
m.numProcessing.Dec()

m.blockSizeRejectedSum.Add(float64(blockSize))

m.pollsRejected.Observe(float64(pollNumber - start.pollNumber))

duration := time.Since(start.time)
m.latRejected.Observe(float64(duration))

m.blockSizeRejectedSum.Add(float64(blockSize))
}

func (m *metrics) MeasureAndGetOldestDuration() time.Duration {
Expand Down
4 changes: 3 additions & 1 deletion snow/consensus/snowman/topological.go
Original file line number Diff line number Diff line change
Expand Up @@ -622,9 +622,11 @@ func (ts *Topological) acceptPreferredChild(ctx context.Context, n *snowmanBlock
}

height := child.Height()
timestamp := child.Timestamp()
ts.ctx.Log.Trace("accepting block",
zap.Stringer("blkID", pref),
zap.Uint64("height", height),
zap.Time("timestamp", timestamp),
)
if err := child.Accept(ctx); err != nil {
return err
Expand All @@ -641,7 +643,7 @@ func (ts *Topological) acceptPreferredChild(ctx context.Context, n *snowmanBlock
ts.metrics.Accepted(
pref,
height,
child.Timestamp(),
timestamp,
ts.pollNumber,
len(bytes),
)
Expand Down
3 changes: 2 additions & 1 deletion snow/engine/snowman/bootstrap/block_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ func (b *blockJob) Execute(ctx context.Context) error {
b.numAccepted.Inc()
b.log.Trace("accepting block in bootstrapping",
zap.Stringer("blkID", blkID),
zap.Uint64("blkHeight", b.blk.Height()),
zap.Uint64("height", b.blk.Height()),
zap.Time("timestamp", b.blk.Timestamp()),
)
if err := b.blk.Accept(ctx); err != nil {
b.log.Debug("failed to accept block during bootstrapping",
Expand Down