Skip to content

Commit

Permalink
Addressed PR feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
pinebit committed Oct 16, 2024
1 parent 99ea803 commit 75321cb
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 40 deletions.
58 changes: 42 additions & 16 deletions core/consensus/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,36 @@ package metrics

import (
"github.com/prometheus/client_golang/prometheus"

"github.com/obolnetwork/charon/app/promauto"
"github.com/prometheus/client_golang/prometheus/promauto"
)

type ConsensusMetrics interface {
SetDecidedRounds(duty, timer string, rounds float64)
ObserveConsensusDuration(duty, timer string, duration float64)
IncConsensusTimeout(duty, timer string)
IncConsensusError()
}

var (
decidedRoundsGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "decided_rounds",
Help: "Number of rounds it took to decide consensus instances by protocol, duty and timer type.",
}, []string{"protocol", "duty", "timer"}) // Using gauge since the value changes slowly, once per slot.
Help: "Number of decided rounds by protocol, duty, and timer",
}, []string{"protocol", "duty", "timer"})

decidedLeaderGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "decided_leader_index",
Help: "Index of the decided leader by protocol",
}, []string{"protocol"})

consensusDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "duration_seconds",
Help: "Duration of a consensus instance in seconds by protocol, duty and timer type.",
Buckets: []float64{.05, .1, .25, .5, 1, 2.5, 5, 10, 20, 30, 60},
Help: "Duration of the consensus process by protocol, duty, and timer",
}, []string{"protocol", "duty", "timer"})

consensusTimeout = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "core",
Subsystem: "consensus",
Name: "timeout_total",
Help: "Total count of consensus timeouts by protocol, duty and timer type.",
Help: "Total count of consensus timeouts by protocol, duty, and timer",
}, []string{"protocol", "duty", "timer"})

consensusError = promauto.NewCounterVec(prometheus.CounterOpts{
Expand All @@ -46,28 +44,56 @@ var (
}, []string{"protocol"})
)

// ConsensusMetrics is the set of metric-recording operations that a consensus
// implementation reports through.
type ConsensusMetrics interface {
	// SetDecidedRounds records rounds as the decided-rounds value for the given duty and timer.
	SetDecidedRounds(duty string, timer string, rounds int64)

	// SetDecidedLeaderIndex records leaderIndex as the index of the decided leader.
	SetDecidedLeaderIndex(leaderIndex int64)

	// ObserveConsensusDuration adds one consensus duration sample for the given duty and timer.
	ObserveConsensusDuration(duty string, timer string, duration float64)

	// IncConsensusTimeout counts one consensus timeout for the given duty and timer.
	IncConsensusTimeout(duty string, timer string)

	// IncConsensusError counts one consensus error.
	IncConsensusError()
}

// consensusMetrics is the ConsensusMetrics implementation returned by
// NewConsensusMetrics. Its methods write to the package-level metric vectors.
type consensusMetrics struct {
// protocolID is passed as the "protocol" label value on every metric update.
protocolID string
}

// NewConsensusMetrics returns a ConsensusMetrics whose updates are all
// labelled with the supplied protocol ID.
func NewConsensusMetrics(protocolID string) ConsensusMetrics {
	metrics := new(consensusMetrics)
	metrics.protocolID = protocolID

	return metrics
}

func (m *consensusMetrics) SetDecidedRounds(duty, timer string, rounds float64) {
decidedRoundsGauge.WithLabelValues(m.protocolID, duty, timer).Set(rounds)
// SetDecidedRounds stores rounds in the decided-rounds gauge series selected
// by this instance's protocol ID together with the given duty and timer.
func (m *consensusMetrics) SetDecidedRounds(duty, timer string, rounds int64) {
	gauge := decidedRoundsGauge.WithLabelValues(m.protocolID, duty, timer)
	// Gauges hold float64 values, so the integer round count is converted here.
	gauge.Set(float64(rounds))
}

// SetDecidedLeaderIndex stores leaderIndex in the decided-leader gauge series
// selected by this instance's protocol ID.
func (m *consensusMetrics) SetDecidedLeaderIndex(leaderIndex int64) {
	gauge := decidedLeaderGauge.WithLabelValues(m.protocolID)
	// Gauges hold float64 values, so the integer index is converted here.
	gauge.Set(float64(leaderIndex))
}

// ObserveConsensusDuration adds duration as one sample to the consensus
// duration histogram series selected by this instance's protocol ID together
// with the given duty and timer.
func (m *consensusMetrics) ObserveConsensusDuration(duty, timer string, duration float64) {
	histogram := consensusDuration.WithLabelValues(m.protocolID, duty, timer)
	histogram.Observe(duration)
}

// IncConsensusTimeout adds one to the consensus timeout counter series
// selected by this instance's protocol ID together with the given duty and timer.
func (m *consensusMetrics) IncConsensusTimeout(duty, timer string) {
	counter := consensusTimeout.WithLabelValues(m.protocolID, duty, timer)
	counter.Inc()
}

// IncConsensusError adds one to the consensus error counter series selected
// by this instance's protocol ID.
func (m *consensusMetrics) IncConsensusError() {
	counter := consensusError.WithLabelValues(m.protocolID)
	counter.Inc()
}
22 changes: 19 additions & 3 deletions core/consensus/qbft/qbft.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,11 +382,27 @@ func (c *Consensus) runInstance(ctx context.Context, duty core.Duty) (err error)
}

// Instrument consensus instance.
var decided bool
var (
decided bool
nodes = len(c.peers)
)

decideCallback := func(qcommit []qbft.Msg[core.Duty, [32]byte]) {
round := qcommit[0].Round()
decided = true
c.metrics.SetDecidedRounds(duty.Type.String(), string(roundTimer.Type()), float64(qcommit[0].Round()))
inst.DecidedAtCh <- time.Now()

leaderIndex := leader(duty, round, nodes)
leaderName := c.peers[leaderIndex].Name
log.Debug(ctx, "QBFT consensus decided",
z.Str("duty", duty.Type.String()),
z.U64("slot", duty.Slot),
z.I64("round", round),
z.I64("leader_index", leaderIndex),
z.Str("leader_name", leaderName))

c.metrics.SetDecidedLeaderIndex(leaderIndex)
c.metrics.SetDecidedRounds(duty.Type.String(), string(roundTimer.Type()), round)
}

// Create a new qbft definition for this instance.
Expand All @@ -410,7 +426,7 @@ func (c *Consensus) runInstance(ctx context.Context, duty core.Duty) (err error)
}

// Run the algo, blocking until the context is cancelled.
err = qbft.Run[core.Duty, [32]byte](ctx, def, qt, duty, peerIdx, inst.HashCh)
err = qbft.Run(ctx, def, qt, duty, peerIdx, inst.HashCh)
if err != nil && !isContextErr(err) {
c.metrics.IncConsensusError()
return err // Only return non-context errors.
Expand Down
45 changes: 28 additions & 17 deletions docs/consensus.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,66 +5,77 @@ This document describes how Charon handles various consensus protocols.
## Overview

Historically, Charon has supported the single consensus protocol QBFT v2.0.
However, now the consensus layer has pluggable interface which allows running different consensus protocols as long as they are available and accepted by cluster's quorum.
However, now the consensus layer has a pluggable interface that allows running different consensus protocols as long as they are available and accepted by the majority of the cluster. Moreover, the cluster can run multiple consensus protocols at the same time, e.g., for different purposes.

## Consensus Protocol Selection

The cluster nodes must agree on the preferred consensus protocol to use, otherwise the entire consensus would fail.
Each node, depending on its configuration and software version may prefer one or more consensus protocols in a specific order of preference.
Charon runs the special protocol called Priority which achieves consensus on the preferred consensus protocol to use.
Under the hood this protocol uses the existing QBFT v2.0 algorithm that is known to be present since v0.19 up until now and must not be deprecated.
The cluster nodes must agree on the preferred consensus protocol to use; otherwise, the entire consensus will fail.
Each node, depending on its configuration and software version, may prefer one or more consensus protocols in a specific order of precedence.
Charon runs a special protocol called Priority, which achieves consensus on the preferred consensus protocol to use.
Under the hood, this protocol uses the existing QBFT v2.0 algorithm that has been present since v0.19 and must not be deprecated.
This way, the existing QBFT v2.0 remains present for all future Charon versions to serve two purposes: running the Priority protocol and being a fallback protocol if no other protocol is selected.

### Priority Protocol Input and Output

The input to the Priority protocol is a list of protocols defined in the order of precedence, e.g.:
The input to the Priority protocol is a list of protocols defined in order of precedence, e.g.:

```json
[
"/charon/consensus/hotstuff/1.0.0", // Highest precedence
"/charon/consensus/abft/2.0.0",
"/charon/consensus/abft/1.0.0",
"/charon/consensus/qbft/2.0.0", // Lowest precedence and the last resort
"/charon/consensus/qbft/2.0.0", // Lowest precedence and the fallback since it is always present
]
```

The output of the Priority protocol is the common "subset" of all inputs respecting the initial order of precedence, e.g.:
The output of the Priority protocol is the common "subset" of all inputs, respecting the initial order of precedence, e.g.:

```json
[
"/charon/consensus/abft/1.0.0", // This means the quorum of nodes has this protocol in common
"/charon/consensus/abft/1.0.0", // This means the majority of nodes have this protocol available
"/charon/consensus/qbft/2.0.0",
]
```

Eventually, more nodes will upgrade and therefore start preferring newest protocols, which will change the output. Because we know that all nodes must at least support QBFT v2.0, it becomes the last resort option in the list and the "default" protocol. This way, Priority protocol would never get stuck and can't produce an empty output.
Eventually, more nodes will upgrade and therefore start preferring newer protocols, which will change the output. Because we know that all nodes must at least support QBFT v2.0, it becomes the fallback option in the list and the "default" protocol. This way, the Priority protocol will never get stuck and can't produce an empty output.

The Priority protocol runs once per epoch and changes its output depending on the inputs. If another protocol started to appear at the top of the list, Charon would switch the consensus protocol to that one with the next epoch.
The Priority protocol runs once per epoch (the last slot of each epoch) and changes its output depending on the inputs. If another protocol starts to appear at the top of the list, Charon will switch the consensus protocol to that one starting in the next epoch.

### Changing Consensus Protocol Preference

A cluster creator can specify the preferred consensus protocol in the cluster configuration file. This new field `consensus_protocol` appeared in the cluster definition file from v1.9 onwards. The field is optional and if not specified, the cluster definition will not impact the consensus protocol selection.

A node operator can also specify the preferred consensus protocol using the new CLI flag `--consensus-protocol` which has the same effect as the cluster configuration file, but it has a higher precedence. The flag is also optional.

In both cases, a user is supposed to specify the protocol family name, e.g. `abft` string and not a fully-qualified ID. The precise version of the protocol is to be determined by the Priority protocol.
In both cases, a user is supposed to specify the protocol family name, e.g. the string `abft`, and not a fully-qualified protocol ID.
The precise version of the protocol is to be determined by the Priority protocol, which will try picking the latest version.
To list all available consensus protocols (with versions), a user can run the command `charon version --verbose`.

When a node starts, it sequentially mutates the list of preferred consensus protocols by processing the cluster configuration file and then the mentioned CLI flag. The final list of preferred protocols is then passed to the Priority protocol for cluster-wide consensus. Until the Priority protocol reaches consensus, the cluster will use the default QBFT v2.0 protocol.
When a node starts, it sequentially mutates the list of preferred consensus protocols by processing the cluster configuration file and then the mentioned CLI flag. The final list of preferred protocols is then passed to the Priority protocol for cluster-wide consensus. Until the Priority protocol reaches consensus, the cluster will use the default QBFT v2.0 protocol for any duties.

## Observability

The following metrics reflect the consensus layer behavior:

- `core_consensus_decided_rounds`
- `core_consensus_decided_leader_index`
- `core_consensus_duration_seconds`
- `core_consensus_error_total`
- `core_consensus_timeout_total`

With the new capability to run different consensus protocols, all these metrics now populate the `protocol` label which allows distinguishing between different protocols.
Note that a cluster may run at most two different consensus protocols at the same time, e.g. QBFT v2.0 for Priority and HotStuff v1.0 for validator duties.
Therefore the mentioned metrics will have at most two unique values in the `protocol` label.
Note that a cluster may currently run at most two different consensus protocols at the same time, e.g. QBFT v2.0 for Priority and HotStuff v1.0 for validator duties. This may change in the future, allowing more protocols to run at the same time.
Therefore, the metrics listed above may report several distinct values for the `protocol` label.

Some protocols may export their own metrics. We agreed that all such metrics should be prefixed with the protocol name, e.g. `core_consensus_hotstuff_xyz`.

## Debugging

Charon will handle `/debug/consensus` HTTP endpoint that would respond with `consensus_messages.pb.gz` file containing some number of the last consensus messages (protobuf format).
All consensus messages are tagged with the corresponding protocol id, in case of multiple protocols running at the same time.
Charon handles the `/debug/consensus` HTTP endpoint, which responds with a `consensus_messages.pb.gz` file containing a certain number of the most recent consensus messages (in protobuf format).
All consensus messages are tagged with the corresponding protocol ID, in case of multiple protocols running at the same time.

## Protocol Specific Configuration

Each consensus protocol may have its own configuration parameters. For instance, QBFT v2.0 has two parameters: `eager_double_linear` and `consensus_participate` that users control via Feature set.
For future protocols we decided to follow the same design and allow users to control the protocol-specific parameters via Feature set.
Charon will set recommended default values for all such parameters, so node operators don't need to override them unless they know what they are doing. Note that the Priority protocol does not take into account any variations caused by different parameters; therefore, node operators must be careful when changing them and make sure all nodes have the same configuration.
4 changes: 0 additions & 4 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ when storing metrics from multiple nodes or clusters in one Prometheus instance.
| `core_bcast_recast_errors_total` | Counter | The total count of failed recasted registrations by source; `pregen` vs `downstream` | `source` |
| `core_bcast_recast_registration_total` | Counter | The total number of unique validator registration stored in recaster per pubkey | `pubkey` |
| `core_bcast_recast_total` | Counter | The total count of recasted registrations by source; `pregen` vs `downstream` | `source` |
| `core_consensus_decided_rounds` | Gauge | Number of rounds it took to decide consensus instances by protocol, duty and timer type. | `protocol, duty, timer` |
| `core_consensus_duration_seconds` | Histogram | Duration of a consensus instance in seconds by protocol, duty and timer type. | `protocol, duty, timer` |
| `core_consensus_error_total` | Counter | Total count of consensus errors by protocol | `protocol` |
| `core_consensus_timeout_total` | Counter | Total count of consensus timeouts by protocol, duty and timer type. | `protocol, duty, timer` |
| `core_parsigdb_exit_total` | Counter | Total number of partially signed voluntary exits per public key | `pubkey` |
| `core_scheduler_current_epoch` | Gauge | The current epoch | |
| `core_scheduler_current_slot` | Gauge | The current slot | |
Expand Down

0 comments on commit 75321cb

Please sign in to comment.