Skip to content

Commit

Permalink
feat/refactor/docs(observability): epoch hook, txfees epoch, existing…
Browse files Browse the repository at this point in the history
… metric clean up (osmosis-labs#7515)

* feat/refactor/docs(observability): epoch hook, txfees epoch, existing metric clean up

* go mod

* updates

* updates

* format error in txfees counters

* updates

* updates

* lint

* fix

* updates
  • Loading branch information
p0mvn authored Feb 21, 2024
1 parent dc9b3b4 commit ab3f31a
Show file tree
Hide file tree
Showing 21 changed files with 393 additions and 46 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ require (
github.com/ory/dockertest/v3 v3.10.0
github.com/osmosis-labs/go-mutesting v0.0.0-20221208041716-b43bcd97b3b3
github.com/osmosis-labs/osmosis/osmomath v0.0.8
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240206013051-b1a74295e6e9
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240217005029-900b2404cb0e
github.com/osmosis-labs/osmosis/x/epochs v0.0.4
github.com/osmosis-labs/osmosis/x/ibc-hooks v0.0.10
github.com/osmosis-labs/sqs/sqsdomain v0.0.0-20240208025415-894605171463
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,8 @@ github.com/osmosis-labs/osmosis/osmomath v0.0.8 h1:jm6D5UgzQ0GQGtyFezs7tQRgwakf3
github.com/osmosis-labs/osmosis/osmomath v0.0.8/go.mod h1:4iPfmy6R0qbImLe5urBLS0thndfLxfmHOdnYboDS1Qo=
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240206013051-b1a74295e6e9 h1:dVe/Lfng/y5QSVc5KHVM5ieQzXvqHLVwq8gdwfczuFo=
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240206013051-b1a74295e6e9/go.mod h1:uNKaL8tCfSAoj7pBj9B+s67GUvRBbhiMSSwuDj4Nnag=
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240217005029-900b2404cb0e h1:HnvoZYIDFgKqkT1ph6fAS1teftxj/6xgFLCgqM/1rrs=
github.com/osmosis-labs/osmosis/osmoutils v0.0.9-0.20240217005029-900b2404cb0e/go.mod h1:FraqhI1HTh3tM0DNPPimZ2x79Dt2pQI6UGSwFJlMc/I=
github.com/osmosis-labs/osmosis/v22 v22.0.0 h1:eb0ywQgvymb7RAf16ByrPgBO1ldXlvRku+V617gmyPI=
github.com/osmosis-labs/osmosis/v22 v22.0.0/go.mod h1:PHPdUS2GBd2ikAde7Z3/jW9KcF7nbf4cZrF8gbYIXXg=
github.com/osmosis-labs/osmosis/x/epochs v0.0.4 h1:5c5LQpjN+XpbhXobC4VG3V6/+4O3bm26NP0BKvHsQdg=
Expand Down
6 changes: 6 additions & 0 deletions osmoutils/observability/helpers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package observability

// FormatMetricName helper to format a metric name given SDK module name and extension.
func FormatMetricName(moduleName, extension string) string {
return moduleName + "_" + extension
}
28 changes: 6 additions & 22 deletions x/concentrated-liquidity/incentives.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func calcAccruedIncentivesForAccum(ctx sdk.Context, accumUptime time.Duration, l
// Total amount emitted = time elapsed * emission
totalEmittedAmount, err := computeTotalIncentivesToEmit(timeElapsed, incentiveRecordBody.EmissionRate)
if err != nil {
ctx.Logger().Info(types.IncentiveOverflowTelemetryName, "pool_id", poolID, "incentive_id", incentiveRecord.IncentiveId, "time_elapsed", timeElapsed, "emission_rate", incentiveRecordBody.EmissionRate, "error", err.Error())
emitIncentiveOverflowTelemetry(poolID, incentiveRecord.IncentiveId, timeElapsed, incentiveRecordBody.EmissionRate, err)
// Silently ignore the truncated incentive record to avoid halting the entire accumulator update.
// Continue to the next incentive record.
continue
Expand All @@ -275,7 +275,7 @@ func calcAccruedIncentivesForAccum(ctx sdk.Context, accumUptime time.Duration, l
// when dividing by the liquidity in the accumulator.
scaledTotalEmittedAmount, err := scaleUpTotalEmittedAmount(totalEmittedAmount, incentiveScalingFactorForPool)
if err != nil {
ctx.Logger().Info(types.IncentiveOverflowTelemetryName, "pool_id", poolID, "incentive_id", incentiveRecord.IncentiveId, "time_elapsed", timeElapsed, "emission_rate", incentiveRecordBody.EmissionRate, "error", err.Error())
emitIncentiveOverflowTelemetry(poolID, incentiveRecord.IncentiveId, timeElapsed, incentiveRecordBody.EmissionRate, err)
// Silently ignore the truncated incentive record to avoid halting the entire accumulator update.
// Continue to the next incentive record.
continue
Expand All @@ -285,16 +285,16 @@ func calcAccruedIncentivesForAccum(ctx sdk.Context, accumUptime time.Duration, l
// Note that we truncate to ensure we do not overdistribute incentives
incentivesPerLiquidity := scaledTotalEmittedAmount.QuoTruncate(liquidityInAccum)

// Emit telemetry for accumulator updates
emitAccumulatorUpdateTelemetry(ctx, types.IncentiveTruncationTelemetryName, types.IncentiveEmissionTelemetryName, incentivesPerLiquidity, totalEmittedAmount, poolID, liquidityInAccum)

emittedIncentivesPerLiquidity := sdk.NewDecCoinFromDec(incentiveRecordBody.RemainingCoin.Denom, incentivesPerLiquidity)

// Ensure that we only emit if there are enough incentives remaining to be emitted
remainingRewards := poolIncentiveRecords[incentiveIndex].IncentiveRecordBody.RemainingCoin.Amount

// if total amount emitted does not exceed remaining rewards,
if totalEmittedAmount.LTE(remainingRewards) {
// Emit telemetry for accumulator updates
emitAccumulatorUpdateTelemetry(types.IncentiveTruncationTelemetryName, incentivesPerLiquidity, totalEmittedAmount, poolID, liquidityInAccum)

incentivesToAddToCurAccum = incentivesToAddToCurAccum.Add(emittedIncentivesPerLiquidity)

// Update incentive record to reflect the incentives that were emitted
Expand All @@ -320,7 +320,7 @@ func calcAccruedIncentivesForAccum(ctx sdk.Context, accumUptime time.Duration, l
emittedIncentivesPerLiquidity = sdk.NewDecCoinFromDec(incentiveRecordBody.RemainingCoin.Denom, remainingIncentivesPerLiquidity)

// Emit telemetry for accumulator updates
emitAccumulatorUpdateTelemetry(ctx, types.IncentiveTruncationTelemetryName, types.IncentiveEmissionTelemetryName, remainingIncentivesPerLiquidity, remainingRewards, poolID, liquidityInAccum)
emitAccumulatorUpdateTelemetry(types.IncentiveTruncationTelemetryName, remainingIncentivesPerLiquidity, remainingRewards, poolID, liquidityInAccum)

incentivesToAddToCurAccum = incentivesToAddToCurAccum.Add(emittedIncentivesPerLiquidity)

Expand Down Expand Up @@ -1040,22 +1040,6 @@ func (k Keeper) getLargestSupportedUptimeDuration() time.Duration {
return getLargestDuration(types.SupportedUptimes)
}

// emitAccumulatorUpdateTelemetry emits telemetry for accumulator updates
// It detects whether an accumulator update does not occur when expected due to truncation or does occur and emits the appropriate telemetry
func emitAccumulatorUpdateTelemetry(ctx sdk.Context, truncatedPlaceholder, emittedPlaceholder string, rewardsPerUnitOfLiquidity, rewardsTotal osmomath.Dec, poolID uint64, liquidityInAccum osmomath.Dec, extraKeyVals ...interface{}) {
// If truncation occurs, we emit events to alert us of the issue.
if rewardsPerUnitOfLiquidity.IsZero() && !rewardsTotal.IsZero() {
telemetry.IncrCounter(1, truncatedPlaceholder)
ctx.Logger().Error(truncatedPlaceholder, "pool_id", poolID, "total_liq", liquidityInAccum, "per_unit_liq", rewardsPerUnitOfLiquidity, "total_amt", rewardsTotal, extraKeyVals)

// We emit events for these pools specifically as they are at the border of truncation in terms of liquidity
// TODO: remove these after scaling factor approach is implemented
} else if poolID == (1423) || poolID == (1213) {
telemetry.IncrCounter(1, emittedPlaceholder)
ctx.Logger().Info(emittedPlaceholder, "pool_id", poolID, "total_liq", liquidityInAccum, "per_unit_liq", rewardsPerUnitOfLiquidity, "total_amt", rewardsTotal, extraKeyVals)
}
}

// getIncentiveScalingFactorForPool returns the scaling factor for the given pool.
// It returns perUnitLiqScalingFactor if the pool is migrated or if the pool ID is greater than the migration threshold.
// It returns oneDecScalingFactor otherwise.
Expand Down
4 changes: 2 additions & 2 deletions x/concentrated-liquidity/swaps.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func (k Keeper) computeOutAmtGivenIn(
spreadFactorsAccruedPerUnitOfLiquidity := swapState.updateSpreadRewardGrowthGlobal(spreadRewardCharge)

// Emit telemetry to detect spread reward truncation.
emitAccumulatorUpdateTelemetry(ctx, types.SpreadFactorTruncationTelemetryName, types.SpreadRewardEmissionTelemetryName, spreadFactorsAccruedPerUnitOfLiquidity, spreadRewardCharge, poolId, swapState.liquidity, "is_out_given_in", true)
emitAccumulatorUpdateTelemetry(types.SpreadFactorTruncationTelemetryName, spreadFactorsAccruedPerUnitOfLiquidity, spreadRewardCharge, poolId, swapState.liquidity, "is_out_given_in", "true")

ctx.Logger().Debug("cl calc out given in")
emitSwapDebugLogs(ctx, swapState, computedSqrtPrice, amountIn, amountOut, spreadRewardCharge)
Expand Down Expand Up @@ -565,7 +565,7 @@ func (k Keeper) computeInAmtGivenOut(
spreadFactorsAccruedPerUnitOfLiquidity := swapState.updateSpreadRewardGrowthGlobal(spreadRewardChargeTotal)

// Emit telemetry to detect spread reward truncation.
emitAccumulatorUpdateTelemetry(ctx, types.SpreadFactorTruncationTelemetryName, types.SpreadRewardEmissionTelemetryName, spreadFactorsAccruedPerUnitOfLiquidity, spreadRewardChargeTotal, poolId, swapState.liquidity, "is_out_given_in", false)
emitAccumulatorUpdateTelemetry(types.SpreadFactorTruncationTelemetryName, spreadFactorsAccruedPerUnitOfLiquidity, spreadRewardChargeTotal, poolId, swapState.liquidity, "is_out_given_in", "false")

ctx.Logger().Debug("cl calc in given out")
emitSwapDebugLogs(ctx, swapState, computedSqrtPrice, amountIn, amountOut, spreadRewardChargeTotal)
Expand Down
81 changes: 81 additions & 0 deletions x/concentrated-liquidity/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package concentrated_liquidity

import (
"strconv"

"github.com/armon/go-metrics"
"github.com/cosmos/cosmos-sdk/telemetry"

"github.com/osmosis-labs/osmosis/osmomath"
types "github.com/osmosis-labs/osmosis/v23/x/concentrated-liquidity/types"
)

// emitAccumulatorUpdateTelemetry emits telemetry for accumulator updates
// It detects whether an accumulator update does not occur when expected due to truncation or does occur and emits the appropriate telemetry
func emitAccumulatorUpdateTelemetry(truncatedPlaceholder string, rewardsPerUnitOfLiquidity, rewardsTotal osmomath.Dec, poolID uint64, liquidityInAccum osmomath.Dec, extraKeyVals ...string) {
// If truncation occurs, we emit events to alert us of the issue.
if rewardsPerUnitOfLiquidity.IsZero() && !rewardsTotal.IsZero() {
labels := []metrics.Label{
{
Name: "pool_id",
Value: strconv.FormatUint(poolID, 10),
},
{
Name: "total_liq",
Value: liquidityInAccum.String(),
},
{
Name: "per_unit_liq",
Value: rewardsPerUnitOfLiquidity.String(),
},
{
Name: "total_amt",
Value: rewardsTotal.String(),
},
}

// Append additional labels
for i := 0; i < len(extraKeyVals); i += 2 {
// This might skip applying the last label pair if key or value is missing
if i+1 > len(labels)-1 {
break
}

key := extraKeyVals[i]
value := extraKeyVals[i+1]

labels = append(labels, metrics.Label{
Name: key,
Value: value,
})
}

telemetry.IncrCounterWithLabels([]string{truncatedPlaceholder}, 1, labels)
}
}

// emitIncentiveOverflowTelemetry emits telemetry for incentive overflow in intermediaty calculations
func emitIncentiveOverflowTelemetry(poolID, incentiveRecordID uint64, timeElapsed, emissionRate osmomath.Dec, err error) {
telemetry.IncrCounterWithLabels([]string{types.IncentiveOverflowTelemetryName}, 1, []metrics.Label{
{
Name: "pool_id",
Value: strconv.FormatUint(poolID, 10),
},
{
Name: "incentive_id",
Value: strconv.FormatUint(incentiveRecordID, 10),
},
{
Name: "time_elapsed",
Value: timeElapsed.String(),
},
{
Name: "emission_rate",
Value: emissionRate.String(),
},
{
Name: "error",
Value: err.Error(),
},
})
}
47 changes: 41 additions & 6 deletions x/concentrated-liquidity/types/telemetry.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,45 @@
package types

const (
IncentiveEmissionTelemetryName = "concentrated_liquidity_incentive_emission"
IncentiveTruncationTelemetryName = "concentrated_liquidity_incentive_truncation"
IncentiveOverflowTelemetryName = "concentrated_liquidity_incentive_overflow"
import "github.com/osmosis-labs/osmosis/osmoutils/observability"

SpreadRewardEmissionTelemetryName = "concentrated_liquidity_spread_reward_emission"
SpreadFactorTruncationTelemetryName = "concentrated_liquidity_sptread_factor_truncation"
var (
// concentrated_liquidity_incentive_truncation
//
// counter that is increased if the incentive accumulator update gets truncated due to division.
//
// Has the following labels:
// * pool_id - the ID of the pool.
// * total_liq - the liquidity amount in the denominator.
// * per_unit_liq - the resulting rewards per unit of liquidity (zero if truncated).
// * total_amt - the total reward amount before dividing by the liquidity value.
IncentiveTruncationTelemetryName = formatConcentratedMetricName("incentive_truncation")
// concentrated_liquidity_incentive_overflow
//
// counter that is increased if an intermediary math operation in the incentives flow overflows.
//
// Has the following labels:
// * pool_id - the ID of the pool.
// * incentive_id - the incentive record ID.
// * time_elapsed - the time elapsed in seconds since the last pool update
// * emission_rate - the emission rate per second from the incentive record.
// * error - the error/panic from the failing math operation
IncentiveOverflowTelemetryName = formatConcentratedMetricName("incentive_overflow")
// concentrated_liquidity_sptread_factor_truncation
//
// counter that is increased if spread factor accumulator update gets truncated due to division by large
// liquidity value.
//
// Has the following labels:
// * pool_id - the ID of the pool.
// * incentive_id - the incentive record ID.
// * time_elapsed - the time elapsed in seconds since the last pool update
// * emission_rate - the emission rate per second from the incentive record.
// * error - the error/panic from the failing math operation
// * is_out_given_in - boolean flag specifying which swap method caused the truncation.
SpreadFactorTruncationTelemetryName = formatConcentratedMetricName("spread_factor_truncation")
)

// formatConcentratedMetricName formats the concentrated module metric name.
func formatConcentratedMetricName(metricName string) string {
return observability.FormatMetricName(ModuleName, metricName)
}
40 changes: 36 additions & 4 deletions x/epochs/types/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package types

import (
fmt "fmt"
"strconv"

"github.com/armon/go-metrics"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"

"github.com/osmosis-labs/osmosis/osmoutils"
Expand All @@ -13,29 +16,41 @@ type EpochHooks interface {
AfterEpochEnd(ctx sdk.Context, epochIdentifier string, epochNumber int64) error
// new epoch is next block of epoch end block
BeforeEpochStart(ctx sdk.Context, epochIdentifier string, epochNumber int64) error
// Returns the name of the module implementing epoch hook.
GetModuleName() string
}

const (
// flag indicating whether this is a before epoch hook
isBeforeEpoch = true
)

var _ EpochHooks = MultiEpochHooks{}

// combine multiple gamm hooks, all hook functions are run in array sequence.
type MultiEpochHooks []EpochHooks

// GetModuleName implements EpochHooks.
func (MultiEpochHooks) GetModuleName() string {
return ModuleName
}

func NewMultiEpochHooks(hooks ...EpochHooks) MultiEpochHooks {
return hooks
}

// AfterEpochEnd is called when epoch is going to be ended, epochNumber is the number of epoch that is ending.
func (h MultiEpochHooks) AfterEpochEnd(ctx sdk.Context, epochIdentifier string, epochNumber int64) error {
for i := range h {
panicCatchingEpochHook(ctx, h[i].AfterEpochEnd, epochIdentifier, epochNumber)
for _, hook := range h {
panicCatchingEpochHook(ctx, hook.AfterEpochEnd, epochIdentifier, epochNumber, h.GetModuleName(), !isBeforeEpoch)
}
return nil
}

// BeforeEpochStart is called when epoch is going to be started, epochNumber is the number of epoch that is starting.
func (h MultiEpochHooks) BeforeEpochStart(ctx sdk.Context, epochIdentifier string, epochNumber int64) error {
for i := range h {
panicCatchingEpochHook(ctx, h[i].BeforeEpochStart, epochIdentifier, epochNumber)
for _, hook := range h {
panicCatchingEpochHook(ctx, hook.BeforeEpochStart, epochIdentifier, epochNumber, hook.GetModuleName(), isBeforeEpoch)
}
return nil
}
Expand All @@ -45,13 +60,30 @@ func panicCatchingEpochHook(
hookFn func(ctx sdk.Context, epochIdentifier string, epochNumber int64) error,
epochIdentifier string,
epochNumber int64,
moduleName string,
isBeforeEpoch bool,
) {
wrappedHookFn := func(ctx sdk.Context) error {
return hookFn(ctx, epochIdentifier, epochNumber)
}
// TODO: Thread info for which hook this is, may be dependent on larger hook system refactoring
err := osmoutils.ApplyFuncIfNoError(ctx, wrappedHookFn)
if err != nil {
telemetry.IncrCounterWithLabels([]string{}, 1, []metrics.Label{
{
Name: "module_name",
Value: moduleName,
},
{
Name: "error",
Value: err.Error(),
},
{
Name: "is_before_hook",
Value: strconv.FormatBool(isBeforeEpoch),
},
})

ctx.Logger().Error(fmt.Sprintf("error in epoch hook %v", err))
}
}
5 changes: 5 additions & 0 deletions x/epochs/types/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ type dummyEpochHook struct {
shouldError bool
}

// GetModuleName implements types.EpochHooks.
func (*dummyEpochHook) GetModuleName() string {
return "dummy"
}

func (hook *dummyEpochHook) AfterEpochEnd(ctx sdk.Context, epochIdentifier string, epochNumber int64) error {
if hook.shouldPanic {
panic("dummyEpochHook is panicking")
Expand Down
20 changes: 20 additions & 0 deletions x/epochs/types/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package types

import "github.com/osmosis-labs/osmosis/osmoutils/observability"

var (
// epoch_hook_failed
//
// counter that is increased if epoch hook fails
//
// Has the following labels:
// * module_name - the name of the module that errored or panicked
// * err - the error or panic returned
// * is_before_hook - true if this is a before epoch hook. False otherwise.
EpochHookFailedMetricName = formatEpochMetricName("hook_failed")
)

// formatTxFeesMetricName formats the epochs module metric name.
func formatEpochMetricName(metricName string) string {
return observability.FormatMetricName(ModuleName, metricName)
}
14 changes: 12 additions & 2 deletions x/incentives/keeper/distribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package keeper
import (
"errors"
"fmt"
"strconv"
"time"

"github.com/armon/go-metrics"
db "github.com/cometbft/cometbft-db"

"github.com/cosmos/cosmos-sdk/telemetry"
Expand Down Expand Up @@ -45,8 +47,16 @@ func (k Keeper) AllocateAcrossGauges(ctx sdk.Context, activeGroups []types.Group
for _, group := range activeGroups {
err := k.syncGroupWeights(ctx, group)
if err != nil {
telemetry.IncrCounter(1, types.SyncGroupGaugeFailureTelemetryName)
ctx.Logger().Error(types.SyncGroupGaugeFailureTelemetryName, "group_gauge_id", group.GroupGaugeId, "error", err.Error())
telemetry.IncrCounterWithLabels([]string{types.SyncGroupGaugeFailureMetricName}, 1, []metrics.Label{
{
Name: "group_gauge_id",
Value: strconv.FormatUint(group.GroupGaugeId, 10),
},
{
Name: "err",
Value: err.Error(),
},
})
continue
}

Expand Down
Loading

0 comments on commit ab3f31a

Please sign in to comment.