Skip to content

Commit

Permalink
Remove group label from compact metrics
Browse files Browse the repository at this point in the history
Compaction metrics have too high a cardinality, causing metric bloat on
large installations. The group information is better suited to logs.
* Add a `resolution` label to the compaction counters.

Fixes: #5841

Signed-off-by: SuperQ <superq@gmail.com>
  • Loading branch information
SuperQ committed Feb 22, 2023
1 parent 1967cd0 commit acf062d
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 127 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#5887](https://github.com/thanos-io/thanos/pull/5887) Tracing: Make sure rate limiting sampler is the default, as was the case in version pre-0.29.0.
- [#5997](https://github.com/thanos-io/thanos/pull/5997) Rule: switch to miekgdns DNS resolver as the default one.
- [#6035](https://github.com/thanos-io/thanos/pull/6035) Replicate: Support all types of matchers to match blocks for replication. Change matcher parameter from string slice to a single string.
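- [#6049](https://github.com/thanos-io/thanos/pull/6049) Compact: *breaking :warning:* Remove the `group` label from compaction metrics to reduce cardinality; the per-group compaction counters are now labelled by `resolution` instead.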
- [#6131](https://github.com/thanos-io/thanos/pull/6131) Store: *breaking :warning:* Use Histograms for bucket metrics.

## [v0.30.2](https://github.com/thanos-io/thanos/tree/release-0.30) - 28.01.2023
Expand Down
67 changes: 34 additions & 33 deletions pkg/compact/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,23 +256,23 @@ func NewDefaultGrouper(
compactions: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_compact_group_compactions_total",
Help: "Total number of group compaction attempts that resulted in a new block.",
}, []string{"group"}),
}, []string{"resolution"}),
compactionRunsStarted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_compact_group_compaction_runs_started_total",
Help: "Total number of group compaction attempts.",
}, []string{"group"}),
}, []string{"resolution"}),
compactionRunsCompleted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_compact_group_compaction_runs_completed_total",
Help: "Total number of group completed compaction runs. This also includes compactor group runs that resulted with no compaction.",
}, []string{"group"}),
}, []string{"resolution"}),
compactionFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_compact_group_compactions_failures_total",
Help: "Total number of failed group compactions.",
}, []string{"group"}),
}, []string{"resolution"}),
verticalCompactions: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_compact_group_vertical_compactions_total",
Help: "Total number of group compaction attempts that resulted in a new block based on overlapping blocks.",
}, []string{"group"}),
}, []string{"resolution"}),
blocksMarkedForNoCompact: blocksMarkedForNoCompact,
garbageCollectedBlocks: garbageCollectedBlocks,
blocksMarkedForDeletion: blocksMarkedForDeletion,
Expand All @@ -291,19 +291,20 @@ func (g *DefaultGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*Gro
group, ok := groups[groupKey]
if !ok {
lbls := labels.FromMap(m.Thanos.Labels)
resolutionLabel := fmt.Sprintf("%d", m.Thanos.Downsample.Resolution)
group, err = NewGroup(
log.With(g.logger, "group", fmt.Sprintf("%d@%v", m.Thanos.Downsample.Resolution, lbls.String()), "groupKey", groupKey),
log.With(g.logger, "group", fmt.Sprintf("%s@%v", resolutionLabel, lbls.String()), "groupKey", groupKey),
g.bkt,
groupKey,
lbls,
m.Thanos.Downsample.Resolution,
g.acceptMalformedIndex,
g.enableVerticalCompaction,
g.compactions.WithLabelValues(groupKey),
g.compactionRunsStarted.WithLabelValues(groupKey),
g.compactionRunsCompleted.WithLabelValues(groupKey),
g.compactionFailures.WithLabelValues(groupKey),
g.verticalCompactions.WithLabelValues(groupKey),
g.compactions.WithLabelValues(resolutionLabel),
g.compactionRunsStarted.WithLabelValues(resolutionLabel),
g.compactionRunsCompleted.WithLabelValues(resolutionLabel),
g.compactionFailures.WithLabelValues(resolutionLabel),
g.verticalCompactions.WithLabelValues(resolutionLabel),
g.garbageCollectedBlocks,
g.blocksMarkedForDeletion,
g.blocksMarkedForNoCompact,
Expand Down Expand Up @@ -492,8 +493,8 @@ func (cg *Group) Resolution() int64 {

// CompactProgressMetrics contains Prometheus metrics related to compaction progress.
type CompactProgressMetrics struct {
NumberOfCompactionRuns *prometheus.GaugeVec
NumberOfCompactionBlocks *prometheus.GaugeVec
NumberOfCompactionRuns prometheus.Gauge
NumberOfCompactionBlocks prometheus.Gauge
}

// ProgressCalculator calculates the progress of the compaction process for a given slice of Groups.
Expand All @@ -512,14 +513,14 @@ func NewCompactionProgressCalculator(reg prometheus.Registerer, planner *tsdbBas
return &CompactionProgressCalculator{
planner: planner,
CompactProgressMetrics: &CompactProgressMetrics{
NumberOfCompactionRuns: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
NumberOfCompactionRuns: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "thanos_compact_todo_compactions",
Help: "number of compactions to be done",
}, []string{"group"}),
NumberOfCompactionBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
}),
NumberOfCompactionBlocks: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "thanos_compact_todo_compaction_blocks",
Help: "number of blocks planned to be compacted",
}, []string{"group"}),
}),
},
}
}
Expand Down Expand Up @@ -568,20 +569,20 @@ func (ps *CompactionProgressCalculator) ProgressCalculate(ctx context.Context, g
groups = tmpGroups
}

ps.CompactProgressMetrics.NumberOfCompactionRuns.Reset()
ps.CompactProgressMetrics.NumberOfCompactionBlocks.Reset()
ps.CompactProgressMetrics.NumberOfCompactionRuns.Set(0)
ps.CompactProgressMetrics.NumberOfCompactionBlocks.Set(0)

for key, iters := range groupCompactions {
ps.CompactProgressMetrics.NumberOfCompactionRuns.WithLabelValues(key).Add(float64(iters))
ps.CompactProgressMetrics.NumberOfCompactionBlocks.WithLabelValues(key).Add(float64(groupBlocks[key]))
ps.CompactProgressMetrics.NumberOfCompactionRuns.Add(float64(iters))
ps.CompactProgressMetrics.NumberOfCompactionBlocks.Add(float64(groupBlocks[key]))
}

return nil
}

// DownsampleProgressMetrics contains Prometheus metrics related to downsampling progress.
type DownsampleProgressMetrics struct {
NumberOfBlocksDownsampled *prometheus.GaugeVec
NumberOfBlocksDownsampled prometheus.Gauge
}

// DownsampleProgressCalculator contains DownsampleMetrics, which are updated during the downsampling simulation process.
Expand All @@ -593,10 +594,10 @@ type DownsampleProgressCalculator struct {
func NewDownsampleProgressCalculator(reg prometheus.Registerer) *DownsampleProgressCalculator {
return &DownsampleProgressCalculator{
DownsampleProgressMetrics: &DownsampleProgressMetrics{
NumberOfBlocksDownsampled: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
NumberOfBlocksDownsampled: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "thanos_compact_todo_downsample_blocks",
Help: "number of blocks to be downsampled",
}, []string{"group"}),
}),
},
}
}
Expand Down Expand Up @@ -666,17 +667,17 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g
}
}

ds.DownsampleProgressMetrics.NumberOfBlocksDownsampled.Reset()
for key, blocks := range groupBlocks {
ds.DownsampleProgressMetrics.NumberOfBlocksDownsampled.WithLabelValues(key).Add(float64(blocks))
ds.DownsampleProgressMetrics.NumberOfBlocksDownsampled.Set(0)
for _, blocks := range groupBlocks {
ds.DownsampleProgressMetrics.NumberOfBlocksDownsampled.Add(float64(blocks))
}

return nil
}

// RetentionProgressMetrics contains Prometheus metrics related to retention progress.
type RetentionProgressMetrics struct {
NumberOfBlocksToDelete *prometheus.GaugeVec
NumberOfBlocksToDelete prometheus.Gauge
}

// RetentionProgressCalculator contains RetentionProgressMetrics, which are updated during the retention simulation process.
Expand All @@ -690,10 +691,10 @@ func NewRetentionProgressCalculator(reg prometheus.Registerer, retentionByResolu
return &RetentionProgressCalculator{
retentionByResolution: retentionByResolution,
RetentionProgressMetrics: &RetentionProgressMetrics{
NumberOfBlocksToDelete: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
NumberOfBlocksToDelete: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "thanos_compact_todo_deletion_blocks",
Help: "number of blocks that have crossed their retention period",
}, []string{"group"}),
}),
},
}
}
Expand All @@ -715,9 +716,9 @@ func (rs *RetentionProgressCalculator) ProgressCalculate(ctx context.Context, gr
}
}

rs.RetentionProgressMetrics.NumberOfBlocksToDelete.Reset()
for key, blocks := range groupBlocks {
rs.RetentionProgressMetrics.NumberOfBlocksToDelete.WithLabelValues(key).Add(float64(blocks))
rs.RetentionProgressMetrics.NumberOfBlocksToDelete.Set(0)
for _, blocks := range groupBlocks {
rs.RetentionProgressMetrics.NumberOfBlocksToDelete.Add(float64(blocks))
}

return nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/compact/compact_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func testGroupCompactE2e(t *testing.T, mergeFunc storage.VerticalChunkSeriesMerg
testutil.Equals(t, 5.0, promtest.ToFloat64(sy.metrics.blocksMarkedForDeletion))
testutil.Equals(t, 1.0, promtest.ToFloat64(grouper.blocksMarkedForNoCompact))
testutil.Equals(t, 0.0, promtest.ToFloat64(sy.metrics.garbageCollectionFailures))
testutil.Equals(t, 4, MetricCount(grouper.compactions))
testutil.Equals(t, 2, MetricCount(grouper.compactions))
testutil.Equals(t, 1.0, promtest.ToFloat64(grouper.compactions.WithLabelValues(metas[0].Thanos.GroupKey())))
testutil.Equals(t, 1.0, promtest.ToFloat64(grouper.compactions.WithLabelValues(metas[7].Thanos.GroupKey())))
testutil.Equals(t, 0.0, promtest.ToFloat64(grouper.compactions.WithLabelValues(metas[4].Thanos.GroupKey())))
Expand Down
Loading

0 comments on commit acf062d

Please sign in to comment.