Skip to content

Create cortex_reduced_resolution_histogram_samples_total metric #6182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* [ENHANCEMENT] Ruler: Add new ruler metric `cortex_ruler_rule_groups_in_store` that is the total rule groups per tenant in store, which can be used to compare with `cortex_prometheus_rule_group_rules` to count the number of rule groups that are not loaded by a ruler. #5869
* [ENHANCEMENT] Ruler: Add query statistics metrics when --ruler.query-stats-enabled=true. #6173
* [ENHANCEMENT] Distributor: Add new `cortex_reduced_resolution_histogram_samples_total` metric to track the number of histogram samples whose resolution was reduced. #6182

## 1.18.0 in progress

Expand Down
33 changes: 27 additions & 6 deletions pkg/util/validation/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@ const (
)

type ValidateMetrics struct {
DiscardedSamples *prometheus.CounterVec
DiscardedExemplars *prometheus.CounterVec
DiscardedMetadata *prometheus.CounterVec
DiscardedSamples *prometheus.CounterVec
DiscardedExemplars *prometheus.CounterVec
DiscardedMetadata *prometheus.CounterVec
HistogramSamplesReducedResolution *prometheus.CounterVec
}

func registerCollector(r prometheus.Registerer, c prometheus.Collector) {
Expand Down Expand Up @@ -111,10 +112,19 @@ func NewValidateMetrics(r prometheus.Registerer) *ValidateMetrics {
[]string{discardReasonLabel, "user"},
)
registerCollector(r, discardedMetadata)
histogramSamplesReducedResolution := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "cortex_reduced_resolution_histogram_samples_total",
Help: "The total number of histogram samples that had the resolution reduced.",
},
[]string{"user"},
)
registerCollector(r, histogramSamplesReducedResolution)
m := &ValidateMetrics{
DiscardedSamples: discardedSamples,
DiscardedExemplars: discardedExemplars,
DiscardedMetadata: discardedMetadata,
DiscardedSamples: discardedSamples,
DiscardedExemplars: discardedExemplars,
DiscardedMetadata: discardedMetadata,
HistogramSamplesReducedResolution: histogramSamplesReducedResolution,
}

return m
Expand Down Expand Up @@ -286,13 +296,17 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
fh := cortexpb.FloatHistogramProtoToFloatHistogram(histogramSample)
oBuckets := len(fh.PositiveBuckets) + len(fh.NegativeBuckets)
for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
if fh.Schema <= histogram.ExponentialSchemaMin {
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
fh = fh.ReduceResolution(fh.Schema - 1)
}
if oBuckets != len(fh.PositiveBuckets)+len(fh.NegativeBuckets) {
validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID).Inc()
}
// If resolution reduced, convert new float histogram to protobuf type again.
return cortexpb.FloatHistogramToHistogramProto(histogramSample.TimestampMs, fh), nil
}
Expand All @@ -308,13 +322,17 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
h := cortexpb.HistogramProtoToHistogram(histogramSample)
oBuckets := len(h.PositiveBuckets) + len(h.NegativeBuckets)
for len(h.PositiveBuckets)+len(h.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
if h.Schema <= histogram.ExponentialSchemaMin {
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
}
h = h.ReduceResolution(h.Schema - 1)
}
if oBuckets != len(h.PositiveBuckets)+len(h.NegativeBuckets) {
validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID).Inc()
}
// If resolution reduced, convert new histogram to protobuf type again.
return cortexpb.HistogramToHistogramProto(histogramSample.TimestampMs, h), nil
}
Expand All @@ -331,4 +349,7 @@ func DeletePerUserValidationMetrics(validateMetrics *ValidateMetrics, userID str
if err := util.DeleteMatchingLabels(validateMetrics.DiscardedMetadata, filter); err != nil {
level.Warn(log).Log("msg", "failed to remove cortex_discarded_metadata_total metric for user", "user", userID, "err", err)
}
if err := util.DeleteMatchingLabels(validateMetrics.HistogramSamplesReducedResolution, filter); err != nil {
level.Warn(log).Log("msg", "failed to remove cortex_reduced_resolution_histogram_samples_total metric for user", "user", userID, "err", err)
}
}
9 changes: 9 additions & 0 deletions pkg/util/validation/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ func TestValidateNativeHistogram(t *testing.T) {
for _, tc := range []struct {
name string
bucketLimit int
resolutionReduced bool
histogram cortexpb.Histogram
expectedHistogram cortexpb.Histogram
expectedErr error
Expand Down Expand Up @@ -341,12 +342,14 @@ func TestValidateNativeHistogram(t *testing.T) {
bucketLimit: 6,
histogram: cortexpb.HistogramToHistogramProto(0, h.Copy()),
expectedHistogram: cortexpb.HistogramToHistogramProto(0, h.Copy().ReduceResolution(0)),
resolutionReduced: true,
},
{
name: "exceed limit and reduce resolution for 1 level, float histogram",
bucketLimit: 6,
histogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy()),
expectedHistogram: cortexpb.FloatHistogramToHistogramProto(0, fh.Copy().ReduceResolution(0)),
resolutionReduced: true,
},
{
name: "exceed limit and reduce resolution for 2 levels, histogram",
Expand Down Expand Up @@ -394,7 +397,13 @@ func TestValidateNativeHistogram(t *testing.T) {
if tc.expectedErr != nil {
require.Equal(t, tc.expectedErr, actualErr)
require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID)))
// Should never increment if error was returned
require.Equal(t, float64(0), testutil.ToFloat64(validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID)))

} else {
if tc.resolutionReduced {
require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID)))
}
require.NoError(t, actualErr)
require.Equal(t, tc.expectedHistogram, actualHistogram)
}
Expand Down
Loading