Skip to content

Add metric name in limiter per-metric exceeded errors #6422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
* [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that doesn't match any other label sets entries. #6435
* [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. #6444
* [ENHANCEMENT] Distributor: Added `cortex_distributor_received_samples_per_labelset_total` metric to calculate ingestion rate per label set. #6443
* [ENHANCEMENT] Added metric name in limiter per-metric exceeded errors. #6416
* [ENHANCEMENT] StoreGateway: Added `cortex_bucket_store_indexheader_load_duration_seconds` and `cortex_bucket_store_indexheader_download_duration_seconds` metrics for time of downloading and loading index header files. #6445
* [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224
* [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326
Expand Down
8 changes: 5 additions & 3 deletions pkg/ingester/ingester.go
Original file line number Diff line number Diff line change
Expand Up @@ -1172,18 +1172,20 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte

case errors.Is(cause, errMaxSeriesPerUserLimitExceeded):
perUserSeriesLimitCount++
updateFirstPartial(func() error { return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause)) })
updateFirstPartial(func() error {
return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause, copiedLabels))
})

case errors.Is(cause, errMaxSeriesPerMetricLimitExceeded):
perMetricSeriesLimitCount++
updateFirstPartial(func() error {
return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause))
return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels))
})

case errors.As(cause, &errMaxSeriesPerLabelSetLimitExceeded{}):
perLabelSetSeriesLimitCount++
updateFirstPartial(func() error {
return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause))
return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels))
})

case errors.Is(cause, histogram.ErrHistogramSpanNegativeOffset):
Expand Down
4 changes: 2 additions & 2 deletions pkg/ingester/ingester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ func TestIngesterUserLimitExceeded(t *testing.T) {
httpResp, ok := httpgrpc.HTTPResponseFromError(err)
require.True(t, ok, "returned error is not an httpgrpc response")
assert.Equal(t, http.StatusBadRequest, int(httpResp.Code))
assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded)), userID).Error(), string(httpResp.Body))
assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded, labels1)), userID).Error(), string(httpResp.Body))

// Append two metadata, expect no error since metadata is a best effort approach.
_, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API))
Expand Down Expand Up @@ -778,7 +778,7 @@ func TestIngesterMetricLimitExceeded(t *testing.T) {
httpResp, ok := httpgrpc.HTTPResponseFromError(err)
require.True(t, ok, "returned error is not an httpgrpc response")
assert.Equal(t, http.StatusBadRequest, int(httpResp.Code))
assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded)), userID).Error(), string(httpResp.Body))
assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded, labels1)), userID).Error(), string(httpResp.Body))

// Append two metadata for the same metric. Drop the second one, and expect no error since metadata is a best effort approach.
_, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API))
Expand Down
18 changes: 9 additions & 9 deletions pkg/ingester/limiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ func (l *Limiter) AssertMaxSeriesPerLabelSet(userID string, metric labels.Labels

// FormatError returns the input error enriched with the actual limits for the given user.
// It acts as pass-through if the input error is unknown.
func (l *Limiter) FormatError(userID string, err error) error {
func (l *Limiter) FormatError(userID string, err error, lbls labels.Labels) error {
switch {
case errors.Is(err, errMaxSeriesPerUserLimitExceeded):
return l.formatMaxSeriesPerUserError(userID)
case errors.Is(err, errMaxSeriesPerMetricLimitExceeded):
return l.formatMaxSeriesPerMetricError(userID)
return l.formatMaxSeriesPerMetricError(userID, lbls.Get(labels.MetricName))
case errors.Is(err, errMaxMetadataPerUserLimitExceeded):
return l.formatMaxMetadataPerUserError(userID)
case errors.Is(err, errMaxMetadataPerMetricLimitExceeded):
return l.formatMaxMetadataPerMetricError(userID)
return l.formatMaxMetadataPerMetricError(userID, lbls.Get(labels.MetricName))
case errors.As(err, &errMaxSeriesPerLabelSetLimitExceeded{}):
e := errMaxSeriesPerLabelSetLimitExceeded{}
errors.As(err, &e)
Expand All @@ -158,13 +158,13 @@ func (l *Limiter) formatMaxSeriesPerUserError(userID string) error {
minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
}

func (l *Limiter) formatMaxSeriesPerMetricError(userID string) error {
func (l *Limiter) formatMaxSeriesPerMetricError(userID string, metric string) error {
actualLimit := l.maxSeriesPerMetric(userID)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can just pass the metric name string. No need to pass labels

localLimit := l.limits.MaxLocalSeriesPerMetric(userID)
globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID)

return fmt.Errorf("per-metric series limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)",
minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
return fmt.Errorf("per-metric series limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)",
minNonZero(localLimit, globalLimit), metric, l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
}

func (l *Limiter) formatMaxMetadataPerUserError(userID string) error {
Expand All @@ -176,13 +176,13 @@ func (l *Limiter) formatMaxMetadataPerUserError(userID string) error {
minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
}

func (l *Limiter) formatMaxMetadataPerMetricError(userID string) error {
func (l *Limiter) formatMaxMetadataPerMetricError(userID string, metric string) error {
actualLimit := l.maxMetadataPerMetric(userID)
localLimit := l.limits.MaxLocalMetadataPerMetric(userID)
globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID)

return fmt.Errorf("per-metric metadata limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)",
minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
return fmt.Errorf("per-metric metadata limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)",
minNonZero(localLimit, globalLimit), metric, l.AdminLimitMessage, localLimit, globalLimit, actualLimit)
}

func (l *Limiter) formatMaxSeriesPerLabelSetError(err errMaxSeriesPerLabelSetLimitExceeded) error {
Expand Down
15 changes: 8 additions & 7 deletions pkg/ingester/limiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,21 +588,22 @@ func TestLimiter_FormatError(t *testing.T) {
require.NoError(t, err)

limiter := NewLimiter(limits, ring, util.ShardingStrategyDefault, true, 3, false, "please contact administrator to raise it")
lbls := labels.FromStrings(labels.MetricName, "testMetric")

actual := limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded)
actual := limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded, lbls)
assert.EqualError(t, actual, "per-user series limit of 100 exceeded, please contact administrator to raise it (local limit: 0 global limit: 100 actual local limit: 100)")

actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded)
assert.EqualError(t, actual, "per-metric series limit of 20 exceeded, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)")
actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded, lbls)
assert.EqualError(t, actual, "per-metric series limit of 20 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)")

actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded)
actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded, lbls)
assert.EqualError(t, actual, "per-user metric metadata limit of 10 exceeded, please contact administrator to raise it (local limit: 0 global limit: 10 actual local limit: 10)")

actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded)
assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)")
actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded, lbls)
assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)")

input := errors.New("unknown error")
actual = limiter.FormatError("user-1", input)
actual = limiter.FormatError("user-1", input, lbls)
assert.Equal(t, input, actual)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/ingester/user_metrics_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ func (mm *userMetricsMetadata) add(metric string, metadata *cortexpb.MetricMetad
// Verify that the user can create more metric metadata given we don't have a set for that metric name.
if err := mm.limiter.AssertMaxMetricsWithMetadataPerUser(mm.userID, len(mm.metricToMetadata)); err != nil {
mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perUserMetadataLimit).Inc()
return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err))
return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric)))
}
set = metricMetadataSet{}
mm.metricToMetadata[metric] = set
}

if err := mm.limiter.AssertMaxMetadataPerMetric(mm.userID, len(set)); err != nil {
mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perMetricMetadataLimit).Inc()
return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err))
return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric)))
}

// if we have seen this metadata before, it is a no-op and we don't need to change our metrics.
Expand Down
Loading