Skip to content

Commit 30ee547

Browse files
authored
Add NH count validation (#7072)
* Add NH count validation Signed-off-by: SungJin1212 <tjdwls1201@gmail.com> * fix lint Signed-off-by: SungJin1212 <tjdwls1201@gmail.com> * move changelog Signed-off-by: SungJin1212 <tjdwls1201@gmail.com> * Use prometheus NH validate Signed-off-by: SungJin1212 <tjdwls1201@gmail.com> * fix lint Signed-off-by: SungJin1212 <tjdwls1201@gmail.com> --------- Signed-off-by: SungJin1212 <tjdwls1201@gmail.com>
1 parent a68b702 commit 30ee547

File tree

4 files changed

+180
-10
lines changed

4 files changed

+180
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
* [ENHANCEMENT] Ingester: Add `enable_matcher_optimization` config to apply low selectivity matchers lazily. #7063
66
* [ENHANCEMENT] Distributor: Add a label references validation for remote write v2 request. #7074
7+
* [ENHANCEMENT] Distributor: Add count, spans, and buckets validations for native histogram. #7072
78
* [BUGFIX] Compactor: Avoid race condition which allow a grouper to not compact all partitions. #7082
89

910
## 1.20.0 in progress

pkg/util/validation/errors.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,22 @@ func (e *nativeHistogramSampleSizeBytesExceededError) Error() string {
282282
return fmt.Sprintf("native histogram sample size bytes exceeded for metric (actual: %d, limit: %d) metric: %.200q", e.nhSampleSizeBytes, e.limit, formatLabelSet(e.series))
283283
}
284284

285+
type nativeHistogramInvalidError struct {
286+
nhValidationErr error
287+
series []cortexpb.LabelAdapter
288+
}
289+
290+
func newNativeHistogramInvalidError(series []cortexpb.LabelAdapter, nhValidationErr error) ValidationError {
291+
return &nativeHistogramInvalidError{
292+
series: series,
293+
nhValidationErr: nhValidationErr,
294+
}
295+
}
296+
297+
func (e *nativeHistogramInvalidError) Error() string {
298+
return fmt.Sprintf("invalid native histogram, validation err: %v, metric: %.200q", e.nhValidationErr, formatLabelSet(e.series))
299+
}
300+
285301
// formatLabelSet formats label adapters as a metric name with labels, while preserving
286302
// label order, and keeping duplicates. If there are multiple "__name__" labels, only
287303
// first one is used as metric name, other ones will be included as regular labels.

pkg/util/validation/validate.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ const (
6060
nativeHistogramBucketCountLimitExceeded = "native_histogram_buckets_exceeded"
6161
nativeHistogramInvalidSchema = "native_histogram_invalid_schema"
6262
nativeHistogramSampleSizeBytesExceeded = "native_histogram_sample_size_bytes_exceeded"
63+
nativeHistogramInvalid = "native_histogram_invalid"
6364

6465
// RateLimited is one of the values for the reason to discard samples.
6566
// Declared here to avoid duplication in ingester and distributor.
@@ -368,7 +369,6 @@ func ValidateMetadata(validateMetrics *ValidateMetrics, cfg *Limits, userID stri
368369
}
369370

370371
func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, userID string, ls []cortexpb.LabelAdapter, histogramSample cortexpb.Histogram) (cortexpb.Histogram, error) {
371-
372372
// sample size validation for native histogram
373373
if limits.MaxNativeHistogramSampleSizeBytes > 0 && histogramSample.Size() > limits.MaxNativeHistogramSampleSizeBytes {
374374
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramSampleSizeBytesExceeded, userID).Inc()
@@ -381,25 +381,33 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
381381
return cortexpb.Histogram{}, newNativeHistogramSchemaInvalidError(ls, int(histogramSample.Schema))
382382
}
383383

384-
if limits.MaxNativeHistogramBuckets == 0 {
385-
return histogramSample, nil
386-
}
387-
388384
var (
389385
exceedLimit bool
390386
)
387+
391388
if histogramSample.IsFloatHistogram() {
392-
// Initial check to see if the bucket limit is exceeded or not. If not, we can avoid type casting.
389+
fh := cortexpb.FloatHistogramProtoToFloatHistogram(histogramSample)
390+
if err := fh.Validate(); err != nil {
391+
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramInvalid, userID).Inc()
392+
return cortexpb.Histogram{}, newNativeHistogramInvalidError(ls, err)
393+
}
394+
395+
// limit check
396+
if limits.MaxNativeHistogramBuckets == 0 {
397+
return histogramSample, nil
398+
}
399+
393400
exceedLimit = len(histogramSample.PositiveCounts)+len(histogramSample.NegativeCounts) > limits.MaxNativeHistogramBuckets
394401
if !exceedLimit {
395402
return histogramSample, nil
396403
}
404+
397405
// Exceed limit.
398406
if histogramSample.Schema <= histogram.ExponentialSchemaMin {
399407
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
400408
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
401409
}
402-
fh := cortexpb.FloatHistogramProtoToFloatHistogram(histogramSample)
410+
403411
oBuckets := len(fh.PositiveBuckets) + len(fh.NegativeBuckets)
404412
for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
405413
if fh.Schema <= histogram.ExponentialSchemaMin {
@@ -415,7 +423,17 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
415423
return cortexpb.FloatHistogramToHistogramProto(histogramSample.TimestampMs, fh), nil
416424
}
417425

418-
// Initial check to see if bucket limit is exceeded or not. If not, we can avoid type casting.
426+
h := cortexpb.HistogramProtoToHistogram(histogramSample)
427+
if err := h.Validate(); err != nil {
428+
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramInvalid, userID).Inc()
429+
return cortexpb.Histogram{}, newNativeHistogramInvalidError(ls, err)
430+
}
431+
432+
// limit check
433+
if limits.MaxNativeHistogramBuckets == 0 {
434+
return histogramSample, nil
435+
}
436+
419437
exceedLimit = len(histogramSample.PositiveDeltas)+len(histogramSample.NegativeDeltas) > limits.MaxNativeHistogramBuckets
420438
if !exceedLimit {
421439
return histogramSample, nil
@@ -425,7 +443,6 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
425443
validateMetrics.DiscardedSamples.WithLabelValues(nativeHistogramBucketCountLimitExceeded, userID).Inc()
426444
return cortexpb.Histogram{}, newHistogramBucketLimitExceededError(ls, limits.MaxNativeHistogramBuckets)
427445
}
428-
h := cortexpb.HistogramProtoToHistogram(histogramSample)
429446
oBuckets := len(h.PositiveBuckets) + len(h.NegativeBuckets)
430447
for len(h.PositiveBuckets)+len(h.NegativeBuckets) > limits.MaxNativeHistogramBuckets {
431448
if h.Schema <= histogram.ExponentialSchemaMin {
@@ -437,6 +454,7 @@ func ValidateNativeHistogram(validateMetrics *ValidateMetrics, limits *Limits, u
437454
if oBuckets != len(h.PositiveBuckets)+len(h.NegativeBuckets) {
438455
validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID).Inc()
439456
}
457+
440458
// If resolution reduced, convert new histogram to protobuf type again.
441459
return cortexpb.HistogramToHistogramProto(histogramSample.TimestampMs, h), nil
442460
}

pkg/util/validation/validate_test.go

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package validation
22

33
import (
4+
"errors"
5+
"math"
46
"net/http"
57
"strings"
68
"testing"
@@ -412,6 +414,55 @@ func TestValidateNativeHistogram(t *testing.T) {
412414
exceedMaxRangeSchemaFloatHistogram.Schema = 20
413415
exceedMaxSampleSizeBytesLimitFloatHistogram := tsdbutil.GenerateTestFloatHistogram(100)
414416

417+
bucketNumMisMatchInPSpanFH := tsdbutil.GenerateTestFloatHistogram(0)
418+
bucketNumMisMatchInPSpanFH.PositiveSpans[0].Length = 3
419+
420+
negativeSpanOffsetInPSpansFH := tsdbutil.GenerateTestFloatHistogram(0)
421+
negativeSpanOffsetInPSpansFH.PositiveSpans[1].Offset = -1
422+
423+
bucketNumMisMatchInNSpanFH := tsdbutil.GenerateTestFloatHistogram(0)
424+
bucketNumMisMatchInNSpanFH.NegativeSpans[0].Length = 3
425+
426+
negativeSpanOffsetInNSpansFH := tsdbutil.GenerateTestFloatHistogram(0)
427+
negativeSpanOffsetInNSpansFH.NegativeSpans[1].Offset = -1
428+
429+
negativeBucketCountInNBucketsFH := tsdbutil.GenerateTestFloatHistogram(0)
430+
negativeBucketCountInNBucketsFH.NegativeBuckets = []float64{-1.1, -1.2, -1.3, -1.4}
431+
432+
negativeBucketCountInPBucketsFH := tsdbutil.GenerateTestFloatHistogram(0)
433+
negativeBucketCountInPBucketsFH.PositiveBuckets = []float64{-1.1, -1.2, -1.3, -1.4}
434+
435+
negativeCountFloatHistogram := tsdbutil.GenerateTestFloatHistogram(0)
436+
negativeCountFloatHistogram.Count = -1.2345
437+
438+
negativeZeroCountFloatHistogram := tsdbutil.GenerateTestFloatHistogram(0)
439+
negativeZeroCountFloatHistogram.ZeroCount = -1.2345
440+
441+
bucketNumMisMatchInPSpanH := tsdbutil.GenerateTestHistogram(0)
442+
bucketNumMisMatchInPSpanH.PositiveSpans[0].Length = 3
443+
444+
negativeSpanOffsetInPSpansH := tsdbutil.GenerateTestHistogram(0)
445+
negativeSpanOffsetInPSpansH.PositiveSpans[1].Offset = -1
446+
447+
bucketNumMisMatchInNSpanH := tsdbutil.GenerateTestHistogram(0)
448+
bucketNumMisMatchInNSpanH.NegativeSpans[0].Length = 3
449+
450+
negativeSpanOffsetInNSpansH := tsdbutil.GenerateTestHistogram(0)
451+
negativeSpanOffsetInNSpansH.NegativeSpans[1].Offset = -1
452+
453+
negativeBucketCountInNBucketsH := tsdbutil.GenerateTestHistogram(0)
454+
negativeBucketCountInNBucketsH.NegativeBuckets = []int64{-1, -2, -3, -4}
455+
456+
negativeBucketCountInPBucketsH := tsdbutil.GenerateTestHistogram(0)
457+
negativeBucketCountInPBucketsH.PositiveBuckets = []int64{-1, -2, -3, -4}
458+
459+
countMisMatchSumIsNaN := tsdbutil.GenerateTestHistogram(0)
460+
countMisMatchSumIsNaN.Sum = math.NaN()
461+
countMisMatchSumIsNaN.Count = 11
462+
463+
countMisMatch := tsdbutil.GenerateTestHistogram(0)
464+
countMisMatch.Count = 11
465+
415466
for _, tc := range []struct {
416467
name string
417468
bucketLimit int
@@ -525,6 +576,90 @@ func TestValidateNativeHistogram(t *testing.T) {
525576
discardedSampleLabelValue: nativeHistogramSampleSizeBytesExceeded,
526577
maxNativeHistogramSampleSizeBytesLimit: 100,
527578
},
579+
{
580+
name: "bucket number mismatch in positive spans for float native histogram",
581+
histogram: cortexpb.FloatHistogramToHistogramProto(0, bucketNumMisMatchInPSpanFH.Copy()),
582+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: spans need 5 buckets, have 4 buckets: histogram spans specify different number of buckets than provided")),
583+
discardedSampleLabelValue: nativeHistogramInvalid,
584+
},
585+
{
586+
name: "negative span offset found in positive spans for float native histogram",
587+
histogram: cortexpb.FloatHistogramToHistogramProto(0, negativeSpanOffsetInPSpansFH.Copy()),
588+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: span number 2 with offset -1: histogram has a span whose offset is negative")),
589+
discardedSampleLabelValue: nativeHistogramInvalid,
590+
},
591+
{
592+
name: "bucket number mismatch in negative spans for float native histogram",
593+
histogram: cortexpb.FloatHistogramToHistogramProto(0, bucketNumMisMatchInNSpanFH.Copy()),
594+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: spans need 5 buckets, have 4 buckets: histogram spans specify different number of buckets than provided")),
595+
discardedSampleLabelValue: nativeHistogramInvalid,
596+
},
597+
{
598+
name: "negative spans offset found in negative spans for float native histogram",
599+
histogram: cortexpb.FloatHistogramToHistogramProto(0, negativeSpanOffsetInNSpansFH.Copy()),
600+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: span number 2 with offset -1: histogram has a span whose offset is negative")),
601+
discardedSampleLabelValue: nativeHistogramInvalid,
602+
},
603+
{
604+
name: "negative observations count in negative buckets for float native histogram",
605+
histogram: cortexpb.FloatHistogramToHistogramProto(0, negativeBucketCountInNBucketsFH.Copy()),
606+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: bucket number 1 has observation count of -1.1: histogram has a bucket whose observation count is negative")),
607+
discardedSampleLabelValue: nativeHistogramInvalid,
608+
},
609+
{
610+
name: "negative observations count in positive buckets for native histogram",
611+
histogram: cortexpb.HistogramToHistogramProto(0, negativeBucketCountInPBucketsH.Copy()),
612+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative")),
613+
discardedSampleLabelValue: nativeHistogramInvalid,
614+
},
615+
{
616+
name: "bucket number mismatch in positive spans for native histogram",
617+
histogram: cortexpb.HistogramToHistogramProto(0, bucketNumMisMatchInPSpanH.Copy()),
618+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: spans need 5 buckets, have 4 buckets: histogram spans specify different number of buckets than provided")),
619+
discardedSampleLabelValue: nativeHistogramInvalid,
620+
},
621+
{
622+
name: "negative span offset found in positive spans for native histogram",
623+
histogram: cortexpb.HistogramToHistogramProto(0, negativeSpanOffsetInPSpansH.Copy()),
624+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: span number 2 with offset -1: histogram has a span whose offset is negative")),
625+
discardedSampleLabelValue: nativeHistogramInvalid,
626+
},
627+
{
628+
name: "bucket number mismatch in negative spans for native histogram",
629+
histogram: cortexpb.HistogramToHistogramProto(0, bucketNumMisMatchInNSpanH.Copy()),
630+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: spans need 5 buckets, have 4 buckets: histogram spans specify different number of buckets than provided")),
631+
discardedSampleLabelValue: nativeHistogramInvalid,
632+
},
633+
{
634+
name: "negative spans offset found in negative spans for native histogram",
635+
histogram: cortexpb.FloatHistogramToHistogramProto(0, negativeSpanOffsetInNSpansFH.Copy()),
636+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: span number 2 with offset -1: histogram has a span whose offset is negative")),
637+
discardedSampleLabelValue: nativeHistogramInvalid,
638+
},
639+
{
640+
name: "negative observations count in negative buckets for native histogram",
641+
histogram: cortexpb.HistogramToHistogramProto(0, negativeBucketCountInNBucketsH.Copy()),
642+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative")),
643+
discardedSampleLabelValue: nativeHistogramInvalid,
644+
},
645+
{
646+
name: "negative observations count in positive buckets for native histogram",
647+
histogram: cortexpb.HistogramToHistogramProto(0, negativeBucketCountInPBucketsH.Copy()),
648+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative")),
649+
discardedSampleLabelValue: nativeHistogramInvalid,
650+
},
651+
{
652+
name: "mismatch between observations count with count field when sum is NaN",
653+
histogram: cortexpb.HistogramToHistogramProto(0, countMisMatchSumIsNaN.Copy()),
654+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("12 observations found in buckets, but the Count field is 11: histogram's observation count should be at least the number of observations found in the buckets")),
655+
discardedSampleLabelValue: nativeHistogramInvalid,
656+
},
657+
{
658+
name: "mismatch between observations count with count field",
659+
histogram: cortexpb.HistogramToHistogramProto(0, countMisMatch.Copy()),
660+
expectedErr: newNativeHistogramInvalidError(lbls, errors.New("12 observations found in buckets, but the Count field is 11: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)")),
661+
discardedSampleLabelValue: nativeHistogramInvalid,
662+
},
528663
} {
529664
t.Run(tc.name, func(t *testing.T) {
530665
reg := prometheus.NewRegistry()
@@ -534,7 +669,7 @@ func TestValidateNativeHistogram(t *testing.T) {
534669
limits.MaxNativeHistogramSampleSizeBytes = tc.maxNativeHistogramSampleSizeBytesLimit
535670
actualHistogram, actualErr := ValidateNativeHistogram(validateMetrics, limits, userID, lbls, tc.histogram)
536671
if tc.expectedErr != nil {
537-
require.Equal(t, tc.expectedErr, actualErr)
672+
require.Equal(t, tc.expectedErr.Error(), actualErr.Error())
538673
require.Equal(t, float64(1), testutil.ToFloat64(validateMetrics.DiscardedSamples.WithLabelValues(tc.discardedSampleLabelValue, userID)))
539674
// Should never increment if error was returned
540675
require.Equal(t, float64(0), testutil.ToFloat64(validateMetrics.HistogramSamplesReducedResolution.WithLabelValues(userID)))

0 commit comments

Comments
 (0)