
Add the histogram bucket bridge #3937

Merged: 16 commits, Aug 23, 2019
Handle arbitrary infinity buckets
mfpierre committed Aug 16, 2019
commit df2a290c0c3cfee756de5ad4c5dc1232f59e713b
23 changes: 17 additions & 6 deletions pkg/aggregator/check_sampler.go
@@ -6,6 +6,7 @@
package aggregator

import (
"math"
"time"

"github.com/DataDog/datadog-agent/pkg/aggregator/ckey"
@@ -64,6 +65,14 @@ func (cs *CheckSampler) newSketchSeries(ck ckey.ContextKey, points []metrics.Ske
}

func (cs *CheckSampler) addBucket(bucket *metrics.HistogramBucket) {
if bucket.Value < 0 {
log.Warnf("Negative bucket value %d for metric %s, discarding", bucket.Value, bucket.Name)
return
}
if bucket.Value == 0 {
// noop
return
}
contextKey := cs.contextResolver.trackContext(bucket, bucket.Timestamp)

// if we already saw the bucket we only send the delta
@@ -77,6 +86,10 @@ func (cs *CheckSampler) addBucket(bucket *metrics.HistogramBucket) {
cs.lastSeenBucket[contextKey] = time.Now()

// simple linear interpolation, TODO: optimize
if math.IsInf(bucket.UpperBound, 1) {
// Arbitrarily double the lower bucket value for interpolation over infinity bucket
Contributor:

This is likely going to require some documentation, is there any guideline or standard practice around this in the community?

mfpierre (Author), Aug 16, 2019:

I wasn't able to find any guideline beyond what the histogram_quantile function currently does: it requires the infinity bucket, but returns the closest bucket if the quantile falls into it:

> if the quantile falls into the highest bucket, the upper bound of the 2nd highest bucket is returned

ref: https://github.com/prometheus/prometheus/blob/41151ca8dc069448515f48893b8631b9a3ad8df8/promql/quantile.go#L49-L70

Contributor:

Ah. That's confusing and unfortunate, but at least it's deterministic. I'll never understand why they don't just add a max metric. But anyway, for now we should mimic this, I guess?

mfpierre (Author):

@jbarciauskas I'm not sure how to emulate this behavior with the sketch. Should we not try to interpolate anything with the infinity bucket (which would mess up the summary)? Or interpolate everything close to/at the value of "the upper bound of the 2nd highest" (close to what I'm currently doing)?

Contributor:

Yeah, I mean report the count of values in the infinity bucket as the last defined bucket value, essentially as if they were all = max. Either choice (2nd-to-last bucket = max, or 2 * 2nd-to-last bucket = max) is synthesizing data that doesn't exist; the first one at least means that query results should be similar.

mfpierre (Author):

Just changed the logic to insert everything at the lower_bound of the infinity bucket.

Contributor:

Choosing the lower_bound makes sense to me, but this needs to be very loudly documented since max is ~always going to be wrong in fairly surprising ways.

bucket.UpperBound = bucket.LowerBound * 2
}
bucketRange := bucket.UpperBound - bucket.LowerBound
if bucketRange < 0 {
log.Warnf(
@@ -85,18 +98,16 @@ func (cs *CheckSampler) addBucket(bucket *metrics.HistogramBucket) {
)
return
}
linearIncr := bucketRange / float64(bucket.Value)
currentVal := bucket.LowerBound
log.Tracef(
"Interpolating %d buckets over [%f-%f] with %f increment",
bucket.Value, bucket.LowerBound, bucket.UpperBound, linearIncr,
)
for i := 0; i < bucket.Value; i++ {
cs.sketchMap.insert(int64(bucket.Timestamp), contextKey, currentVal)
currentVal += linearIncr
}

log.Tracef("Adding bucket %v", bucket)
}
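As a rough illustration of the interpolation loop above (a standalone sketch, not the agent's code; `interpolateBucket` is a hypothetical helper):

```go
package main

import "fmt"

// interpolateBucket sketches the linear interpolation performed by addBucket:
// a bucket counting n values over [lower, upper) becomes n evenly spaced
// synthetic points, starting at the lower bound, that are fed to the sketch.
func interpolateBucket(lower, upper float64, n int) []float64 {
	incr := (upper - lower) / float64(n)
	points := make([]float64, 0, n)
	cur := lower
	for i := 0; i < n; i++ {
		points = append(points, cur)
		cur += incr
	}
	return points
}

func main() {
	// same shape as the first test case below: 4 values over [10, 20)
	fmt.Println(interpolateBucket(10, 20, 4)) // [10 12.5 15 17.5]
}
```

Note that the points start at the lower bound and stop one increment short of the upper bound, which is why the test expects 10, 12.5, 15 and 17.5 rather than values reaching 20.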

func (cs *CheckSampler) commitSeries(timestamp float64) {
71 changes: 71 additions & 0 deletions pkg/aggregator/check_sampler_test.go
@@ -9,12 +9,14 @@ import (
// stdlib
"sort"
"testing"
"time"

// 3p
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/DataDog/datadog-agent/pkg/metrics"
"github.com/DataDog/datadog-agent/pkg/quantile"
)

func TestCheckGaugeSampling(t *testing.T) {
@@ -191,3 +193,72 @@ func TestHistogramIntervalSampling(t *testing.T) {

assert.True(t, foundCount)
}

func TestCheckHistogramBucketSampling(t *testing.T) {
checkSampler := newCheckSampler()
checkSampler.bucketExpiry = 10 * time.Millisecond

bucket1 := &metrics.HistogramBucket{
Name: "my.histogram",
Value: 4.0,
LowerBound: 10.0,
UpperBound: 20.0,
Tags: []string{"foo", "bar"},
Timestamp: 12345.0,
}
checkSampler.addBucket(bucket1)
assert.Equal(t, len(checkSampler.lastBucketValue), 1)
assert.Equal(t, len(checkSampler.lastSeenBucket), 1)

checkSampler.commit(12349.0)
_, flushed := checkSampler.flush()

expSketch := &quantile.Sketch{}
// linear interpolated values
expSketch.Insert(quantile.Default(), 10.0, 12.5, 15.0, 17.5)

assert.Equal(t, 1, len(flushed))
metrics.AssertSketchSeriesEqual(t, metrics.SketchSeries{
Name: "my.histogram",
Tags: []string{"foo", "bar"},
Points: []metrics.SketchPoint{
{Ts: 12345.0, Sketch: expSketch},
},
ContextKey: generateContextKey(bucket1),
}, flushed[0])

bucket2 := &metrics.HistogramBucket{
Name: "my.histogram",
Value: 6.0,
LowerBound: 10.0,
UpperBound: 20.0,
Tags: []string{"foo", "bar"},
Timestamp: 12400.0,
}
checkSampler.addBucket(bucket2)
assert.Equal(t, len(checkSampler.lastBucketValue), 1)
assert.Equal(t, len(checkSampler.lastSeenBucket), 1)

checkSampler.commit(12401.0)
_, flushed = checkSampler.flush()

expSketch = &quantile.Sketch{}
// linear interpolated values (only 2 since we stored the delta)
expSketch.Insert(quantile.Default(), 10.0, 15.0)

assert.Equal(t, 1, len(flushed))
metrics.AssertSketchSeriesEqual(t, metrics.SketchSeries{
Name: "my.histogram",
Tags: []string{"foo", "bar"},
Points: []metrics.SketchPoint{
{Ts: 12400.0, Sketch: expSketch},
},
ContextKey: generateContextKey(bucket1),
}, flushed[0])

// garbage collection
time.Sleep(11 * time.Millisecond)
checkSampler.flush()
assert.Equal(t, len(checkSampler.lastBucketValue), 0)
assert.Equal(t, len(checkSampler.lastSeenBucket), 0)
}
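The second half of the test relies on the sampler's delta behavior: Prometheus bucket counts are cumulative, so only the observations new since the last flush are inserted (4 points first, then 2). A minimal sketch of that bookkeeping (`bucketDelta` is a hypothetical helper, and the counter-reset fallback is an assumption not shown in this diff):

```go
package main

import "fmt"

// bucketDelta sketches the "only send the delta" logic: lastSeen maps a
// context key to the previously observed cumulative count, and only the
// growth since then is reported.
func bucketDelta(lastSeen map[string]int64, key string, value int64) int64 {
	delta := value - lastSeen[key]
	lastSeen[key] = value
	if delta < 0 {
		// assumed counter-reset handling (e.g. the monitored process
		// restarted): fall back to the full new value
		return value
	}
	return delta
}

func main() {
	lastSeen := map[string]int64{}
	fmt.Println(bucketDelta(lastSeen, "my.histogram", 4)) // first scrape: 4
	fmt.Println(bucketDelta(lastSeen, "my.histogram", 6)) // next scrape: 2 new values
}
```

This mirrors why the second flush in the test interpolates only 2 points (6 - 4) over the same [10, 20) range.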