grafana · codebien · May 30, 2023 · Apr 14, 2023 · May 22, 2023 · May 21, 2023
@@ -0,0 +1,278 @@
+package expv2
+
+import (
+	"math"
+	"math/bits"
+	"time"
+
+	"go.k6.io/k6/metrics"
+	"go.k6.io/k6/output/cloud/expv2/pbcloud"
+	"google.golang.org/protobuf/types/known/timestamppb"
+)
+
+const (
+	// lowestTrackable represents the minimum value that the histogram tracks.
+	// Essentially, it excludes negative numbers.
+	// Most of metrics tracked by histograms are durations
+	// where we don't expect negative numbers.
+	//
+	// In the future, we may expand and include them,
+	// probably after https://github.com/grafana/k6/issues/763.
+	lowestTrackable = 0
+
+	// highestTrackable represents the maximum
+	// value that the histogram is able to track with high accuracy (0.1% of error).
+	// It should be a high enough
+	// and rationale value for the k6 context; 2^30 = 1_073_741_824
+	highestTrackable = 1 << 30
+)
+
+// histogram represents a distribution
+// of metrics samples' values as histogram.
+//
+// The histogram is the representation of base-2 exponential Histogram with two layers.
+// The first layer has primary buckets in the form of a power of two, and a second layer of buckets
+// for each primary bucket with an equally distributed amount of buckets inside.
+//
+// The histogram has a series of (N * 2^m) buckets, where:
+// N = a power of 2 that defines the number of primary buckets
+// m = a power of 2 that defines the number of the secondary buckets
+// The current version is: f(N = 25, m = 7) = 3200.
+type histogram struct {
+	// Buckets stores the counters for each bin of the histogram.
+	// It does not include the first and the last absolute bucket,
+	// because they contain exception cases
+	// and they requires to be tracked in a dedicated way.
+	//
+	// It is expected to start and end with a non-zero bucket,
+	// in this way we can avoid extra allocation for not significant buckets.
+	// All the zero buckets in between are preserved.
+	Buckets []uint32
+
+	// ExtraLowBucket counts occurrences of observed values smaller
+	// than the minimum trackable value.
+	ExtraLowBucket uint32
+
+	// ExtraLowBucket counts occurrences of observed values bigger
+	// than the maximum trackable value.
+	ExtraHighBucket uint32
+
+	// FirstNotZeroBucket represents the index of the first bucket
+	// with a significant counter in the Buckets slice (a not zero value).
+	// In this way, all the buckets before can be omitted.
+	FirstNotZeroBucket uint32
+
+	// LastNotZeroBucket represents the index of the last bucket
+	// with a significant counter in the Buckets slice (a not zero value).
+	// In this way, all the buckets after can be omitted.
+	LastNotZeroBucket uint32
+
+	// Max is the absolute maximum observed value.
+	Max float64
+
+	// Min is the absolute minimum observed value.
+	Min float64
+
+	// Sum is the sum of all observed values.
+	Sum float64
+
+	// Count is counts the amount of observed values.
+	Count uint32
+}
+
+// newHistogram creates an histogram of the provided values.
+func newHistogram() histogram {
+	return histogram{}
+}
+
+// addToBucket increments the counter of the bucket of the provided value.
+// If the value is lower or higher than the trackable limits
+// then it is counted into specific buckets. All the stats are also updated accordingly.
+func (h *histogram) addToBucket(v float64) {
+	if h.Count == 0 {
+		h.Max, h.Min = v, v
+	} else {
+		if v > h.Max {
+			h.Max = v
+		}
+		if v < h.Min {
+			h.Min = v
+		}
+	}
+
+	h.Count++
+	h.Sum += v
+
+	if v > highestTrackable {
+		h.ExtraHighBucket++
+		return
+	}
+	if v < lowestTrackable {
+		h.ExtraLowBucket++
+		return
+	}
+
+	index := resolveBucketIndex(v)
+
+	// they grow the current Buckets slice if there isn't enough capacity.
+	//
+	// An example with growRight:
+	// With Buckets [4, 1] and index equals to 5
+	// then we expect a slice like [4,1,0,0,0,0]
+	// then the counter at 5th position will be incremented
+	// generating the final slice [4,1,0,0,0,1]
+	switch {
+	case len(h.Buckets) == 0:
+		h.init(index)
+	case index < h.FirstNotZeroBucket:
+		h.prependBuckets(index)
+	case index > h.LastNotZeroBucket:
+		h.appendBuckets(index)
+	default:
+		h.Buckets[index-h.FirstNotZeroBucket]++
+	}
+}
+
+func (h *histogram) init(index uint32) {
+	h.FirstNotZeroBucket = index
+	h.LastNotZeroBucket = index
+	h.Buckets = make([]uint32, 1, 32)
+	h.Buckets[0] = 1
+}
+
+// prependBuckets expands the buckets slice with zeros up to the required index,
+// then it increments the required bucket.
+func (h *histogram) prependBuckets(index uint32) {
+	if h.FirstNotZeroBucket <= index {
+		panic("buckets is already contains the requested index")
+	}
+
+	newLen := (h.FirstNotZeroBucket - index) + uint32(len(h.Buckets))
+
+	// TODO: we may consider to swap by sub-groups
+	// e.g  [4, 1] => [4, 1, 0, 0] => [0, 0, 4, 1]
+	// It requires a benchmark if it is better than just copy it.
+
+	newBuckets := make([]uint32, newLen)
+	copy(newBuckets[h.FirstNotZeroBucket-index:], h.Buckets)
+	h.Buckets = newBuckets
+
+	// Update the stats
+	h.Buckets[0] = 1
+	h.FirstNotZeroBucket = index
+}
+
+// appendBuckets expands the buckets slice with zeros buckets till the required index,
+// then it increments the required bucket.
+// If the slice has enough capacity then it reuses it without allocate.
+func (h *histogram) appendBuckets(index uint32) {
+	if h.LastNotZeroBucket >= index {
+		panic("buckets is already bigger than requested index")
+	}
+
+	newLen := index - h.FirstNotZeroBucket + 1
+
+	if uint32(cap(h.Buckets)) > newLen {
+		// See https://go.dev/ref/spec#Slice_expressions
+		// "For slices, the upper index bound is
+		// the slice capacity cap(a) rather than the length"
+		h.Buckets = h.Buckets[:newLen]
+	} else {
+		newBuckets := make([]uint32, newLen)
+		copy(newBuckets, h.Buckets)
+		h.Buckets = newBuckets
+	}
+
+	// Update the stats
+	h.Buckets[len(h.Buckets)-1] = 1
+	h.LastNotZeroBucket = index
+}
+
+// histogramAsProto converts the histogram into the equivalent Protobuf version.
+func histogramAsProto(h *histogram, time time.Time) *pbcloud.TrendHdrValue {
+	hval := &pbcloud.TrendHdrValue{
+		Time:              timestamppb.New(time),
+		MinResolution:     1.0,
+		SignificantDigits: 2,
+		LowerCounterIndex: h.FirstNotZeroBucket,
+		MinValue:          h.Min,
+		MaxValue:          h.Max,
+		Sum:               h.Sum,
+		Count:             h.Count,
+		Counters:          h.Buckets,
+	}
+	if h.ExtraLowBucket > 0 {
+		hval.ExtraLowValuesCounter = &h.ExtraLowBucket
+	}
+	if h.ExtraHighBucket > 0 {
+		hval.ExtraHighValuesCounter = &h.ExtraHighBucket
+	}
+	return hval
+}
+
+// resolveBucketIndex returns the index
+// of the bucket in the histogram for the provided value.
+func resolveBucketIndex(val float64) uint32 {
+	if val < lowestTrackable {
+		return 0
+	}
+
+	// We upscale to the next integer to ensure that each sample falls
+	// within a specific bucket, even when the value is fractional.
+	// This avoids under-representing the distribution in the histogram.
+	upscaled := uint32(math.Ceil(val))
+
+	// In histograms, bucket boundaries are usually defined as multiples of powers of 2,
+	// allowing for efficient computation of bucket indexes.
+	//
+	// We define k=7 in our case, because it allows for sufficient granularity in the
+	// distribution (2^7=128 primary buckets of which each can be further
+	// subdivided if needed).
+	//
+	// k is the constant balancing factor between granularity and
+	// computational efficiency.
+	//
+	// In our case:
+	// i.e 2^7  = 128  ~  100 = 10^2
+	//     2^10 = 1024 ~ 1000 = 10^3
+	// f(x) = 3*x + 1 - empiric formula that works for us
+	// since f(2)=7 and f(3)=10
+	const k = uint32(7)
+
+	// 256 = 1 << (k+1)
+	if upscaled < 256 {
+		return upscaled
+	}
+
+	// `nkdiff` helps us find the right bucket for `upscaled`. It does so by determining the
+	// index for the "major" bucket (a set of values within a power of two range) and then
+	// the "sub" bucket within that major bucket. This system provides us with a fine level
+	// of granularity within a computationally efficient bucketing system. The result is a
+	// histogram that provides a detailed representation of the distribution of values.
+	//
+	// Here we use some math to get simple formula
+	// derivation:
+	// let u = upscaled
+	// let n = msb(u) - most significant digit position
+	// i.e. n = floor(log(u, 2))
+	//   major_bucket_index = n - k + 1
+	//   sub_bucket_index = u>>(n - k) - (1<<k)
+	//   bucket = major_bucket_index << k + sub_bucket_index =
+	//          = (n-k+1)<<k + u>>(n-k) - (1<<k) =
+	//          = (n-k)<<k + u>>(n-k)
+	//
+	nkdiff := uint32(bits.Len32(upscaled>>k) - 1) // msb index
+	return (nkdiff << k) + (upscaled >> nkdiff)
+}
+
+func (h *histogram) IsEmpty() bool {
+	return h.Count == 0
+}
+
+func (h *histogram) Add(s metrics.Sample) {
+	h.addToBucket(s.Value)
+}
+
+func (h *histogram) Format(time.Duration) map[string]float64 {
+	panic("output/cloud/expv2/histogram.Format is not expected to be called")
+}