From 2feb70477950332b692c152d68561874dc9f9868 Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Thu, 14 Nov 2019 06:31:27 +0100 Subject: [PATCH] *: Use exponential buckets for histogram metrics (#1545) * Use exponential buckets for compactor histogram metrics Signed-off-by: Kemal Akkoyun * Update buckets Signed-off-by: Kemal Akkoyun * Adjust histogram buckets Signed-off-by: Kemal Akkoyun * Adjust store gate bucket Signed-off-by: Kemal Akkoyun * Adjust http duration buckets Signed-off-by: Kemal Akkoyun Signed-off-by: Aleksey Sin --- cmd/thanos/query.go | 4 +--- pkg/compact/compact.go | 16 ++++++---------- pkg/extprom/http/instrument_server.go | 5 +++-- pkg/objstore/objstore.go | 2 +- pkg/server/grpc/grpc.go | 4 +--- pkg/store/bucket.go | 16 ++++++---------- pkg/store/gate.go | 8 +++----- 7 files changed, 21 insertions(+), 34 deletions(-) diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index de095475191..60f216cc6b9 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -168,9 +168,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) { func storeClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, secure bool, cert, key, caCert, serverName string) ([]grpc.DialOption, error) { grpcMets := grpc_prometheus.NewClientMetrics() grpcMets.EnableClientHandlingTimeHistogram( - grpc_prometheus.WithHistogramBuckets([]float64{ - 0.001, 0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, - }), + grpc_prometheus.WithHistogramBuckets(prometheus.ExponentialBuckets(0.001, 2, 15)), ) dialOpts := []grpc.DialOption{ // We want to make sure that we can receive huge gRPC messages from storeAPI. diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 01e6f78f892..fbeb9f3b0e8 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -81,11 +81,9 @@ func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics { Help: "Total number of failed sync meta operations.", }) m.syncMetaDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_compact_sync_meta_duration_seconds", - Help: "Time it took to sync meta files.", - Buckets: []float64{ - 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, 100, 200, 500, - }, + Name: "thanos_compact_sync_meta_duration_seconds", + Help: "Time it took to sync meta files.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }) m.garbageCollectedBlocks = prometheus.NewCounter(prometheus.CounterOpts{ @@ -101,11 +99,9 @@ func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics { Help: "Total number of failed garbage collection operations.", }) m.garbageCollectionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_compact_garbage_collection_duration_seconds", - Help: "Time it took to perform garbage collection iteration.", - Buckets: []float64{ - 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, 100, 200, 500, - }, + Name: "thanos_compact_garbage_collection_duration_seconds", + Help: "Time it took to perform garbage collection iteration.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }) m.compactions = prometheus.NewCounterVec(prometheus.CounterOpts{ diff --git a/pkg/extprom/http/instrument_server.go b/pkg/extprom/http/instrument_server.go index 5a49383b0a1..3683592f07b 100644 --- a/pkg/extprom/http/instrument_server.go +++ b/pkg/extprom/http/instrument_server.go @@ -39,8 +39,9 @@ func NewInstrumentationMiddleware(reg prometheus.Registerer) InstrumentationMidd ins := defaultInstrumentationMiddleware{ requestDuration: prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "http_request_duration_seconds", - Help: "Tracks the latencies for HTTP requests.", + Name: "http_request_duration_seconds", + Help: "Tracks the latencies for HTTP requests.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 17), }, []string{"code", "handler", "method"}, ), diff --git a/pkg/objstore/objstore.go b/pkg/objstore/objstore.go index f231d6ef1c7..4f8ae11f408 100644 --- a/pkg/objstore/objstore.go +++ b/pkg/objstore/objstore.go @@ -204,7 +204,7 @@ func BucketWithMetrics(name string, b Bucket, r prometheus.Registerer) Bucket { Name: "thanos_objstore_bucket_operation_duration_seconds", Help: "Duration of operations against the bucket", ConstLabels: prometheus.Labels{"bucket": name}, - Buckets: []float64{0.005, 0.01, 0.02, 0.04, 0.08, 0.15, 0.3, 0.6, 1, 1.5, 2.5, 5, 10, 20, 30}, + Buckets: prometheus.ExponentialBuckets(0.001, 2, 17), }, []string{"operation"}), lastSuccessfullUploadTime: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "thanos_objstore_bucket_last_successful_upload_time", diff --git a/pkg/server/grpc/grpc.go b/pkg/server/grpc/grpc.go index 926b6068aaa..725479d797a 100644 --- a/pkg/server/grpc/grpc.go +++ b/pkg/server/grpc/grpc.go @@ -43,9 +43,7 @@ func New(logger log.Logger, reg prometheus.Registerer, tracer opentracing.Tracer met := grpc_prometheus.NewServerMetrics() met.EnableHandlingTimeHistogram( - grpc_prometheus.WithHistogramBuckets([]float64{ - 0.001, 0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, - }), + grpc_prometheus.WithHistogramBuckets(prometheus.ExponentialBuckets(0.001, 2, 15)), ) panicsTotal := prometheus.NewCounter(prometheus.CounterOpts{ Name: "thanos_grpc_req_panics_recovered_total", diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index 3f2c409b3e9..a087ba7bd21 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -135,18 +135,14 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics { Help: "Number of blocks in a bucket store that were touched to satisfy a query.", }) m.seriesGetAllDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_bucket_store_series_get_all_duration_seconds", - Help: "Time it takes until all per-block prepares and preloads for a query are finished.", - Buckets: []float64{ - 0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, - }, + Name: "thanos_bucket_store_series_get_all_duration_seconds", + Help: "Time it takes until all per-block prepares and preloads for a query are finished.", + Buckets: prometheus.ExponentialBuckets(0.01, 2, 15), }) m.seriesMergeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "thanos_bucket_store_series_merge_duration_seconds", - Help: "Time it takes to merge sub-results from all queried blocks into a single result.", - Buckets: []float64{ - 0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, - }, + Name: "thanos_bucket_store_series_merge_duration_seconds", + Help: "Time it takes to merge sub-results from all queried blocks into a single result.", + Buckets: prometheus.ExponentialBuckets(0.01, 2, 15), }) m.resultSeriesCount = prometheus.NewSummary(prometheus.SummaryOpts{ Name: "thanos_bucket_store_series_result_series", diff --git a/pkg/store/gate.go b/pkg/store/gate.go index cdb9ea3712d..1a8fe691088 100644 --- a/pkg/store/gate.go +++ b/pkg/store/gate.go @@ -24,11 +24,9 @@ func NewGate(maxConcurrent int, reg prometheus.Registerer) *Gate { Help: "Number of queries that are currently in flight.", }), gateTiming: prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "gate_duration_seconds", - Help: "How many seconds it took for queries to wait at the gate.", - Buckets: []float64{ - 0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 10, - }, + Name: "gate_duration_seconds", + Help: "How many seconds it took for queries to wait at the gate.", + Buckets: prometheus.ExponentialBuckets(0.1, 2, 15), }), }