Skip to content

Commit

Permalink
*: Use exponential buckets for histogram metrics (thanos-io#1545)
Browse files (browse the repository at this point in the history)
* Use exponential buckets for compactor histogram metrics

Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com>

* Update buckets

Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com>

* Adjust histogram buckets

Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com>

* Adjust store gate bucket

Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com>

* Adjust http duration buckets

Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com>
Signed-off-by: Aleksey Sin <asin@ozon.ru>
  • Loading branch information
kakkoyun authored and Aleksey Sin committed Nov 26, 2019
1 parent fb110a7, commit 2feb704
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 34 deletions.
4 changes: 1 addition & 3 deletions cmd/thanos/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) {
func storeClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, secure bool, cert, key, caCert, serverName string) ([]grpc.DialOption, error) {
grpcMets := grpc_prometheus.NewClientMetrics()
grpcMets.EnableClientHandlingTimeHistogram(
grpc_prometheus.WithHistogramBuckets([]float64{
0.001, 0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4,
}),
grpc_prometheus.WithHistogramBuckets(prometheus.ExponentialBuckets(0.001, 2, 15)),
)
dialOpts := []grpc.DialOption{
// We want to make sure that we can receive huge gRPC messages from storeAPI.
Expand Down
16 changes: 6 additions & 10 deletions pkg/compact/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,9 @@ func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics {
Help: "Total number of failed sync meta operations.",
})
m.syncMetaDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "thanos_compact_sync_meta_duration_seconds",
Help: "Time it took to sync meta files.",
Buckets: []float64{
0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, 100, 200, 500,
},
Name: "thanos_compact_sync_meta_duration_seconds",
Help: "Time it took to sync meta files.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

m.garbageCollectedBlocks = prometheus.NewCounter(prometheus.CounterOpts{
Expand All @@ -101,11 +99,9 @@ func newSyncerMetrics(reg prometheus.Registerer) *syncerMetrics {
Help: "Total number of failed garbage collection operations.",
})
m.garbageCollectionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "thanos_compact_garbage_collection_duration_seconds",
Help: "Time it took to perform garbage collection iteration.",
Buckets: []float64{
0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60, 100, 200, 500,
},
Name: "thanos_compact_garbage_collection_duration_seconds",
Help: "Time it took to perform garbage collection iteration.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

m.compactions = prometheus.NewCounterVec(prometheus.CounterOpts{
Expand Down
5 changes: 3 additions & 2 deletions pkg/extprom/http/instrument_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ func NewInstrumentationMiddleware(reg prometheus.Registerer) InstrumentationMidd
ins := defaultInstrumentationMiddleware{
requestDuration: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "Tracks the latencies for HTTP requests.",
Name: "http_request_duration_seconds",
Help: "Tracks the latencies for HTTP requests.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 17),
},
[]string{"code", "handler", "method"},
),
Expand Down
2 changes: 1 addition & 1 deletion pkg/objstore/objstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func BucketWithMetrics(name string, b Bucket, r prometheus.Registerer) Bucket {
Name: "thanos_objstore_bucket_operation_duration_seconds",
Help: "Duration of operations against the bucket",
ConstLabels: prometheus.Labels{"bucket": name},
Buckets: []float64{0.005, 0.01, 0.02, 0.04, 0.08, 0.15, 0.3, 0.6, 1, 1.5, 2.5, 5, 10, 20, 30},
Buckets: prometheus.ExponentialBuckets(0.001, 2, 17),
}, []string{"operation"}),
lastSuccessfullUploadTime: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "thanos_objstore_bucket_last_successful_upload_time",
Expand Down
4 changes: 1 addition & 3 deletions pkg/server/grpc/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ func New(logger log.Logger, reg prometheus.Registerer, tracer opentracing.Tracer

met := grpc_prometheus.NewServerMetrics()
met.EnableHandlingTimeHistogram(
grpc_prometheus.WithHistogramBuckets([]float64{
0.001, 0.01, 0.05, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4,
}),
grpc_prometheus.WithHistogramBuckets(prometheus.ExponentialBuckets(0.001, 2, 15)),
)
panicsTotal := prometheus.NewCounter(prometheus.CounterOpts{
Name: "thanos_grpc_req_panics_recovered_total",
Expand Down
16 changes: 6 additions & 10 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,18 +135,14 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics {
Help: "Number of blocks in a bucket store that were touched to satisfy a query.",
})
m.seriesGetAllDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_series_get_all_duration_seconds",
Help: "Time it takes until all per-block prepares and preloads for a query are finished.",
Buckets: []float64{
0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60,
},
Name: "thanos_bucket_store_series_get_all_duration_seconds",
Help: "Time it takes until all per-block prepares and preloads for a query are finished.",
Buckets: prometheus.ExponentialBuckets(0.01, 2, 15),
})
m.seriesMergeDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_series_merge_duration_seconds",
Help: "Time it takes to merge sub-results from all queried blocks into a single result.",
Buckets: []float64{
0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 7.5, 10, 15, 30, 60,
},
Name: "thanos_bucket_store_series_merge_duration_seconds",
Help: "Time it takes to merge sub-results from all queried blocks into a single result.",
Buckets: prometheus.ExponentialBuckets(0.01, 2, 15),
})
m.resultSeriesCount = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "thanos_bucket_store_series_result_series",
Expand Down
8 changes: 3 additions & 5 deletions pkg/store/gate.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ func NewGate(maxConcurrent int, reg prometheus.Registerer) *Gate {
Help: "Number of queries that are currently in flight.",
}),
gateTiming: prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "gate_duration_seconds",
Help: "How many seconds it took for queries to wait at the gate.",
Buckets: []float64{
0.01, 0.05, 0.1, 0.25, 0.6, 1, 2, 3.5, 5, 10,
},
Name: "gate_duration_seconds",
Help: "How many seconds it took for queries to wait at the gate.",
Buckets: prometheus.ExponentialBuckets(0.1, 2, 15),
}),
}

Expand Down

0 comments on commit 2feb704

Please sign in to comment.