Skip to content

Add additional bucket store metrics #5397

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@
* [ENHANCEMENT] Improving Performance on the API Gzip Handler. #5347
* [ENHANCEMENT] Dynamodb: Add `puller-sync-time` to allow different pull time for ring. #5357
* [ENHANCEMENT] Emit querier `max_concurrent` as a metric. #5362
* [ENHANCEMENT] Do not resync blocks in running store gateways during rollout deployment and container restart. #5363
* [ENHANCEMENT] Store Gateway: Add new metrics `cortex_bucket_store_sent_chunk_size_bytes`, `cortex_bucket_store_postings_size_bytes` and `cortex_bucket_store_empty_postings_total`. #5397
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message. #5265
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293
* [BUGFIX] Storage: Bucket index updater should ignore meta not found for partial blocks. #5343
* [BUGFIX] Ring: Add JOINING state to read operation. #5346
* [BUGFIX] Compactor: Partial block with only visit marker should be deleted even there is no deletion marker. #5342
* [ENHANCEMENT] Do not resync blocks in running store gateways during rollout deployment and container restart. #5363

## 1.15.1 2023-04-26

Expand Down
21 changes: 21 additions & 0 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ type BucketStoreMetrics struct {
seriesRefetches *prometheus.Desc
resultSeriesCount *prometheus.Desc
queriesDropped *prometheus.Desc
chunkSizeBytes *prometheus.Desc
postingsSizeBytes *prometheus.Desc
emptyPostingCount *prometheus.Desc

cachedPostingsCompressions *prometheus.Desc
cachedPostingsCompressionErrors *prometheus.Desc
Expand Down Expand Up @@ -109,6 +112,18 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_queries_dropped_total",
"Number of queries that were dropped due to the max chunks per query limit.",
nil, nil),
chunkSizeBytes: prometheus.NewDesc(
"cortex_bucket_store_sent_chunk_size_bytes",
"Size in bytes of the chunks for a single series, which corresponds to the gRPC message size sent to the querier.",
nil, nil),
postingsSizeBytes: prometheus.NewDesc(
"cortex_bucket_store_postings_size_bytes",
"Size in bytes of the postings for a single series call.",
nil, nil),
emptyPostingCount: prometheus.NewDesc(
"cortex_bucket_store_empty_postings_total",
"Total number of empty postings when fetching block series.",
nil, nil),

cachedPostingsCompressions: prometheus.NewDesc(
"cortex_bucket_store_cached_postings_compressions_total",
Expand Down Expand Up @@ -187,6 +202,9 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.seriesRefetches
out <- m.resultSeriesCount
out <- m.queriesDropped
out <- m.chunkSizeBytes
out <- m.postingsSizeBytes
out <- m.emptyPostingCount

out <- m.cachedPostingsCompressions
out <- m.cachedPostingsCompressionErrors
Expand Down Expand Up @@ -225,6 +243,9 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCounters(out, m.seriesRefetches, "thanos_bucket_store_series_refetches_total")
data.SendSumOfHistograms(out, m.resultSeriesCount, "thanos_bucket_store_series_result_series")
data.SendSumOfCounters(out, m.queriesDropped, "thanos_bucket_store_queries_dropped_total")
data.SendSumOfHistograms(out, m.chunkSizeBytes, "thanos_bucket_store_sent_chunk_size_bytes")
data.SendSumOfHistograms(out, m.postingsSizeBytes, "thanos_bucket_store_postings_size_bytes")
data.SendSumOfCounters(out, m.emptyPostingCount, "thanos_bucket_store_empty_postings_total")

data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressions, "thanos_bucket_store_cached_postings_compressions_total", "op")
data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressionErrors, "thanos_bucket_store_cached_postings_compression_errors_total", "op")
Expand Down
57 changes: 55 additions & 2 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_queries_dropped_total Number of queries that were dropped due to the max chunks per query limit.
# TYPE cortex_bucket_store_queries_dropped_total counter
cortex_bucket_store_queries_dropped_total 698089

# HELP cortex_bucket_store_sent_chunk_size_bytes Size in bytes of the chunks for a single series, which corresponds to the gRPC message size sent to the querier.
# TYPE cortex_bucket_store_sent_chunk_size_bytes histogram
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="32"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="256"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="512"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="1024"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="32768"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="262144"} 7
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="524288"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="1.048576e+06"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="3.3554432e+07"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="2.68435456e+08"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="5.36870912e+08"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="+Inf"} 9
cortex_bucket_store_sent_chunk_size_bytes_sum 1.57633e+06
cortex_bucket_store_sent_chunk_size_bytes_count 9
# HELP cortex_bucket_store_cached_postings_compressions_total Number of postings compressions and decompressions when storing to index cache.
# TYPE cortex_bucket_store_cached_postings_compressions_total counter
cortex_bucket_store_cached_postings_compressions_total{op="encode"} 1125950
Expand Down Expand Up @@ -400,7 +415,9 @@ func TestBucketStoreMetrics(t *testing.T) {
cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_cached_series_fetch_duration_seconds_sum 1.306102e+06
cortex_bucket_store_cached_series_fetch_duration_seconds_count 3

# HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series.
# TYPE cortex_bucket_store_empty_postings_total counter
cortex_bucket_store_empty_postings_total 112595
# HELP cortex_bucket_store_cached_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.
# TYPE cortex_bucket_store_cached_postings_fetch_duration_seconds histogram
cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.001"} 0
Expand Down Expand Up @@ -451,6 +468,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations.
# TYPE cortex_bucket_store_indexheader_lazy_unload_total counter
cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06
# HELP cortex_bucket_store_postings_size_bytes Size in bytes of the postings for a single series call.
# TYPE cortex_bucket_store_postings_size_bytes histogram
cortex_bucket_store_postings_size_bytes_bucket{le="32"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="256"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="512"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="1024"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="32768"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="262144"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="524288"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="1.048576e+06"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="3.3554432e+07"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="2.68435456e+08"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="5.36870912e+08"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="+Inf"} 3
cortex_bucket_store_postings_size_bytes_sum 225190
cortex_bucket_store_postings_size_bytes_count 3
`))
require.NoError(t, err)
}
Expand Down Expand Up @@ -534,6 +567,9 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.queriesDropped.WithLabelValues("chunks").Add(31 * base)
m.queriesDropped.WithLabelValues("series").Add(0)

m.postingsSizeBytes.Observe(10 * base)
m.chunkSizeBytes.Observe(11 * base)

m.seriesRefetches.Add(33 * base)

m.cachedPostingsCompressions.WithLabelValues("encode").Add(50 * base)
Expand All @@ -557,6 +593,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.indexHeaderLazyUnloadFailedCount.Add(63 * base)
m.indexHeaderLazyLoadDuration.Observe(0.65)

m.emptyPostingCount.Add(5 * base)

return reg
}

Expand All @@ -577,7 +615,9 @@ type mockedBucketStoreMetrics struct {
seriesRefetches prometheus.Counter
resultSeriesCount prometheus.Histogram
chunkSizeBytes prometheus.Histogram
postingsSizeBytes prometheus.Histogram
queriesDropped *prometheus.CounterVec
emptyPostingCount prometheus.Counter

cachedPostingsCompressions *prometheus.CounterVec
cachedPostingsCompressionErrors *prometheus.CounterVec
Expand Down Expand Up @@ -670,6 +710,14 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
},
})

m.postingsSizeBytes = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_postings_size_bytes",
Help: "Size in bytes of the postings for a single series call.",
Buckets: []float64{
32, 256, 512, 1024, 32 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 32 * 1024 * 1024, 256 * 1024 * 1024, 512 * 1024 * 1024,
},
})

m.queriesDropped = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_bucket_store_queries_dropped_total",
Help: "Number of queries that were dropped due to the limit.",
Expand Down Expand Up @@ -733,5 +781,10 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5},
})

m.emptyPostingCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_empty_postings_total",
Help: "Total number of empty postings when fetching block series.",
})

return &m
}