Skip to content

Add additional bucket store metrics #5397

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@
* [ENHANCEMENT] Improving Performance on the API Gzip Handler. #5347
* [ENHANCEMENT] Dynamodb: Add `puller-sync-time` to allow different pull time for ring. #5357
* [ENHANCEMENT] Emit querier `max_concurrent` as a metric. #5362
* [ENHANCEMENT] Do not resync blocks in running store gateways during rollout deployment and container restart. #5363
* [ENHANCEMENT] Store Gateway: Add new metrics `cortex_bucket_store_sent_chunk_size_bytes`, `cortex_bucket_store_postings_size_bytes` and `cortex_bucket_store_empty_postings_total`. #5397
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message. #5265
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293
* [BUGFIX] Storage: Bucket index updater should ignore meta not found for partial blocks. #5343
* [BUGFIX] Ring: Add JOINING state to read operation. #5346
* [BUGFIX] Compactor: Partial block with only visit marker should be deleted even there is no deletion marker. #5342
* [ENHANCEMENT] Do not resync blocks in running store gateways during rollout deployment and container restart. #5363

## 1.15.1 2023-04-26

Expand Down
21 changes: 21 additions & 0 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ type BucketStoreMetrics struct {
seriesRefetches *prometheus.Desc
resultSeriesCount *prometheus.Desc
queriesDropped *prometheus.Desc
chunkSizeBytes *prometheus.Desc
postingsSizeBytes *prometheus.Desc
emptyPostingCount *prometheus.Desc

cachedPostingsCompressions *prometheus.Desc
cachedPostingsCompressionErrors *prometheus.Desc
Expand Down Expand Up @@ -109,6 +112,18 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_queries_dropped_total",
"Number of queries that were dropped due to the max chunks per query limit.",
nil, nil),
chunkSizeBytes: prometheus.NewDesc(
"cortex_bucket_store_sent_chunk_size_bytes",
"Size in bytes of the chunks for a single series, which corresponds to the gRPC message size sent to the querier.",
nil, nil),
postingsSizeBytes: prometheus.NewDesc(
"cortex_bucket_store_postings_size_bytes",
"Size in bytes of the postings for a single series call.",
nil, nil),
emptyPostingCount: prometheus.NewDesc(
"cortex_bucket_store_empty_postings_total",
"Total number of empty postings when fetching block series.",
nil, nil),

cachedPostingsCompressions: prometheus.NewDesc(
"cortex_bucket_store_cached_postings_compressions_total",
Expand Down Expand Up @@ -187,6 +202,9 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.seriesRefetches
out <- m.resultSeriesCount
out <- m.queriesDropped
out <- m.chunkSizeBytes
out <- m.postingsSizeBytes
out <- m.emptyPostingCount

out <- m.cachedPostingsCompressions
out <- m.cachedPostingsCompressionErrors
Expand Down Expand Up @@ -225,6 +243,9 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCounters(out, m.seriesRefetches, "thanos_bucket_store_series_refetches_total")
data.SendSumOfHistograms(out, m.resultSeriesCount, "thanos_bucket_store_series_result_series")
data.SendSumOfCounters(out, m.queriesDropped, "thanos_bucket_store_queries_dropped_total")
data.SendSumOfHistograms(out, m.chunkSizeBytes, "thanos_bucket_store_sent_chunk_size_bytes")
data.SendSumOfHistograms(out, m.postingsSizeBytes, "thanos_bucket_store_postings_size_bytes")
data.SendSumOfCounters(out, m.emptyPostingCount, "thanos_bucket_store_empty_postings_total")

data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressions, "thanos_bucket_store_cached_postings_compressions_total", "op")
data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressionErrors, "thanos_bucket_store_cached_postings_compression_errors_total", "op")
Expand Down
57 changes: 55 additions & 2 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_queries_dropped_total Number of queries that were dropped due to the max chunks per query limit.
# TYPE cortex_bucket_store_queries_dropped_total counter
cortex_bucket_store_queries_dropped_total 698089

# HELP cortex_bucket_store_sent_chunk_size_bytes Size in bytes of the chunks for a single series, which corresponds to the gRPC message size sent to the querier.
# TYPE cortex_bucket_store_sent_chunk_size_bytes histogram
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="32"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="256"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="512"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="1024"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="32768"} 0
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="262144"} 7
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="524288"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="1.048576e+06"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="3.3554432e+07"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="2.68435456e+08"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="5.36870912e+08"} 9
cortex_bucket_store_sent_chunk_size_bytes_bucket{le="+Inf"} 9
cortex_bucket_store_sent_chunk_size_bytes_sum 1.57633e+06
cortex_bucket_store_sent_chunk_size_bytes_count 9
# HELP cortex_bucket_store_cached_postings_compressions_total Number of postings compressions and decompressions when storing to index cache.
# TYPE cortex_bucket_store_cached_postings_compressions_total counter
cortex_bucket_store_cached_postings_compressions_total{op="encode"} 1125950
Expand Down Expand Up @@ -400,7 +415,9 @@ func TestBucketStoreMetrics(t *testing.T) {
cortex_bucket_store_cached_series_fetch_duration_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_cached_series_fetch_duration_seconds_sum 1.306102e+06
cortex_bucket_store_cached_series_fetch_duration_seconds_count 3

# HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series.
# TYPE cortex_bucket_store_empty_postings_total counter
cortex_bucket_store_empty_postings_total 112595
# HELP cortex_bucket_store_cached_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.
# TYPE cortex_bucket_store_cached_postings_fetch_duration_seconds histogram
cortex_bucket_store_cached_postings_fetch_duration_seconds_bucket{le="0.001"} 0
Expand Down Expand Up @@ -451,6 +468,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations.
# TYPE cortex_bucket_store_indexheader_lazy_unload_total counter
cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06
# HELP cortex_bucket_store_postings_size_bytes Size in bytes of the postings for a single series call.
# TYPE cortex_bucket_store_postings_size_bytes histogram
cortex_bucket_store_postings_size_bytes_bucket{le="32"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="256"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="512"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="1024"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="32768"} 0
cortex_bucket_store_postings_size_bytes_bucket{le="262144"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="524288"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="1.048576e+06"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="3.3554432e+07"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="2.68435456e+08"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="5.36870912e+08"} 3
cortex_bucket_store_postings_size_bytes_bucket{le="+Inf"} 3
cortex_bucket_store_postings_size_bytes_sum 225190
cortex_bucket_store_postings_size_bytes_count 3
`))
require.NoError(t, err)
}
Expand Down Expand Up @@ -534,6 +567,9 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.queriesDropped.WithLabelValues("chunks").Add(31 * base)
m.queriesDropped.WithLabelValues("series").Add(0)

m.postingsSizeBytes.Observe(10 * base)
m.chunkSizeBytes.Observe(11 * base)

m.seriesRefetches.Add(33 * base)

m.cachedPostingsCompressions.WithLabelValues("encode").Add(50 * base)
Expand All @@ -557,6 +593,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.indexHeaderLazyUnloadFailedCount.Add(63 * base)
m.indexHeaderLazyLoadDuration.Observe(0.65)

m.emptyPostingCount.Add(5 * base)

return reg
}

Expand All @@ -577,7 +615,9 @@ type mockedBucketStoreMetrics struct {
seriesRefetches prometheus.Counter
resultSeriesCount prometheus.Histogram
chunkSizeBytes prometheus.Histogram
postingsSizeBytes prometheus.Histogram
queriesDropped *prometheus.CounterVec
emptyPostingCount prometheus.Counter

cachedPostingsCompressions *prometheus.CounterVec
cachedPostingsCompressionErrors *prometheus.CounterVec
Expand Down Expand Up @@ -670,6 +710,14 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
},
})

m.postingsSizeBytes = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_postings_size_bytes",
Help: "Size in bytes of the postings for a single series call.",
Buckets: []float64{
32, 256, 512, 1024, 32 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 32 * 1024 * 1024, 256 * 1024 * 1024, 512 * 1024 * 1024,
},
})

m.queriesDropped = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
Name: "thanos_bucket_store_queries_dropped_total",
Help: "Number of queries that were dropped due to the limit.",
Expand Down Expand Up @@ -733,5 +781,10 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5},
})

m.emptyPostingCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_empty_postings_total",
Help: "Total number of empty postings when fetching block series.",
})

return &m
}