Skip to content

expose indexheader download and load duration histograms #6445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
* [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that don't match any other label set entries. #6435
* [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. #6444
* [ENHANCEMENT] Distributor: Added `cortex_distributor_received_samples_per_labelset_total` metric to calculate ingestion rate per label set. #6443
* [ENHANCEMENT] StoreGateway: Added `cortex_bucket_store_indexheader_load_duration_seconds` and `cortex_bucket_store_indexheader_download_duration_seconds` metrics to track the time spent downloading and loading index-header files. #6445
* [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224
* [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326
* [BUGFIX] Ring: update ring with new IP address when an instance is lost and rejoins, but heartbeat is disabled. #6271
Expand Down
14 changes: 14 additions & 0 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ type BucketStoreMetrics struct {
indexHeaderLazyUnloadCount *prometheus.Desc
indexHeaderLazyUnloadFailedCount *prometheus.Desc
indexHeaderLazyLoadDuration *prometheus.Desc
indexHeaderDownloadDuration *prometheus.Desc
indexHeaderLoadDuration *prometheus.Desc
}

func NewBucketStoreMetrics() *BucketStoreMetrics {
Expand Down Expand Up @@ -205,6 +207,14 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_indexheader_lazy_load_duration_seconds",
"Duration of the index-header lazy loading in seconds.",
nil, nil),
indexHeaderDownloadDuration: prometheus.NewDesc(
"cortex_bucket_store_indexheader_download_duration_seconds",
"Duration of the index-header download from objstore in seconds.",
nil, nil),
indexHeaderLoadDuration: prometheus.NewDesc(
"cortex_bucket_store_indexheader_load_duration_seconds",
"Duration of the index-header loading in seconds.",
nil, nil),

lazyExpandedPostingsCount: prometheus.NewDesc(
"cortex_bucket_store_lazy_expanded_postings_total",
Expand Down Expand Up @@ -272,6 +282,8 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.indexHeaderLazyUnloadCount
out <- m.indexHeaderLazyUnloadFailedCount
out <- m.indexHeaderLazyLoadDuration
out <- m.indexHeaderDownloadDuration
out <- m.indexHeaderLoadDuration

out <- m.lazyExpandedPostingsCount
out <- m.lazyExpandedPostingGroups
Expand Down Expand Up @@ -323,6 +335,8 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCounters(out, m.indexHeaderLazyUnloadCount, "thanos_bucket_store_indexheader_lazy_unload_total")
data.SendSumOfCounters(out, m.indexHeaderLazyUnloadFailedCount, "thanos_bucket_store_indexheader_lazy_unload_failed_total")
data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, "thanos_bucket_store_indexheader_lazy_load_duration_seconds")
data.SendSumOfHistograms(out, m.indexHeaderDownloadDuration, "thanos_bucket_store_indexheader_download_duration_seconds")
data.SendSumOfHistograms(out, m.indexHeaderLoadDuration, "thanos_bucket_store_indexheader_load_duration_seconds")

data.SendSumOfCounters(out, m.lazyExpandedPostingsCount, "thanos_bucket_store_lazy_expanded_postings_total")
data.SendSumOfCountersWithLabels(out, m.lazyExpandedPostingGroups, "thanos_bucket_store_lazy_expanded_posting_groups_total", "reason")
Expand Down
46 changes: 46 additions & 0 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series.
# TYPE cortex_bucket_store_empty_postings_total counter
cortex_bucket_store_empty_postings_total 112595

# HELP cortex_bucket_store_indexheader_download_duration_seconds Duration of the index-header download from objstore in seconds.
# TYPE cortex_bucket_store_indexheader_download_duration_seconds histogram
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.01"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.02"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.05"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.1"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.2"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.5"} 0
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="1"} 3
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="2"} 3
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="5"} 3
cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_indexheader_download_duration_seconds_sum 2.25
cortex_bucket_store_indexheader_download_duration_seconds_count 3

# HELP cortex_bucket_store_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.
# TYPE cortex_bucket_store_postings_fetch_duration_seconds histogram
cortex_bucket_store_postings_fetch_duration_seconds_bucket{le="0.001"} 0
Expand Down Expand Up @@ -543,6 +559,22 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations.
# TYPE cortex_bucket_store_indexheader_lazy_unload_total counter
cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06

# HELP cortex_bucket_store_indexheader_load_duration_seconds Duration of the index-header loading in seconds.
# TYPE cortex_bucket_store_indexheader_load_duration_seconds histogram
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.01"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.02"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.05"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.1"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.2"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.5"} 0
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="1"} 3
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="2"} 3
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="5"} 3
cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_indexheader_load_duration_seconds_sum 2.55
cortex_bucket_store_indexheader_load_duration_seconds_count 3

# HELP cortex_bucket_store_lazy_expanded_posting_groups_total Total number of posting groups that are marked as lazy and corresponding reason.
# TYPE cortex_bucket_store_lazy_expanded_posting_groups_total counter
cortex_bucket_store_lazy_expanded_posting_groups_total{reason="keys_limit"} 202671
Expand Down Expand Up @@ -685,6 +717,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.indexHeaderLazyUnloadCount.Add(62 * base)
m.indexHeaderLazyUnloadFailedCount.Add(63 * base)
m.indexHeaderLazyLoadDuration.Observe(0.65)
m.indexHeaderDownloadDuration.Observe(0.75)
m.indexHeaderLoadDuration.Observe(0.85)

m.emptyPostingCount.Add(5 * base)

Expand Down Expand Up @@ -737,6 +771,8 @@ type mockedBucketStoreMetrics struct {
indexHeaderLazyUnloadCount prometheus.Counter
indexHeaderLazyUnloadFailedCount prometheus.Counter
indexHeaderLazyLoadDuration prometheus.Histogram
indexHeaderDownloadDuration prometheus.Histogram
indexHeaderLoadDuration prometheus.Histogram

lazyExpandedPostingsCount prometheus.Counter
lazyExpandedPostingGroups *prometheus.CounterVec
Expand Down Expand Up @@ -913,6 +949,16 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Help: "Duration of the index-header lazy loading in seconds.",
Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5},
})
m.indexHeaderDownloadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_indexheader_download_duration_seconds",
Help: "Duration of the index-header download from objstore in seconds.",
Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5},
})
m.indexHeaderLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_indexheader_load_duration_seconds",
Help: "Duration of the index-header loading in seconds.",
Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5},
})

m.emptyPostingCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_empty_postings_total",
Expand Down
Loading