From ab4be6b51a628cdb31801da004765645fa346b6c Mon Sep 17 00:00:00 2001 From: Arunprasad Rajkumar Date: Mon, 6 Sep 2021 17:15:19 +0530 Subject: [PATCH] Remove unused implementation of thanos_sidecar_last_heartbeat_success_time_seconds metric Signed-off-by: Arunprasad Rajkumar --- CHANGELOG.md | 2 +- cmd/thanos/sidecar.go | 6 ------ examples/alerts/alerts.md | 10 ++++------ mixin/README.md | 2 +- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7084b6a0f6c..3ac41a3ffd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,11 +32,11 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed +- [#4508](https://github.com/thanos-io/thanos/pull/4508) Adjust and rename `ThanosSidecarUnhealthy` to `ThanosSidecarNoConnectionToStartedPrometheus`; Remove `ThanosSidecarPrometheusDown` alert; Remove unused `thanos_sidecar_last_heartbeat_success_time_seconds` metrics. - [#4468](https://github.com/thanos-io/thanos/pull/4468) Rule: Fix temporary rule filename composition issue. - [#4476](https://github.com/thanos-io/thanos/pull/4476) UI: fix incorrect html escape sequence used for '>' symbol. - [#4532](https://github.com/thanos-io/thanos/pull/4532) Mixin: Fixed "all jobs" selector in thanos mixin dashboards. - [#4607](https://github.com/thanos-io/thanos/pull/4607) Azure: Fix Azure MSI Rate Limit -- [#4508](https://github.com/thanos-io/thanos/pull/4508) Adjust and rename `ThanosSidecarUnhealthy` to `ThanosSidecarNoConnectionToStartedPrometheus`; Remove `ThanosSidecarPrometheusDown`. ### Changed diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index fee36d65374..fbed45f4fa9 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -138,10 +138,6 @@ func runSidecar( Name: "thanos_sidecar_prometheus_up", Help: "Boolean indicator whether the sidecar can reach its Prometheus peer.", }) - lastHeartbeat := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "thanos_sidecar_last_heartbeat_success_time_seconds", - Help: "Timestamp of the last successful heartbeat in seconds.", - }) ctx, cancel := context.WithCancel(context.Background()) g.Add(func() error { @@ -191,7 +187,6 @@ func runSidecar( ) promUp.Set(1) statusProber.Ready() - lastHeartbeat.SetToCurrentTime() return nil }) if err != nil { @@ -213,7 +208,6 @@ func runSidecar( promUp.Set(0) } else { promUp.Set(1) - lastHeartbeat.SetToCurrentTime() } return nil diff --git a/examples/alerts/alerts.md b/examples/alerts/alerts.md index c6e6756cb68..b274f4579f7 100644 --- a/examples/alerts/alerts.md +++ b/examples/alerts/alerts.md @@ -308,16 +308,14 @@ rules: severity: critical - alert: ThanosSidecarNoConnectionToStartedPrometheus annotations: - description: Thanos Sidecar {{$labels.instance}} is unhealthy for more than {{$value}} - seconds. + description: Thanos Sidecar {{$labels.instance}} is unhealthy. runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanossidecarnoconnectiontostartedprometheus summary: Thanos Sidecar cannot access Prometheus, even though Prometheus seems healthy and has reloaded WAL. expr: | - time() - max by (pod, job, instance) (thanos_sidecar_last_heartbeat_success_time_seconds{job=~".*thanos-sidecar.*"}) >= 240 - AND on (pod) ( - min by (pod) (prometheus_tsdb_data_replay_duration_seconds) != 0 - ) + thanos_sidecar_prometheus_up{job=~".*thanos-sidecar.*"} == 0 + AND on (namespace, pod) + prometheus_tsdb_data_replay_duration_seconds != 0 for: 5m labels: severity: critical diff --git a/mixin/README.md b/mixin/README.md index 5c5473e7ee3..baef01946cd 100644 --- a/mixin/README.md +++ b/mixin/README.md @@ -106,7 +106,7 @@ This project is intended to be used as a library. You can extend and customize d }, sidecar+:: { selector: 'job=~".*thanos-sidecar.*"', - thanosPrometheusCommonDimensions: 'pod', + thanosPrometheusCommonDimensions: 'namespace, pod', title: '%(prefix)sSidecar' % $.dashboard.prefix, }, // TODO(kakkoyun): Fix naming convention: bucketReplicate