From 725621269fd36eee87bfa9de86e7317f4c6ca41a Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 16 Oct 2023 15:13:58 +0800 Subject: [PATCH] *: add MVCC version ratio to slow log metrics (#44897) (#47645) close pingcap/tidb#44896 --- executor/adapter.go | 3 +++ executor/metrics/metrics.go | 2 ++ metrics/grafana/tidb.json | 29 ++++++++++++++++++++++------- metrics/metrics.go | 1 + metrics/server.go | 10 ++++++++++ 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/executor/adapter.go b/executor/adapter.go index 93c48d8d24f45..42ce555c6b3c3 100644 --- a/executor/adapter.go +++ b/executor/adapter.go @@ -1606,6 +1606,9 @@ func (a *ExecStmt) LogSlowQuery(txnTS uint64, succ bool, hasMoreResults bool) { executor_metrics.TotalQueryProcHistogramGeneral.Observe(costTime.Seconds()) executor_metrics.TotalCopProcHistogramGeneral.Observe(execDetail.TimeDetail.ProcessTime.Seconds()) executor_metrics.TotalCopWaitHistogramGeneral.Observe(execDetail.TimeDetail.WaitTime.Seconds()) + if execDetail.ScanDetail != nil && execDetail.ScanDetail.ProcessedKeys != 0 { + executor_metrics.CopMVCCRatioHistogramGeneral.Observe(float64(execDetail.ScanDetail.TotalKeys) / float64(execDetail.ScanDetail.ProcessedKeys)) + } } var userString string if sessVars.User != nil { diff --git a/executor/metrics/metrics.go b/executor/metrics/metrics.go index 163fd76736cda..867d28a399197 100644 --- a/executor/metrics/metrics.go +++ b/executor/metrics/metrics.go @@ -43,6 +43,7 @@ var ( TotalQueryProcHistogramGeneral prometheus.Observer TotalCopProcHistogramGeneral prometheus.Observer TotalCopWaitHistogramGeneral prometheus.Observer + CopMVCCRatioHistogramGeneral prometheus.Observer TotalQueryProcHistogramInternal prometheus.Observer TotalCopProcHistogramInternal prometheus.Observer TotalCopWaitHistogramInternal prometheus.Observer @@ -137,6 +138,7 @@ func InitMetricsVars() { TotalQueryProcHistogramGeneral = metrics.TotalQueryProcHistogram.WithLabelValues(metrics.LblGeneral) 
TotalCopProcHistogramGeneral = metrics.TotalCopProcHistogram.WithLabelValues(metrics.LblGeneral) TotalCopWaitHistogramGeneral = metrics.TotalCopWaitHistogram.WithLabelValues(metrics.LblGeneral) + CopMVCCRatioHistogramGeneral = metrics.CopMVCCRatioHistogram.WithLabelValues(metrics.LblGeneral) TotalQueryProcHistogramInternal = metrics.TotalQueryProcHistogram.WithLabelValues(metrics.LblInternal) TotalCopProcHistogramInternal = metrics.TotalCopProcHistogram.WithLabelValues(metrics.LblInternal) TotalCopWaitHistogramInternal = metrics.TotalCopWaitHistogram.WithLabelValues(metrics.LblInternal) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index bf81940462338..9a7ca0b471de3 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -814,31 +814,44 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "$$hashKey": "object:211", + "alias": "/mvcc_ratio/", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_process_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,sql_type))", + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_process_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", sql_type=\"general\"}[1m])) by (le,sql_type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "all_proc_{{sql_type}}", + "legendFormat": "all_proc", "refId": "A" }, { - "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_cop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,sql_type))", + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_cop_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\", sql_type=\"general\"}[1m])) by (le,sql_type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "all_cop_proc_{{sql_type}}", + "legendFormat": "all_cop_proc", "refId": "B" }, { - "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,sql_type))", + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_wait_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", sql_type=\"general\"}[1m])) by (le,sql_type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "all_cop_wait_{{sql_type}}", + "legendFormat": "all_cop_wait", "refId": "C" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_slow_query_cop_mvcc_ratio_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", sql_type=\"general\"}[1m])) by (le,sql_type))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "mvcc_ratio", + "refId": "D" } ], "thresholds": [], @@ -861,6 +874,7 @@ }, "yaxes": [ { + "$$hashKey": "object:119", "format": "s", "label": null, "logBase": 2, @@ -869,6 +883,7 @@ "show": true }, { + "$$hashKey": "object:120", "format": "short", "label": null, "logBase": 1, diff --git a/metrics/metrics.go b/metrics/metrics.go index 7ec9f8aa21069..76e4672913773 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -204,6 +204,7 @@ func RegisterMetrics() { prometheus.MustRegister(TotalQueryProcHistogram) prometheus.MustRegister(TotalCopProcHistogram) prometheus.MustRegister(TotalCopWaitHistogram) + prometheus.MustRegister(CopMVCCRatioHistogram) prometheus.MustRegister(HandleSchemaValidate) prometheus.MustRegister(MaxProcs) prometheus.MustRegister(GOGC) diff --git a/metrics/server.go b/metrics/server.go index bf6dbeed0517c..807b31dda6c07 100644 --- a/metrics/server.go +++ 
b/metrics/server.go @@ -56,6 +56,7 @@ var ( TotalQueryProcHistogram *prometheus.HistogramVec TotalCopProcHistogram *prometheus.HistogramVec TotalCopWaitHistogram *prometheus.HistogramVec + CopMVCCRatioHistogram *prometheus.HistogramVec MaxProcs prometheus.Gauge GOGC prometheus.Gauge ConnIdleDurationHistogram *prometheus.HistogramVec @@ -256,6 +257,15 @@ func InitServerMetrics() { Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days }, []string{LblSQLType}) + CopMVCCRatioHistogram = NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "tidb", + Subsystem: "server", + Name: "slow_query_cop_mvcc_ratio", + Help: "Bucketed histogram of all cop total keys / processed keys in slow queries.", + Buckets: prometheus.ExponentialBuckets(0.5, 2, 21), // 0.5 ~ 524288 + }, []string{LblSQLType}) + MaxProcs = NewGauge( prometheus.GaugeOpts{ Namespace: "tidb",