Skip to content

Commit

Permalink
resource_control: add runaway metrics (#47360)
Browse files Browse the repository at this point in the history
ref #43691
  • Loading branch information
CabinfeverB authored Oct 19, 2023
1 parent f4a139f commit af7b32c
Show file tree
Hide file tree
Showing 6 changed files with 1,065 additions and 889 deletions.
2 changes: 2 additions & 0 deletions pkg/domain/resourcegroup/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ go_library(
importpath = "github.com/pingcap/tidb/pkg/domain/resourcegroup",
visibility = ["//visibility:public"],
deps = [
"//pkg/metrics",
"//pkg/util/dbterror/exeerrors",
"//pkg/util/logutil",
"@com_github_jellydator_ttlcache_v3//:ttlcache",
"@com_github_pingcap_kvproto//pkg/resource_manager",
"@com_github_prometheus_client_golang//prometheus",
"@com_github_tikv_client_go_v2//tikv",
"@com_github_tikv_client_go_v2//tikvrpc",
"@com_github_tikv_pd_client//resource_group/controller",
Expand Down
14 changes: 13 additions & 1 deletion pkg/domain/resourcegroup/runaway.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ import (

"github.com/jellydator/ttlcache/v3"
rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/util/dbterror/exeerrors"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/prometheus/client_golang/prometheus"
"github.com/tikv/client-go/v2/tikv"
"github.com/tikv/client-go/v2/tikvrpc"
rmclient "github.com/tikv/pd/client/resource_group/controller"
Expand Down Expand Up @@ -189,6 +191,7 @@ type RunawayManager struct {
// activeGroup is used to manage the active runaway watches of resource group
activeGroup map[string]int64
activeLock sync.RWMutex
metricsMap map[string]prometheus.Counter

resourceGroupCtl *rmclient.ResourceGroupsController
serverID string
Expand Down Expand Up @@ -222,6 +225,7 @@ func NewRunawayManager(resourceGroupCtl *rmclient.ResourceGroupsController, serv
quarantineChan: make(chan *QuarantineRecord, maxWatchRecordChannelSize),
staleQuarantineRecord: staleQuarantineChan,
activeGroup: make(map[string]int64),
metricsMap: make(map[string]prometheus.Counter),
}
m.insertionCancel = watchList.OnInsertion(func(ctx context.Context, i *ttlcache.Item[string, *QuarantineRecord]) {
m.activeLock.Lock()
Expand Down Expand Up @@ -252,6 +256,12 @@ func (rm *RunawayManager) DeriveChecker(resourceGroupName, originalSQL, sqlDiges
if group.RunawaySettings == nil && rm.activeGroup[resourceGroupName] == 0 {
return nil
}
counter, ok := rm.metricsMap[resourceGroupName]
if !ok {
counter = metrics.RunawayCheckerCounter.WithLabelValues(resourceGroupName, "hit", "")
rm.metricsMap[resourceGroupName] = counter
}
counter.Inc()
return newRunawayChecker(rm, resourceGroupName, group.RunawaySettings, originalSQL, sqlDigest, planDigest)
}

Expand Down Expand Up @@ -544,7 +554,9 @@ func (r *RunawayChecker) markQuarantine(now *time.Time) {
}

// markRunaway records that the checked statement was identified as a runaway
// query. It increments metrics.RunawayCheckerCounter for the checker's
// resource group, labelled with the match type and the lower-cased action
// name, and then forwards the event to the manager.
//
// matchType is how the statement was matched, action is the runaway action
// being applied, and now is passed through to the manager unchanged.
func (r *RunawayChecker) markRunaway(matchType RunawayMatchType, action rmpb.RunawayAction, now *time.Time) {
	// Resolve the action name once so the metric label and the value handed to
	// the manager always agree.
	actionStr := strings.ToLower(rmpb.RunawayAction_name[int32(action)])
	metrics.RunawayCheckerCounter.WithLabelValues(r.resourceGroupName, matchType.String(), actionStr).Inc()
	// Notify the manager exactly once per event; a second call would record
	// the same runaway twice.
	r.manager.markRunaway(r.resourceGroupName, r.originalSQL, r.planDigest, actionStr, matchType, now)
}

func (r *RunawayChecker) getSettingConvictIdentifier() string {
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ go_library(
"meta.go",
"metrics.go",
"owner.go",
"resource_group.go",
"resourcemanager.go",
"server.go",
"session.go",
Expand Down
Loading

0 comments on commit af7b32c

Please sign in to comment.