Skip to content

Commit

Permalink
scheduler: skip evict-leader-scheduler when setting schedule deny label (tikv#8303)
Browse files Browse the repository at this point in the history

ref tikv#7300, close tikv#7853

- add a real-cluster test covering `skip evict-leader-scheduler when setting schedule deny label`
- add `DeleteStoreLabel` API and `DeleteScheduler` API

Signed-off-by: okJiang <819421878@qq.com>
  • Loading branch information
okJiang authored and rleungx committed Sep 10, 2024
1 parent d71a1a3 commit 463297b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 18 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ coverage.xml
coverage
*.txt
go.work*
embedded_assets_handler.go
*.log
38 changes: 20 additions & 18 deletions pkg/schedule/schedulers/scheduler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ func (s *ScheduleController) Stop() {

// Schedule tries to create some operators.
func (s *ScheduleController) Schedule(diagnosable bool) []*operator.Operator {
_, isEvictLeaderScheduler := s.Scheduler.(*evictLeaderScheduler)
retry:
for i := 0; i < maxScheduleRetries; i++ {
// no need to retry if schedule should stop to speed exit
select {
Expand All @@ -466,29 +468,29 @@ func (s *ScheduleController) Schedule(diagnosable bool) []*operator.Operator {
if diagnosable {
s.diagnosticRecorder.SetResultFromPlans(ops, plans)
}
foundDisabled := false
if len(ops) == 0 {
continue
}
// If we have schedule, reset interval to the minimal interval.
s.nextInterval = s.Scheduler.GetMinInterval()
for _, op := range ops {
if labelMgr := s.cluster.GetRegionLabeler(); labelMgr != nil {
region := s.cluster.GetRegion(op.RegionID())
if region == nil {
continue
}
if labelMgr.ScheduleDisabled(region) {
denySchedulersByLabelerCounter.Inc()
foundDisabled = true
break
}
region := s.cluster.GetRegion(op.RegionID())
if region == nil {
continue retry
}
}
if len(ops) > 0 {
// If we have schedule, reset interval to the minimal interval.
s.nextInterval = s.Scheduler.GetMinInterval()
// try regenerating operators
if foundDisabled {
labelMgr := s.cluster.GetRegionLabeler()
if labelMgr == nil {
continue
}
return ops

// The evict-leader-scheduler is exempt from the schedule-deny label: if its operators were blocked by the label, it would obstruct the graceful restart of TiKV performed by the operator.
// Refer: https://docs.pingcap.com/tidb-in-kubernetes/stable/restart-a-tidb-cluster#perform-a-graceful-restart-to-a-single-tikv-pod
if labelMgr.ScheduleDisabled(region) && !isEvictLeaderScheduler {
denySchedulersByLabelerCounter.Inc()
continue retry
}
}
return ops
}
s.nextInterval = s.Scheduler.GetNextInterval(s.nextInterval)
return nil
Expand Down
3 changes: 3 additions & 0 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,9 @@ func (c *RaftCluster) DeleteStoreLabel(storeID uint64, labelKey string) error {
if store == nil {
return errs.ErrInvalidStoreID.FastGenByArgs(storeID)
}
if len(store.GetLabels()) == 0 {
return errors.Errorf("the label key %s does not exist", labelKey)
}
newStore := typeutil.DeepClone(store.GetMeta(), core.StoreFactory)
labels := make([]*metapb.StoreLabel, 0, len(newStore.GetLabels())-1)
for _, label := range newStore.GetLabels() {
Expand Down

0 comments on commit 463297b

Please sign in to comment.