Skip to content

Commit

Permalink
schedule: fix the bug that the kind of role change operator is marked…
Browse files Browse the repository at this point in the history
… as OpRegion (tikv#3107)

Signed-off-by: Zheng Xiangsheng <hundundm@gmail.com>
  • Loading branch information
HunDunDM authored Oct 28, 2020
1 parent a700fff commit 52f5010
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 30 deletions.
5 changes: 3 additions & 2 deletions server/schedule/operator/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,6 @@ func (b *Builder) brief() string {

// Using Joint Consensus can ensure the replica safety and reduce the number of steps.
func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
kind |= OpRegion

// Add all the peers as Learner first. Split `Add Voter` to `Add Learner + Promote`
for _, add := range b.toAdd.IDs() {
peer := b.toAdd[add]
Expand All @@ -460,7 +458,9 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
} else {
b.execAddPeer(peer)
}
kind |= OpRegion
}

b.setTargetLeaderIfNotExist()
if b.targetLeaderStoreID == 0 {
return kind, errors.New("no valid leader")
Expand Down Expand Up @@ -502,6 +502,7 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
// Finally, remove all the peers as Learner
for _, remove := range b.toRemove.IDs() {
b.execRemovePeer(b.toRemove[remove])
kind |= OpRegion
}

return kind, nil
Expand Down
22 changes: 21 additions & 1 deletion server/schedule/operator/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,37 +163,43 @@ func (s *testBuilderSuite) TestBuild(c *C) {
useJointConsensus bool
originPeers []*metapb.Peer // first is leader
targetPeers []*metapb.Peer // first is leader
steps []OpStep
kind OpKind
steps []OpStep // empty means error
}
cases := []testCase{
{ // empty step
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
0,
[]OpStep{},
},
{ // empty step
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
0,
[]OpStep{},
},
{ // no valid leader
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{Id: 10, StoreId: 10}},
0,
[]OpStep{},
},
{ // no valid leader
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{Id: 10, StoreId: 10}},
0,
[]OpStep{},
},
{ // promote learner
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 1, StoreId: 1}},
OpLeader,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -203,6 +209,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 1, StoreId: 1}},
OpLeader,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -212,6 +219,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{StoreId: 4}, {StoreId: 5, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 4},
PromoteLearner{ToStore: 4},
Expand All @@ -226,6 +234,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{StoreId: 4}, {StoreId: 5, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 4},
AddLearner{ToStore: 5},
Expand All @@ -247,6 +256,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{StoreId: 2}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 2},
PromoteLearner{ToStore: 2},
Expand All @@ -258,6 +268,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{StoreId: 2}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 2},
ChangePeerV2Enter{
Expand All @@ -276,6 +287,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
OpRegion,
[]OpStep{
RemovePeer{FromStore: 2},
AddLearner{ToStore: 2},
Expand All @@ -285,6 +297,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{StoreId: 1}, {StoreId: 2, Role: metapb.PeerRole_Learner}},
0, // Note that there is no OpRegion here
[]OpStep{
DemoteFollower{ToStore: 2},
},
Expand All @@ -296,6 +309,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
// z1,h1 z1,h2 z2,h1
[]*metapb.Peer{{StoreId: 9}, {StoreId: 7}, {StoreId: 10}},
// z2,h2 z1,h1 z3,h1
OpLeader | OpRegion,
[]OpStep{
// 6->7
AddLearner{ToStore: 7},
Expand All @@ -318,6 +332,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, {Id: 1, StoreId: 1, Role: metapb.PeerRole_Learner}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -329,6 +344,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 3},
PromoteLearner{ToStore: 2},
Expand All @@ -340,6 +356,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter}, {Id: 1, StoreId: 1, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 3},
PromoteLearner{ToStore: 3},
Expand All @@ -353,6 +370,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 3, StoreId: 3}},
OpLeader | OpRegion,
[]OpStep{
TransferLeader{FromStore: 1, ToStore: 2},
ChangePeerV2Enter{
Expand All @@ -370,6 +388,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 3, StoreId: 3}, {Id: 1, StoreId: 1}},
OpLeader | OpRegion,
[]OpStep{
ChangePeerV2Enter{
PromoteLearners: []PromoteLearner{{ToStore: 3}},
Expand Down Expand Up @@ -401,6 +420,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
continue
}
c.Assert(err, IsNil)
c.Assert(op.Kind(), Equals, tc.kind)
c.Assert(op.Len(), Equals, len(tc.steps))
for i := 0; i < op.Len(); i++ {
switch step := op.Step(i).(type) {
Expand Down
2 changes: 1 addition & 1 deletion server/schedule/operator/create_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func CreateLeaveJointStateOperator(desc string, cluster opt.Cluster, origin *cor
brief := b.brief()

// buildStepsWithJointConsensus
kind := OpRegion
var kind OpKind

b.setTargetLeaderIfNotExist()
if b.targetLeaderStoreID == 0 {
Expand Down
8 changes: 7 additions & 1 deletion server/schedule/operator/create_operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ func (s *testCreateOperatorSuite) SetUpTest(c *C) {
func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
type testCase struct {
originPeers []*metapb.Peer // first is leader
steps []OpStep
kind OpKind
steps []OpStep // empty means error
}
cases := []testCase{
{
Expand All @@ -65,6 +66,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: 0,
steps: []OpStep{
ChangePeerV2Leave{
PromoteLearners: []PromoteLearner{{ToStore: 4}},
Expand All @@ -79,6 +81,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: 0,
steps: []OpStep{
ChangePeerV2Leave{
PromoteLearners: []PromoteLearner{{ToStore: 1}, {ToStore: 4}},
Expand All @@ -93,6 +96,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: OpLeader,
steps: []OpStep{
TransferLeader{FromStore: 1, ToStore: 2},
ChangePeerV2Leave{
Expand All @@ -108,6 +112,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: OpLeader,
steps: []OpStep{
TransferLeader{FromStore: 1, ToStore: 4},
ChangePeerV2Leave{
Expand All @@ -126,6 +131,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
continue
}
c.Assert(err, IsNil)
c.Assert(op.Kind(), Equals, tc.kind)
c.Assert(len(op.steps), Equals, len(tc.steps))
for i := 0; i < op.Len(); i++ {
switch step := op.Step(i).(type) {
Expand Down
25 changes: 16 additions & 9 deletions server/schedule/operator/kind.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,22 @@ type OpKind uint32

// Flags for operators.
const (
OpLeader OpKind = 1 << iota // Include leader transfer.
OpRegion // Include peer movement.
OpSplit // Include region split.
OpAdmin // Initiated by admin.
OpHotRegion // Initiated by hot region scheduler.
OpReplica // Initiated by replica checkers.
OpMerge // Initiated by merge checkers or merge schedulers.
OpRange // Initiated by range scheduler.
// Include leader transfer.
OpLeader OpKind = 1 << iota
// Include peer addition or removal. This means that this operator may take a long time.
OpRegion
// Include region split. Initiated by rule checker if `kind & OpAdmin == 0`.
OpSplit
// Initiated by admin.
OpAdmin
// Initiated by hot region scheduler.
OpHotRegion
// Initiated by replica checker.
OpReplica
// Initiated by merge checker or merge scheduler. Note that it may not include region merge.
OpMerge
// Initiated by range scheduler.
OpRange
opMax
)

Expand Down Expand Up @@ -81,5 +89,4 @@ func ParseOperatorKind(str string) (OpKind, error) {
k |= flag
}
return k, nil

}
16 changes: 8 additions & 8 deletions server/schedule/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ const (
// OperatorExpireTime is the duration that when an operator is not started
// after it, the operator will be considered expired.
OperatorExpireTime = 3 * time.Second
// LeaderOperatorWaitTime is the duration that when a leader operator runs
// longer than it, the operator will be considered timeout.
LeaderOperatorWaitTime = 10 * time.Second
// RegionOperatorWaitTime is the duration that when a region operator runs
// longer than it, the operator will be considered timeout.
RegionOperatorWaitTime = 10 * time.Minute
// FastOperatorWaitTime is the duration that when an operator that is not marked
// `OpRegion` runs longer than it, the operator will be considered timeout.
FastOperatorWaitTime = 10 * time.Second
// SlowOperatorWaitTime is the duration that when an operator marked `OpRegion`
// runs longer than it, the operator will be considered timeout.
SlowOperatorWaitTime = 10 * time.Minute
)

// Operator contains execution steps generated by scheduler.
Expand Down Expand Up @@ -202,9 +202,9 @@ func (o *Operator) CheckTimeout() bool {
return false
}
if o.kind&OpRegion != 0 {
return o.status.CheckTimeout(RegionOperatorWaitTime)
return o.status.CheckTimeout(SlowOperatorWaitTime)
}
return o.status.CheckTimeout(LeaderOperatorWaitTime)
return o.status.CheckTimeout(FastOperatorWaitTime)
}

// Len returns the operator's steps count.
Expand Down
14 changes: 7 additions & 7 deletions server/schedule/operator/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (s *testOperatorSuite) TestOperator(c *C) {
op.Start()
c.Assert(op.Check(region), IsNil)
c.Assert(op.Status(), Equals, SUCCESS)
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-RegionOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsFalse)

// addPeer1, transferLeader1, removePeer2
Expand All @@ -126,9 +126,9 @@ func (s *testOperatorSuite) TestOperator(c *C) {
c.Assert(op.Check(region), Equals, RemovePeer{FromStore: 2})
c.Assert(atomic.LoadInt32(&op.currentStep), Equals, int32(2))
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-LeaderOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-RegionOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-SlowOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsTrue)
res, err := json.Marshal(op)
c.Assert(err, IsNil)
Expand All @@ -139,7 +139,7 @@ func (s *testOperatorSuite) TestOperator(c *C) {
op = s.newTestOperator(1, OpLeader, steps...)
op.Start()
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-LeaderOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsTrue)
}

Expand Down Expand Up @@ -292,7 +292,7 @@ func (s *testOperatorSuite) TestCheckTimeout(c *C) {
c.Assert(op.Status(), Equals, CREATED)
c.Assert(op.Start(), IsTrue)
op.currentStep = int32(len(op.steps))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-RegionOperatorWaitTime))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime))
c.Assert(op.CheckTimeout(), IsFalse)
c.Assert(op.Status(), Equals, SUCCESS)
}
Expand Down Expand Up @@ -353,7 +353,7 @@ func (s *testOperatorSuite) TestCheck(c *C) {
c.Assert(op.Start(), IsTrue)
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, STARTED)
op.status.setTime(STARTED, time.Now().Add(-RegionOperatorWaitTime))
op.status.setTime(STARTED, time.Now().Add(-SlowOperatorWaitTime))
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, TIMEOUT)
}
Expand All @@ -368,7 +368,7 @@ func (s *testOperatorSuite) TestCheck(c *C) {
c.Assert(op.Start(), IsTrue)
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, STARTED)
op.status.setTime(STARTED, time.Now().Add(-RegionOperatorWaitTime))
op.status.setTime(STARTED, time.Now().Add(-SlowOperatorWaitTime))
region = s.newTestRegion(1, 1, [2]uint64{1, 1})
c.Assert(op.Check(region), IsNil)
c.Assert(op.Status(), Equals, SUCCESS)
Expand Down
2 changes: 1 addition & 1 deletion server/schedule/operator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func (t *testOperatorControllerSuite) TestCheckAddUnexpectedStatus(c *C) {
op := operator.NewOperator("test", "test", 1, &metapb.RegionEpoch{}, operator.OpRegion, steps...)
c.Assert(oc.checkAddOperator(op), IsTrue)
op.Start()
operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.RegionOperatorWaitTime))
operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.SlowOperatorWaitTime))
c.Assert(op.CheckTimeout(), IsTrue)
c.Assert(oc.checkAddOperator(op), IsFalse)
}
Expand Down

0 comments on commit 52f5010

Please sign in to comment.