Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

schedule: fix the bug that the kind of role change operator is marked as OpRegion #3107

Merged
merged 7 commits into from
Oct 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions server/schedule/operator/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,6 @@ func (b *Builder) brief() string {

// Using Joint Consensus can ensure the replica safety and reduce the number of steps.
func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
kind |= OpRegion

// Add all the peers as Learner first. Split `Add Voter` to `Add Learner + Promote`
for _, add := range b.toAdd.IDs() {
peer := b.toAdd[add]
Expand All @@ -460,7 +458,9 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
} else {
b.execAddPeer(peer)
}
kind |= OpRegion
}

b.setTargetLeaderIfNotExist()
if b.targetLeaderStoreID == 0 {
return kind, errors.New("no valid leader")
Expand Down Expand Up @@ -502,6 +502,7 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) {
// Finally, remove all the peers as Learner
for _, remove := range b.toRemove.IDs() {
b.execRemovePeer(b.toRemove[remove])
kind |= OpRegion
}

return kind, nil
Expand Down
22 changes: 21 additions & 1 deletion server/schedule/operator/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,37 +163,43 @@ func (s *testBuilderSuite) TestBuild(c *C) {
useJointConsensus bool
originPeers []*metapb.Peer // first is leader
targetPeers []*metapb.Peer // first is leader
steps []OpStep
kind OpKind
steps []OpStep // empty means error
}
cases := []testCase{
{ // empty step
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
0,
[]OpStep{},
},
{ // empty step
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
0,
[]OpStep{},
},
{ // no valid leader
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{Id: 10, StoreId: 10}},
0,
[]OpStep{},
},
{ // no valid leader
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{Id: 10, StoreId: 10}},
0,
[]OpStep{},
},
{ // promote learner
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 1, StoreId: 1}},
OpLeader,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -203,6 +209,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 1, StoreId: 1}},
OpLeader,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -212,6 +219,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{StoreId: 4}, {StoreId: 5, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 4},
PromoteLearner{ToStore: 4},
Expand All @@ -226,6 +234,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{StoreId: 4}, {StoreId: 5, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 4},
AddLearner{ToStore: 5},
Expand All @@ -247,6 +256,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{StoreId: 2}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 2},
PromoteLearner{ToStore: 2},
Expand All @@ -258,6 +268,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}},
[]*metapb.Peer{{StoreId: 2}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 2},
ChangePeerV2Enter{
Expand All @@ -276,6 +287,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
OpRegion,
[]OpStep{
RemovePeer{FromStore: 2},
AddLearner{ToStore: 2},
Expand All @@ -285,6 +297,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}},
[]*metapb.Peer{{StoreId: 1}, {StoreId: 2, Role: metapb.PeerRole_Learner}},
0, // Note that there is no OpRegion here
[]OpStep{
DemoteFollower{ToStore: 2},
},
Expand All @@ -296,6 +309,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
// z1,h1 z1,h2 z2,h1
[]*metapb.Peer{{StoreId: 9}, {StoreId: 7}, {StoreId: 10}},
// z2,h2 z1,h1 z3,h1
OpLeader | OpRegion,
[]OpStep{
// 6->7
AddLearner{ToStore: 7},
Expand All @@ -318,6 +332,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, {Id: 1, StoreId: 1, Role: metapb.PeerRole_Learner}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
PromoteLearner{ToStore: 2},
TransferLeader{FromStore: 1, ToStore: 2},
Expand All @@ -329,6 +344,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
false, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2, Role: metapb.PeerRole_Voter}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 3},
PromoteLearner{ToStore: 2},
Expand All @@ -340,6 +356,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, false,
[]*metapb.Peer{{Id: 1, StoreId: 1, Role: metapb.PeerRole_Voter}, {Id: 2, StoreId: 2, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 3, StoreId: 3, Role: metapb.PeerRole_Voter}, {Id: 1, StoreId: 1, Role: metapb.PeerRole_Learner}},
OpLeader | OpRegion,
[]OpStep{
AddLearner{ToStore: 3},
PromoteLearner{ToStore: 3},
Expand All @@ -353,6 +370,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 2, StoreId: 2}, {Id: 3, StoreId: 3}},
OpLeader | OpRegion,
[]OpStep{
TransferLeader{FromStore: 1, ToStore: 2},
ChangePeerV2Enter{
Expand All @@ -370,6 +388,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
true, true,
[]*metapb.Peer{{Id: 1, StoreId: 1}, {Id: 2, StoreId: 2}, {Id: 3, StoreId: 3, Role: metapb.PeerRole_Learner}},
[]*metapb.Peer{{Id: 3, StoreId: 3}, {Id: 1, StoreId: 1}},
OpLeader | OpRegion,
[]OpStep{
ChangePeerV2Enter{
PromoteLearners: []PromoteLearner{{ToStore: 3}},
Expand Down Expand Up @@ -401,6 +420,7 @@ func (s *testBuilderSuite) TestBuild(c *C) {
continue
}
c.Assert(err, IsNil)
c.Assert(op.Kind(), Equals, tc.kind)
c.Assert(op.Len(), Equals, len(tc.steps))
for i := 0; i < op.Len(); i++ {
switch step := op.Step(i).(type) {
Expand Down
2 changes: 1 addition & 1 deletion server/schedule/operator/create_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func CreateLeaveJointStateOperator(desc string, cluster opt.Cluster, origin *cor
brief := b.brief()

// buildStepsWithJointConsensus
kind := OpRegion
var kind OpKind

b.setTargetLeaderIfNotExist()
if b.targetLeaderStoreID == 0 {
Expand Down
8 changes: 7 additions & 1 deletion server/schedule/operator/create_operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ func (s *testCreateOperatorSuite) SetUpTest(c *C) {
func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
type testCase struct {
originPeers []*metapb.Peer // first is leader
steps []OpStep
kind OpKind
steps []OpStep // empty means error
}
cases := []testCase{
{
Expand All @@ -65,6 +66,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: 0,
steps: []OpStep{
ChangePeerV2Leave{
PromoteLearners: []PromoteLearner{{ToStore: 4}},
Expand All @@ -79,6 +81,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: 0,
steps: []OpStep{
ChangePeerV2Leave{
PromoteLearners: []PromoteLearner{{ToStore: 1}, {ToStore: 4}},
Expand All @@ -93,6 +96,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: OpLeader,
steps: []OpStep{
TransferLeader{FromStore: 1, ToStore: 2},
ChangePeerV2Leave{
Expand All @@ -108,6 +112,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
{Id: 3, StoreId: 3, Role: metapb.PeerRole_DemotingVoter},
{Id: 4, StoreId: 4, Role: metapb.PeerRole_IncomingVoter},
},
kind: OpLeader,
steps: []OpStep{
TransferLeader{FromStore: 1, ToStore: 4},
ChangePeerV2Leave{
Expand All @@ -126,6 +131,7 @@ func (s *testCreateOperatorSuite) TestCreateLeaveJointStateOperator(c *C) {
continue
}
c.Assert(err, IsNil)
c.Assert(op.Kind(), Equals, tc.kind)
c.Assert(len(op.steps), Equals, len(tc.steps))
for i := 0; i < op.Len(); i++ {
switch step := op.Step(i).(type) {
Expand Down
25 changes: 16 additions & 9 deletions server/schedule/operator/kind.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,22 @@ type OpKind uint32

// Flags for operators.
const (
OpLeader OpKind = 1 << iota // Include leader transfer.
OpRegion // Include peer movement.
OpSplit // Include region split.
OpAdmin // Initiated by admin.
OpHotRegion // Initiated by hot region scheduler.
OpReplica // Initiated by replica checkers.
OpMerge // Initiated by merge checkers or merge schedulers.
OpRange // Initiated by range scheduler.
// Include leader transfer.
OpLeader OpKind = 1 << iota
// Include peer addition or removal. This means that this operator may take a long time.
OpRegion
// Include region split. Initiated by rule checker if `kind & OpAdmin == 0`.
OpSplit
// Initiated by admin.
OpAdmin
// Initiated by hot region scheduler.
OpHotRegion
// Initiated by replica checker.
OpReplica
// Initiated by merge checker or merge scheduler. Note that it may not include region merge.
OpMerge
// Initiated by range scheduler.
OpRange
opMax
)

Expand Down Expand Up @@ -81,5 +89,4 @@ func ParseOperatorKind(str string) (OpKind, error) {
k |= flag
}
return k, nil

}
16 changes: 8 additions & 8 deletions server/schedule/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ const (
// OperatorExpireTime is the duration that when an operator is not started
// after it, the operator will be considered expired.
OperatorExpireTime = 3 * time.Second
// LeaderOperatorWaitTime is the duration that when a leader operator runs
// longer than it, the operator will be considered timeout.
LeaderOperatorWaitTime = 10 * time.Second
// RegionOperatorWaitTime is the duration that when a region operator runs
// longer than it, the operator will be considered timeout.
RegionOperatorWaitTime = 10 * time.Minute
// FastOperatorWaitTime is the duration that when an operator that is not marked
// `OpRegion` runs longer than it, the operator will be considered timeout.
FastOperatorWaitTime = 10 * time.Second
// SlowOperatorWaitTime is the duration that when an operator marked `OpRegion`
// runs longer than it, the operator will be considered timeout.
SlowOperatorWaitTime = 10 * time.Minute
)

// Operator contains execution steps generated by scheduler.
Expand Down Expand Up @@ -202,9 +202,9 @@ func (o *Operator) CheckTimeout() bool {
return false
}
if o.kind&OpRegion != 0 {
return o.status.CheckTimeout(RegionOperatorWaitTime)
return o.status.CheckTimeout(SlowOperatorWaitTime)
}
return o.status.CheckTimeout(LeaderOperatorWaitTime)
return o.status.CheckTimeout(FastOperatorWaitTime)
}

// Len returns the operator's steps count.
Expand Down
14 changes: 7 additions & 7 deletions server/schedule/operator/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (s *testOperatorSuite) TestOperator(c *C) {
op.Start()
c.Assert(op.Check(region), IsNil)
c.Assert(op.Status(), Equals, SUCCESS)
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-RegionOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsFalse)

// addPeer1, transferLeader1, removePeer2
Expand All @@ -126,9 +126,9 @@ func (s *testOperatorSuite) TestOperator(c *C) {
c.Assert(op.Check(region), Equals, RemovePeer{FromStore: 2})
c.Assert(atomic.LoadInt32(&op.currentStep), Equals, int32(2))
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-LeaderOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-RegionOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-SlowOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsTrue)
res, err := json.Marshal(op)
c.Assert(err, IsNil)
Expand All @@ -139,7 +139,7 @@ func (s *testOperatorSuite) TestOperator(c *C) {
op = s.newTestOperator(1, OpLeader, steps...)
op.Start()
c.Assert(op.CheckTimeout(), IsFalse)
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-LeaderOperatorWaitTime-time.Second))
SetOperatorStatusReachTime(op, STARTED, op.GetStartTime().Add(-FastOperatorWaitTime-time.Second))
c.Assert(op.CheckTimeout(), IsTrue)
}

Expand Down Expand Up @@ -292,7 +292,7 @@ func (s *testOperatorSuite) TestCheckTimeout(c *C) {
c.Assert(op.Status(), Equals, CREATED)
c.Assert(op.Start(), IsTrue)
op.currentStep = int32(len(op.steps))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-RegionOperatorWaitTime))
SetOperatorStatusReachTime(op, STARTED, time.Now().Add(-SlowOperatorWaitTime))
c.Assert(op.CheckTimeout(), IsFalse)
c.Assert(op.Status(), Equals, SUCCESS)
}
Expand Down Expand Up @@ -353,7 +353,7 @@ func (s *testOperatorSuite) TestCheck(c *C) {
c.Assert(op.Start(), IsTrue)
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, STARTED)
op.status.setTime(STARTED, time.Now().Add(-RegionOperatorWaitTime))
op.status.setTime(STARTED, time.Now().Add(-SlowOperatorWaitTime))
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, TIMEOUT)
}
Expand All @@ -368,7 +368,7 @@ func (s *testOperatorSuite) TestCheck(c *C) {
c.Assert(op.Start(), IsTrue)
c.Assert(op.Check(region), NotNil)
c.Assert(op.Status(), Equals, STARTED)
op.status.setTime(STARTED, time.Now().Add(-RegionOperatorWaitTime))
op.status.setTime(STARTED, time.Now().Add(-SlowOperatorWaitTime))
region = s.newTestRegion(1, 1, [2]uint64{1, 1})
c.Assert(op.Check(region), IsNil)
c.Assert(op.Status(), Equals, SUCCESS)
Expand Down
2 changes: 1 addition & 1 deletion server/schedule/operator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func (t *testOperatorControllerSuite) TestCheckAddUnexpectedStatus(c *C) {
op := operator.NewOperator("test", "test", 1, &metapb.RegionEpoch{}, operator.OpRegion, steps...)
c.Assert(oc.checkAddOperator(op), IsTrue)
op.Start()
operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.RegionOperatorWaitTime))
operator.SetOperatorStatusReachTime(op, operator.STARTED, time.Now().Add(-operator.SlowOperatorWaitTime))
c.Assert(op.CheckTimeout(), IsTrue)
c.Assert(oc.checkAddOperator(op), IsFalse)
}
Expand Down