Skip to content

Commit

Permalink
enhance: refactor leader_observer to leader_checker (#29454)
Browse files Browse the repository at this point in the history
issue: #29453 

Syncing the distribution by RPC also calls loadSegment/releaseSegment,
which may cause all kinds of concurrency issues on the same segment, such as
a concurrent load and release of one segment.
This PR adds leader_checker, which generates load/release tasks to correct
the leader view, instead of syncing the distribution by RPC.

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
  • Loading branch information
weiliu1031 authored Jan 5, 2024
1 parent 9e2e715 commit e98c62a
Show file tree
Hide file tree
Showing 19 changed files with 698 additions and 942 deletions.
5 changes: 3 additions & 2 deletions internal/querycoordv2/checkers/balance_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
Expand All @@ -55,8 +56,8 @@ func NewBalanceChecker(meta *meta.Meta, balancer balance.Balance, nodeMgr *sessi
}
}

func (b *BalanceChecker) ID() CheckerType {
return balanceChecker
func (b *BalanceChecker) ID() utils.CheckerType {
return utils.BalanceChecker
}

func (b *BalanceChecker) Description() string {
Expand Down
4 changes: 2 additions & 2 deletions internal/querycoordv2/checkers/channel_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ func NewChannelChecker(
}
}

func (c *ChannelChecker) ID() CheckerType {
return channelChecker
func (c *ChannelChecker) ID() utils.CheckerType {
return utils.ChannelChecker
}

func (c *ChannelChecker) Description() string {
Expand Down
3 changes: 2 additions & 1 deletion internal/querycoordv2/checkers/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ import (
"sync/atomic"

"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
)

type Checker interface {
ID() CheckerType
ID() utils.CheckerType
Description() string
Check(ctx context.Context) []task.Task
IsActive() bool
Expand Down
78 changes: 26 additions & 52 deletions internal/querycoordv2/checkers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,15 @@ import (
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/log"
)

const (
segmentCheckerName = "segment_checker"
channelCheckerName = "channel_checker"
balanceCheckerName = "balance_checker"
indexCheckerName = "index_checker"
)

type CheckerType int32

const (
channelChecker CheckerType = iota + 1
segmentChecker
balanceChecker
indexChecker
)

var (
checkRoundTaskNumLimit = 256
checkerOrder = []string{channelCheckerName, segmentCheckerName, balanceCheckerName, indexCheckerName}
checkerNames = map[CheckerType]string{
segmentChecker: segmentCheckerName,
channelChecker: channelCheckerName,
balanceChecker: balanceCheckerName,
indexChecker: indexCheckerName,
}
errTypeNotFound = errors.New("checker type not found")
)

func (s CheckerType) String() string {
return checkerNames[s]
}
var errTypeNotFound = errors.New("checker type not found")

type CheckerController struct {
cancel context.CancelFunc
manualCheckChs map[CheckerType]chan struct{}
manualCheckChs map[utils.CheckerType]chan struct{}
meta *meta.Meta
dist *meta.DistributionManager
targetMgr *meta.TargetManager
Expand All @@ -75,7 +46,7 @@ type CheckerController struct {
balancer balance.Balance

scheduler task.Scheduler
checkers map[CheckerType]Checker
checkers map[utils.CheckerType]Checker

stopOnce sync.Once
}
Expand All @@ -91,17 +62,18 @@ func NewCheckerController(
) *CheckerController {
// CheckerController runs the checkers in order;
// an earlier checker has higher priority than a later one
checkers := map[CheckerType]Checker{
channelChecker: NewChannelChecker(meta, dist, targetMgr, balancer),
segmentChecker: NewSegmentChecker(meta, dist, targetMgr, balancer, nodeMgr),
balanceChecker: NewBalanceChecker(meta, balancer, nodeMgr, scheduler),
indexChecker: NewIndexChecker(meta, dist, broker, nodeMgr),
checkers := map[utils.CheckerType]Checker{
utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, balancer),
utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, balancer, nodeMgr),
utils.BalanceChecker: NewBalanceChecker(meta, balancer, nodeMgr, scheduler),
utils.IndexChecker: NewIndexChecker(meta, dist, broker, nodeMgr),
utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr),
}

manualCheckChs := map[CheckerType]chan struct{}{
channelChecker: make(chan struct{}, 1),
segmentChecker: make(chan struct{}, 1),
balanceChecker: make(chan struct{}, 1),
manualCheckChs := map[utils.CheckerType]chan struct{}{
utils.ChannelChecker: make(chan struct{}, 1),
utils.SegmentChecker: make(chan struct{}, 1),
utils.BalanceChecker: make(chan struct{}, 1),
}

return &CheckerController{
Expand All @@ -124,22 +96,24 @@ func (controller *CheckerController) Start() {
}
}

func getCheckerInterval(checker CheckerType) time.Duration {
func getCheckerInterval(checker utils.CheckerType) time.Duration {
switch checker {
case segmentChecker:
case utils.SegmentChecker:
return Params.QueryCoordCfg.SegmentCheckInterval.GetAsDuration(time.Millisecond)
case channelChecker:
case utils.ChannelChecker:
return Params.QueryCoordCfg.ChannelCheckInterval.GetAsDuration(time.Millisecond)
case balanceChecker:
case utils.BalanceChecker:
return Params.QueryCoordCfg.BalanceCheckInterval.GetAsDuration(time.Millisecond)
case indexChecker:
case utils.IndexChecker:
return Params.QueryCoordCfg.IndexCheckInterval.GetAsDuration(time.Millisecond)
case utils.LeaderChecker:
return Params.QueryCoordCfg.LeaderViewUpdateInterval.GetAsDuration(time.Millisecond)
default:
return Params.QueryCoordCfg.CheckInterval.GetAsDuration(time.Millisecond)
}
}

func (controller *CheckerController) startChecker(ctx context.Context, checker CheckerType) {
func (controller *CheckerController) startChecker(ctx context.Context, checker utils.CheckerType) {
interval := getCheckerInterval(checker)
ticker := time.NewTicker(interval)
defer ticker.Stop()
Expand Down Expand Up @@ -180,7 +154,7 @@ func (controller *CheckerController) Check() {
}

// check is the real implementation of Check
func (controller *CheckerController) check(ctx context.Context, checkType CheckerType) {
func (controller *CheckerController) check(ctx context.Context, checkType utils.CheckerType) {
checker := controller.checkers[checkType]
tasks := checker.Check(ctx)

Expand All @@ -193,7 +167,7 @@ func (controller *CheckerController) check(ctx context.Context, checkType Checke
}
}

func (controller *CheckerController) Deactivate(typ CheckerType) error {
func (controller *CheckerController) Deactivate(typ utils.CheckerType) error {
for _, checker := range controller.checkers {
if checker.ID() == typ {
checker.Deactivate()
Expand All @@ -203,7 +177,7 @@ func (controller *CheckerController) Deactivate(typ CheckerType) error {
return errTypeNotFound
}

func (controller *CheckerController) Activate(typ CheckerType) error {
func (controller *CheckerController) Activate(typ utils.CheckerType) error {
for _, checker := range controller.checkers {
if checker.ID() == typ {
checker.Activate()
Expand All @@ -213,7 +187,7 @@ func (controller *CheckerController) Activate(typ CheckerType) error {
return errTypeNotFound
}

func (controller *CheckerController) IsActive(typ CheckerType) (bool, error) {
func (controller *CheckerController) IsActive(typ utils.CheckerType) (bool, error) {
for _, checker := range controller.checkers {
if checker.ID() == typ {
return checker.IsActive(), nil
Expand Down
15 changes: 8 additions & 7 deletions internal/querycoordv2/checkers/controller_base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/util/etcd"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
Expand Down Expand Up @@ -80,28 +81,28 @@ func (suite *ControllerBaseTestSuite) SetupTest() {
}

func (s *ControllerBaseTestSuite) TestActivation() {
active, err := s.controller.IsActive(segmentChecker)
active, err := s.controller.IsActive(utils.SegmentChecker)
s.NoError(err)
s.True(active)
err = s.controller.Deactivate(segmentChecker)
err = s.controller.Deactivate(utils.SegmentChecker)
s.NoError(err)
active, err = s.controller.IsActive(segmentChecker)
active, err = s.controller.IsActive(utils.SegmentChecker)
s.NoError(err)
s.False(active)
err = s.controller.Activate(segmentChecker)
err = s.controller.Activate(utils.SegmentChecker)
s.NoError(err)
active, err = s.controller.IsActive(segmentChecker)
active, err = s.controller.IsActive(utils.SegmentChecker)
s.NoError(err)
s.True(active)

invalidTyp := -1
_, err = s.controller.IsActive(CheckerType(invalidTyp))
_, err = s.controller.IsActive(utils.CheckerType(invalidTyp))
s.Equal(errTypeNotFound, err)
}

func (s *ControllerBaseTestSuite) TestListCheckers() {
checkers := s.controller.Checkers()
s.Equal(4, len(checkers))
s.Equal(5, len(checkers))
}

func TestControllerBaseTestSuite(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions internal/querycoordv2/checkers/index_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
Expand Down Expand Up @@ -58,8 +59,8 @@ func NewIndexChecker(
}
}

func (c *IndexChecker) ID() CheckerType {
return indexChecker
func (c *IndexChecker) ID() utils.CheckerType {
return utils.IndexChecker
}

func (c *IndexChecker) Description() string {
Expand Down
Loading

0 comments on commit e98c62a

Please sign in to comment.