From 0805e850d41ed72ee709fb175d4646be8a24f18c Mon Sep 17 00:00:00 2001
From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com>
Date: Wed, 24 Apr 2024 13:06:10 +0800
Subject: [PATCH] br: handle region leader miss (#52822)

close pingcap/tidb#50501, close pingcap/tidb#51124
---
 br/pkg/restore/import.go                  |  3 +-
 br/pkg/restore/import_retry_test.go       | 11 ++--
 br/pkg/restore/split/mock_pd_client.go    |  7 ++-
 br/pkg/restore/split/split.go             | 16 +++++
 br/pkg/restore/split/split_test.go        | 76 +++++++++++++++++++++++
 pkg/lightning/backend/local/BUILD.bazel   |  1 +
 pkg/lightning/backend/local/duplicate.go  |  4 +-
 pkg/lightning/backend/local/region_job.go |  4 +-
 8 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/br/pkg/restore/import.go b/br/pkg/restore/import.go
index be549ac13c6a0..6f21ca7229ad3 100644
--- a/br/pkg/restore/import.go
+++ b/br/pkg/restore/import.go
@@ -1340,7 +1340,8 @@ func (importer *FileImporter) ingestSSTs(
 ) (*import_sstpb.IngestResponse, error) {
     leader := regionInfo.Leader
     if leader == nil {
-        leader = regionInfo.Region.GetPeers()[0]
+        return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound,
+            "region id %d has no leader", regionInfo.Region.Id)
     }
     reqCtx := &kvrpcpb.Context{
         RegionId:    regionInfo.Region.GetId(),
diff --git a/br/pkg/restore/import_retry_test.go b/br/pkg/restore/import_retry_test.go
index c10a6c56b14c8..8e2a386b0e5f5 100644
--- a/br/pkg/restore/import_retry_test.go
+++ b/br/pkg/restore/import_retry_test.go
@@ -70,7 +70,8 @@ func initTestClient(isRawKv bool) *TestClient {
         }
         regions[i] = &split.RegionInfo{
             Leader: &metapb.Peer{
-                Id: i,
+                Id:      i,
+                StoreId: 1,
             },
             Region: &metapb.Region{
                 Id:       i,
@@ -281,7 +282,7 @@ func TestEpochNotMatch(t *testing.T) {
                 {Id: 43},
             },
         },
-        Leader: &metapb.Peer{Id: 43},
+        Leader: &metapb.Peer{Id: 43, StoreId: 1},
     }
     newRegion := pdtypes.NewRegionInfo(info.Region, info.Leader)
     mergeRegion := func() {
@@ -340,7 +341,8 @@ func TestRegionSplit(t *testing.T) {
                 EndKey:   codec.EncodeBytes(nil, []byte("aayy")),
             },
             Leader: &metapb.Peer{
-                Id: 43,
+                Id:      43,
+                StoreId: 1,
             },
         },
         {
@@ -350,7 +352,8 @@ func TestRegionSplit(t *testing.T) {
                 EndKey:   target.Region.EndKey,
             },
             Leader: &metapb.Peer{
-                Id: 45,
+                Id:      45,
+                StoreId: 1,
             },
         },
     }
diff --git a/br/pkg/restore/split/mock_pd_client.go b/br/pkg/restore/split/mock_pd_client.go
index cc01d68ecfc45..4bd709260e90a 100644
--- a/br/pkg/restore/split/mock_pd_client.go
+++ b/br/pkg/restore/split/mock_pd_client.go
@@ -74,8 +74,13 @@ func (c *MockPDClientForSplit) setRegions(boundaries [][]byte) []*metapb.Region
             StartKey: boundaries[i-1],
             EndKey:   boundaries[i],
         }
+        p := &metapb.Peer{
+            Id:      c.lastRegionID,
+            StoreId: 1,
+        }
         c.Regions.SetRegion(&pdtypes.Region{
-            Meta: r,
+            Meta:   r,
+            Leader: p,
         })
         ret = append(ret, r)
     }
diff --git a/br/pkg/restore/split/split.go b/br/pkg/restore/split/split.go
index 97197df839ccb..c69e5959f9812 100644
--- a/br/pkg/restore/split/split.go
+++ b/br/pkg/restore/split/split.go
@@ -59,7 +59,23 @@ func checkRegionConsistency(startKey, endKey []byte, regions []*RegionInfo) erro
     }
 
     cur := regions[0]
+    if cur.Leader == nil {
+        return errors.Annotatef(berrors.ErrPDBatchScanRegion,
+            "region %d's leader is nil", cur.Region.Id)
+    }
+    if cur.Leader.StoreId == 0 {
+        return errors.Annotatef(berrors.ErrPDBatchScanRegion,
+            "region %d's leader's store id is 0", cur.Region.Id)
+    }
     for _, r := range regions[1:] {
+        if r.Leader == nil {
+            return errors.Annotatef(berrors.ErrPDBatchScanRegion,
+                "region %d's leader is nil", r.Region.Id)
+        }
+        if r.Leader.StoreId == 0 {
+            return errors.Annotatef(berrors.ErrPDBatchScanRegion,
+                "region %d's leader's store id is 0", r.Region.Id)
+        }
         if !bytes.Equal(cur.Region.EndKey, r.Region.StartKey) {
             return errors.Annotatef(berrors.ErrPDBatchScanRegion,
                 "region %d's endKey not equal to next region %d's startKey, endKey: %s, startKey: %s, region epoch: %s %s",
diff --git a/br/pkg/restore/split/split_test.go b/br/pkg/restore/split/split_test.go
index 077cdcdd1cb54..9ca523fe214f4 100644
--- a/br/pkg/restore/split/split_test.go
+++ b/br/pkg/restore/split/split_test.go
@@ -504,6 +504,10 @@ func TestPaginateScanRegion(t *testing.T) {
             StartKey: []byte{1},
             EndKey:   []byte{2},
         },
+        Leader: &metapb.Peer{
+            Id:      1,
+            StoreId: 1,
+        },
     })
     mockPDClient.Regions.SetRegion(&pdtypes.Region{
         Meta: &metapb.Region{
@@ -511,6 +515,10 @@ func TestPaginateScanRegion(t *testing.T) {
             StartKey: []byte{4},
             EndKey:   []byte{5},
         },
+        Leader: &metapb.Peer{
+            Id:      4,
+            StoreId: 1,
+        },
     })
 
     _, err = PaginateScanRegion(ctx, mockClient, []byte{1}, []byte{5}, 3)
@@ -525,6 +533,10 @@ func TestPaginateScanRegion(t *testing.T) {
                 StartKey: []byte{2},
                 EndKey:   []byte{3},
             },
+            Leader: &metapb.Peer{
+                Id:      2,
+                StoreId: 1,
+            },
         },
         {
             Meta: &metapb.Region{
@@ -532,6 +544,10 @@ func TestPaginateScanRegion(t *testing.T) {
                 StartKey: []byte{3},
                 EndKey:   []byte{4},
             },
+            Leader: &metapb.Peer{
+                Id:      3,
+                StoreId: 1,
+            },
         },
     }
     mockPDClient.scanRegions.beforeHook = func() {
@@ -590,6 +606,10 @@ func TestRegionConsistency(t *testing.T) {
             "region 6's endKey not equal to next region 8's startKey(.*?)",
             []*RegionInfo{
                 {
+                    Leader: &metapb.Peer{
+                        Id:      6,
+                        StoreId: 1,
+                    },
                     Region: &metapb.Region{
                         Id:       6,
                         StartKey: codec.EncodeBytes([]byte{}, []byte("b")),
@@ -598,6 +618,10 @@ func TestRegionConsistency(t *testing.T) {
                     },
                 },
                 {
+                    Leader: &metapb.Peer{
+                        Id:      8,
+                        StoreId: 1,
+                    },
                     Region: &metapb.Region{
                         Id:       8,
                         StartKey: codec.EncodeBytes([]byte{}, []byte("e")),
@@ -606,6 +630,58 @@ func TestRegionConsistency(t *testing.T) {
                 },
             },
         },
+        {
+            codec.EncodeBytes([]byte{}, []byte("c")),
+            codec.EncodeBytes([]byte{}, []byte("e")),
+            "region 6's leader is nil(.*?)",
+            []*RegionInfo{
+                {
+                    Region: &metapb.Region{
+                        Id:          6,
+                        StartKey:    codec.EncodeBytes([]byte{}, []byte("c")),
+                        EndKey:      codec.EncodeBytes([]byte{}, []byte("d")),
+                        RegionEpoch: nil,
+                    },
+                },
+                {
+                    Region: &metapb.Region{
+                        Id:       8,
+                        StartKey: codec.EncodeBytes([]byte{}, []byte("d")),
+                        EndKey:   codec.EncodeBytes([]byte{}, []byte("e")),
+                    },
+                },
+            },
+        },
+        {
+            codec.EncodeBytes([]byte{}, []byte("c")),
+            codec.EncodeBytes([]byte{}, []byte("e")),
+            "region 6's leader's store id is 0(.*?)",
+            []*RegionInfo{
+                {
+                    Leader: &metapb.Peer{
+                        Id:      6,
+                        StoreId: 0,
+                    },
+                    Region: &metapb.Region{
+                        Id:          6,
+                        StartKey:    codec.EncodeBytes([]byte{}, []byte("c")),
+                        EndKey:      codec.EncodeBytes([]byte{}, []byte("d")),
+                        RegionEpoch: nil,
+                    },
+                },
+                {
+                    Leader: &metapb.Peer{
+                        Id:      6,
+                        StoreId: 0,
+                    },
+                    Region: &metapb.Region{
+                        Id:       8,
+                        StartKey: codec.EncodeBytes([]byte{}, []byte("d")),
+                        EndKey:   codec.EncodeBytes([]byte{}, []byte("e")),
+                    },
+                },
+            },
+        },
     }
     for _, ca := range cases {
         err := checkRegionConsistency(ca.startKey, ca.endKey, ca.regions)
diff --git a/pkg/lightning/backend/local/BUILD.bazel b/pkg/lightning/backend/local/BUILD.bazel
index 13eb66eab9019..c297e333d2d7d 100644
--- a/pkg/lightning/backend/local/BUILD.bazel
+++ b/pkg/lightning/backend/local/BUILD.bazel
@@ -23,6 +23,7 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//br/pkg/checksum",
+        "//br/pkg/errors",
         "//br/pkg/logutil",
         "//br/pkg/membuf",
         "//br/pkg/pdutil",
diff --git a/pkg/lightning/backend/local/duplicate.go b/pkg/lightning/backend/local/duplicate.go
index 7f72b7b998448..2e93a25abe513 100644
--- a/pkg/lightning/backend/local/duplicate.go
+++ b/pkg/lightning/backend/local/duplicate.go
@@ -31,6 +31,7 @@ import (
     "github.com/pingcap/kvproto/pkg/errorpb"
     "github.com/pingcap/kvproto/pkg/import_sstpb"
     "github.com/pingcap/kvproto/pkg/kvrpcpb"
+    berrors "github.com/pingcap/tidb/br/pkg/errors"
     "github.com/pingcap/tidb/br/pkg/logutil"
     "github.com/pingcap/tidb/br/pkg/restore/split"
     "github.com/pingcap/tidb/pkg/distsql"
@@ -312,7 +313,8 @@ func getDupDetectClient(
 ) (import_sstpb.ImportSST_DuplicateDetectClient, error) {
     leader := region.Leader
     if leader == nil {
-        leader = region.Region.GetPeers()[0]
+        return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound,
+            "region id %d has no leader", region.Region.Id)
     }
     importClient, err := importClientFactory.Create(ctx, leader.GetStoreId())
     if err != nil {
diff --git a/pkg/lightning/backend/local/region_job.go b/pkg/lightning/backend/local/region_job.go
index db9170e6af6d2..7bc812e4b9bb6 100644
--- a/pkg/lightning/backend/local/region_job.go
+++ b/pkg/lightning/backend/local/region_job.go
@@ -30,6 +30,7 @@ import (
     sst "github.com/pingcap/kvproto/pkg/import_sstpb"
     "github.com/pingcap/kvproto/pkg/kvrpcpb"
     "github.com/pingcap/kvproto/pkg/metapb"
+    berrors "github.com/pingcap/tidb/br/pkg/errors"
     "github.com/pingcap/tidb/br/pkg/logutil"
     "github.com/pingcap/tidb/br/pkg/restore/split"
     "github.com/pingcap/tidb/pkg/kv"
@@ -624,7 +625,8 @@ func (local *Backend) doIngest(ctx context.Context, j *regionJob) (*sst.IngestRe
     leader := j.region.Leader
     if leader == nil {
-        leader = j.region.Region.GetPeers()[0]
+        return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound,
+            "region id %d has no leader", j.region.Region.Id)
     }
     cli, err := clientFactory.Create(ctx, leader.StoreId)
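The patch applies the same guard everywhere the code previously fell back to the first peer: when PD has not reported a leader for a region (or, in checkRegionConsistency, reports a leader whose store id is 0), the function now returns an error instead of sending the request to an arbitrary peer. The Go sketch below only illustrates that pattern under simplified assumptions; regionInfo, peer, errLeaderNotFound and pickLeader are stand-ins invented for the example, not the BR types or APIs used in the diff.

package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the structures touched by the patch
// (split.RegionInfo and metapb.Peer); not the real definitions.
type peer struct {
	Id      uint64
	StoreId uint64
}

type regionInfo struct {
	Id     uint64
	Leader *peer
	Peers  []*peer
}

// errLeaderNotFound plays the role of the leader-missing error returned by the patch.
var errLeaderNotFound = errors.New("leader not found")

// pickLeader combines the two guards added in the diff: the nil-leader check at the
// ingest call sites and the store-id-0 check in checkRegionConsistency. It returns an
// error instead of falling back to Peers[0], which may not be able to serve the request.
func pickLeader(r *regionInfo) (*peer, error) {
	if r.Leader == nil {
		return nil, fmt.Errorf("region id %d has no leader: %w", r.Id, errLeaderNotFound)
	}
	if r.Leader.StoreId == 0 {
		return nil, fmt.Errorf("region id %d has a leader with store id 0: %w", r.Id, errLeaderNotFound)
	}
	return r.Leader, nil
}

func main() {
	regions := []*regionInfo{
		{Id: 1, Leader: &peer{Id: 10, StoreId: 1}},     // healthy leader
		{Id: 2, Leader: nil, Peers: []*peer{{Id: 20}}}, // leader not reported yet
		{Id: 3, Leader: &peer{Id: 30, StoreId: 0}},     // leader without a store id
	}
	for _, r := range regions {
		leader, err := pickLeader(r)
		if err != nil {
			// A caller would surface or retry this instead of targeting Peers[0].
			fmt.Println("skip:", err)
			continue
		}
		fmt.Printf("send request for region %d to store %d\n", r.Id, leader.StoreId)
	}
}

Failing fast keeps the decision with the caller's retry logic (the kind of behavior import_retry_test.go exercises) rather than guessing a peer that may be a follower or not yet initialized.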