From 2ea6d00a56e04460a8774a0278513465f42aced3 Mon Sep 17 00:00:00 2001 From: Zak Zhao <57036248+joccau@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:24:39 +0800 Subject: [PATCH] br: wait more time to wait splitting the region (#42182) close pingcap/tidb#42001 --- br/pkg/restore/split/BUILD.bazel | 8 +++- br/pkg/restore/split/split.go | 34 +++++++++----- br/pkg/restore/split/split_test.go | 73 ++++++++++++++++++++++++++++++ br/pkg/utils/backoff.go | 4 ++ 4 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 br/pkg/restore/split/split_test.go diff --git a/br/pkg/restore/split/BUILD.bazel b/br/pkg/restore/split/BUILD.bazel index 7c91f32cfae90..a5b133c1a13f8 100644 --- a/br/pkg/restore/split/BUILD.bazel +++ b/br/pkg/restore/split/BUILD.bazel @@ -43,10 +43,16 @@ go_library( go_test( name = "split_test", timeout = "short", - srcs = ["sum_sorted_test.go"], + srcs = [ + "split_test.go", + "sum_sorted_test.go", + ], flaky = True, deps = [ ":split", + "//br/pkg/errors", + "//br/pkg/utils", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_stretchr_testify//require", ], ) diff --git a/br/pkg/restore/split/split.go b/br/pkg/restore/split/split.go index 2084df07acf51..a8f7b5a1d4bfb 100644 --- a/br/pkg/restore/split/split.go +++ b/br/pkg/restore/split/split.go @@ -9,6 +9,7 @@ import ( "time" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" @@ -17,7 +18,7 @@ import ( ) var ( - ScanRegionAttemptTimes = 128 + ScanRegionAttemptTimes = 150 ) // Constants for split retry machinery. 
@@ -115,7 +116,7 @@ func PaginateScanRegion( return err } return nil - }, newScanRegionBackoffer()) + }, NewScanRegionBackoffer()) return regions, err } @@ -174,33 +175,44 @@ func ScanRegionsWithRetry( } return nil - }, newScanRegionBackoffer()) + }, NewScanRegionBackoffer()) return regions, err } type scanRegionBackoffer struct { - attempt int + stat utils.RetryState } -func newScanRegionBackoffer() utils.Backoffer { +// NewScanRegionBackoffer create a backoff to retry to scan regions. +func NewScanRegionBackoffer() utils.Backoffer { return &scanRegionBackoffer{ - attempt: ScanRegionAttemptTimes, + stat: utils.InitialRetryState( + ScanRegionAttemptTimes, + time.Millisecond*10, + time.Second*2, + ), } } // NextBackoff returns a duration to wait before retrying again func (b *scanRegionBackoffer) NextBackoff(err error) time.Duration { if berrors.ErrPDBatchScanRegion.Equal(err) { - // 1s * 60 could be enough for splitting remain regions in the hole. - b.attempt-- - return time.Second + // it needs more time to wait splitting the regions that contains data in PITR. + // 2s * 150 + delayTime := b.stat.ExponentialBackoff() + failpoint.Inject("hint-scan-region-backoff", func(val failpoint.Value) { + if val.(bool) { + delayTime = time.Microsecond + } + }) + return delayTime } - b.attempt = 0 + b.stat.StopRetry() return 0 } // Attempt returns the remain attempt times func (b *scanRegionBackoffer) Attempt() int { - return b.attempt + return b.stat.Attempt() } diff --git a/br/pkg/restore/split/split_test.go b/br/pkg/restore/split/split_test.go new file mode 100644 index 0000000000000..43e5afcff87b8 --- /dev/null +++ b/br/pkg/restore/split/split_test.go @@ -0,0 +1,73 @@ +// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. 
+package split_test + +import ( + "context" + "testing" + + "github.com/pingcap/failpoint" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/restore/split" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/stretchr/testify/require" +) + +func TestScanRegionBackOfferWithSuccess(t *testing.T) { + var counter int + bo := split.NewScanRegionBackoffer() + + err := utils.WithRetry(context.Background(), func() error { + defer func() { + counter++ + }() + + if counter == 3 { + return nil + } + return berrors.ErrPDBatchScanRegion + }, bo) + require.NoError(t, err) + require.Equal(t, counter, 4) +} + +func TestScanRegionBackOfferWithFail(t *testing.T) { + _ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)") + defer func() { + _ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff") + }() + + var counter int + bo := split.NewScanRegionBackoffer() + + err := utils.WithRetry(context.Background(), func() error { + defer func() { + counter++ + }() + return berrors.ErrPDBatchScanRegion + }, bo) + require.Error(t, err) + require.Equal(t, counter, split.ScanRegionAttemptTimes) +} + +func TestScanRegionBackOfferWithStopRetry(t *testing.T) { + _ = failpoint.Enable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff", "return(true)") + defer func() { + _ = failpoint.Disable("github.com/pingcap/tidb/br/pkg/restore/split/hint-scan-region-backoff") + }() + + var counter int + bo := split.NewScanRegionBackoffer() + + err := utils.WithRetry(context.Background(), func() error { + defer func() { + counter++ + }() + + if counter < 5 { + return berrors.ErrPDBatchScanRegion + } + return berrors.ErrKVUnknown + }, bo) + require.Error(t, err) + require.Equal(t, counter, 6) +} diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go index 5f19bc6a29078..67ded2df49172 100644 --- a/br/pkg/utils/backoff.go +++ b/br/pkg/utils/backoff.go @@ -98,6 
+98,10 @@ func (rs *RetryState) Attempt() int { return rs.maxRetry - rs.retryTimes } +func (rs *RetryState) StopRetry() { + rs.retryTimes = rs.maxRetry +} + // NextBackoff implements the `Backoffer`. func (rs *RetryState) NextBackoff(error) time.Duration { return rs.ExponentialBackoff()