Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

store/tikv: increase max backoff time of read requests #25153

Merged
merged 15 commits into from
Jun 6, 2021
Merged
5 changes: 3 additions & 2 deletions store/tikv/2pc.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,8 @@ func (tm *ttlManager) close() {
close(tm.ch)
}

const pessimisticLockMaxBackoff = 20000
const keepAliveMaxBackoff = 20000 // 20 seconds
const pessimisticLockMaxBackoff = 600000 // 10 minutes

func (tm *ttlManager) keepAlive(c *twoPhaseCommitter) {
// Ticker is set to 1/2 of the ManagedLockTTL.
Expand All @@ -745,7 +746,7 @@ func (tm *ttlManager) keepAlive(c *twoPhaseCommitter) {
if tm.lockCtx != nil && tm.lockCtx.Killed != nil && atomic.LoadUint32(tm.lockCtx.Killed) != 0 {
return
}
bo := retry.NewBackofferWithVars(context.Background(), pessimisticLockMaxBackoff, c.txn.vars)
bo := retry.NewBackofferWithVars(context.Background(), keepAliveMaxBackoff, c.txn.vars)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it shouldn't break the loop if the error is not a key error at L779.

now, err := c.store.getTimestampWithRetry(bo, c.txn.GetScope())
if err != nil {
logutil.Logger(bo.GetCtx()).Warn("keepAlive get tso fail",
Expand Down
14 changes: 13 additions & 1 deletion store/tikv/pessimistic.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,22 @@ func (action actionPessimisticLock) handleSingleBatch(c *twoPhaseCommitter, bo *
return errors.Trace(err)
}
if regionErr != nil {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
/// For other region error and the fake region error, backoff because
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// For other region error and the fake region error, backoff because
// For other region error and the fake region error, backoff because

// there's something wrong.
// For the real EpochNotMatch error, don't backoff.
if regionErr.GetEpochNotMatch() == nil || isFakeRegionError(regionErr) {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return errors.Trace(err)
}
}
same, err := batch.relocate(bo, c.store.regionCache)
if err != nil {
return errors.Trace(err)
}
if same {
continue
}
err = c.pessimisticLockMutations(bo, action.LockCtx, batch.mutations)
return errors.Trace(err)
}
Expand Down
13 changes: 9 additions & 4 deletions store/tikv/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (s *Scanner) Value() []byte {
return nil
}

const scannerNextMaxBackoff = 20000
const scannerNextMaxBackoff = 600000 // 10 minutes

// Next return next element.
func (s *Scanner) Next() error {
Expand Down Expand Up @@ -229,9 +229,14 @@ func (s *Scanner) getData(bo *Backoffer) error {
if regionErr != nil {
logutil.BgLogger().Debug("scanner getData failed",
zap.Stringer("regionErr", regionErr))
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return errors.Trace(err)
// For other region error and the fake region error, backoff because
// there's something wrong.
// For the real EpochNotMatch error, don't backoff.
if regionErr.GetEpochNotMatch() == nil || isFakeRegionError(regionErr) {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return errors.Trace(err)
}
}
continue
}
Expand Down
42 changes: 36 additions & 6 deletions store/tikv/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func newTiKVSnapshot(store *KVStore, ts uint64, replicaReadSeed uint32) *KVSnaps
}
}

const batchGetMaxBackoff = 20000
const batchGetMaxBackoff = 600000 // 10 minutes

// SetSnapshotTS resets the timestamp for reads.
func (s *KVSnapshot) SetSnapshotTS(ts uint64) {
Expand Down Expand Up @@ -235,6 +235,19 @@ type batchKeys struct {
keys [][]byte
}

func (b *batchKeys) relocate(bo *Backoffer, c *RegionCache) (bool, error) {
begin, end := b.keys[0], b.keys[len(b.keys)-1]
loc, err := c.LocateKey(bo, begin)
if err != nil {
return false, errors.Trace(err)
}
if !loc.Contains(end) {
return false, nil
}
b.region = loc.Region
return true, nil
}

// appendBatchKeysBySize appends keys to b. It may split the keys to make
// sure each batch's size does not exceed the limit.
func appendBatchKeysBySize(b []batchKeys, region RegionVerID, keys [][]byte, sizeFn func([]byte) int, limit int) []batchKeys {
Expand Down Expand Up @@ -339,10 +352,22 @@ func (s *KVSnapshot) batchGetSingleRegion(bo *Backoffer, batch batchKeys, collec
return errors.Trace(err)
}
if regionErr != nil {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
// For other region error and the fake region error, backoff because
// there's something wrong.
// For the real EpochNotMatch error, don't backoff.
if regionErr.GetEpochNotMatch() == nil || isFakeRegionError(regionErr) {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return errors.Trace(err)
}
}
same, err := batch.relocate(bo, cli.regionCache)
if err != nil {
return errors.Trace(err)
}
if same {
continue
}
err = s.batchGetKeysByRegions(bo, pending, collectF)
return errors.Trace(err)
}
Expand Down Expand Up @@ -402,7 +427,7 @@ func (s *KVSnapshot) batchGetSingleRegion(bo *Backoffer, batch batchKeys, collec
}
}

const getMaxBackoff = 20000
const getMaxBackoff = 600000 // 10 minutes

// Get gets the value for key k from snapshot.
func (s *KVSnapshot) Get(ctx context.Context, k []byte) ([]byte, error) {
Expand Down Expand Up @@ -497,9 +522,14 @@ func (s *KVSnapshot) get(ctx context.Context, bo *Backoffer, k []byte) ([]byte,
return nil, errors.Trace(err)
}
if regionErr != nil {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return nil, errors.Trace(err)
// For other region error and the fake region error, backoff because
// there's something wrong.
// For the real EpochNotMatch error, don't backoff.
if regionErr.GetEpochNotMatch() == nil || isFakeRegionError(regionErr) {
err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
if err != nil {
return nil, errors.Trace(err)
}
}
continue
}
Expand Down