Skip to content

Commit

Permalink
tikv: refine region-cache log and metric (#10589) (#10667)
Browse files Browse the repository at this point in the history
  • Loading branch information
lysu authored and zz-jason committed Jun 3, 2019
1 parent 5927716 commit ce3e35d
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 16 deletions.
36 changes: 25 additions & 11 deletions store/tikv/region_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,14 @@ const (
)

var (
tikvRegionCacheCounterWithDropRegionFromCacheOK = metrics.TiKVRegionCacheCounter.WithLabelValues("drop_region_from_cache", "ok")
tikvRegionCacheCounterWithGetRegionByIDOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region_by_id", "ok")
tikvRegionCacheCounterWithGetRegionByIDError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region_by_id", "err")
tikvRegionCacheCounterWithGetRegionOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region", "ok")
tikvRegionCacheCounterWithGetRegionError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region", "err")
tikvRegionCacheCounterWithGetStoreOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_store", "ok")
tikvRegionCacheCounterWithGetStoreError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_store", "err")
tikvRegionCacheCounterWithInvalidateRegionFromCacheOK = metrics.TiKVRegionCacheCounter.WithLabelValues("invalidate_region_from_cache", "ok")
tikvRegionCacheCounterWithSendFail = metrics.TiKVRegionCacheCounter.WithLabelValues("send_fail", "ok")
tikvRegionCacheCounterWithGetRegionByIDOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region_by_id", "ok")
tikvRegionCacheCounterWithGetRegionByIDError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region_by_id", "err")
tikvRegionCacheCounterWithGetRegionOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region", "ok")
tikvRegionCacheCounterWithGetRegionError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_region", "err")
tikvRegionCacheCounterWithGetStoreOK = metrics.TiKVRegionCacheCounter.WithLabelValues("get_store", "ok")
tikvRegionCacheCounterWithGetStoreError = metrics.TiKVRegionCacheCounter.WithLabelValues("get_store", "err")
)

const (
Expand Down Expand Up @@ -121,6 +122,7 @@ func (r *Region) checkRegionCacheTTL(ts int64) bool {

// invalidate invalidates a region, next time it will got null result.
func (r *Region) invalidate() {
tikvRegionCacheCounterWithInvalidateRegionFromCacheOK.Inc()
atomic.StoreInt64(&r.lastAccess, invalidatedLastAccessTime)
}

Expand Down Expand Up @@ -359,13 +361,18 @@ func (c *RegionCache) findRegionByKey(bo *Backoffer, key []byte, isEndKey bool)
}

// OnSendFail handles send request fail logic.
func (c *RegionCache) OnSendFail(bo *Backoffer, ctx *RPCContext, scheduleReload bool) {
func (c *RegionCache) OnSendFail(bo *Backoffer, ctx *RPCContext, scheduleReload bool, err error) {
tikvRegionCacheCounterWithSendFail.Inc()
r := c.getCachedRegionWithRLock(ctx.Region)
if r != nil {
c.switchNextPeer(r, ctx.PeerIdx)
if scheduleReload {
r.scheduleReload()
}
logutil.Logger(bo.ctx).Info("switch region peer to next due to send request fail",
zap.Stringer("current", ctx),
zap.Bool("needReload", scheduleReload),
zap.Error(err))
}
}

Expand Down Expand Up @@ -457,7 +464,6 @@ func (c *RegionCache) InvalidateCachedRegion(id RegionVerID) {
if cachedRegion == nil {
return
}
tikvRegionCacheCounterWithDropRegionFromCacheOK.Inc()
cachedRegion.invalidate()
}

Expand All @@ -473,14 +479,23 @@ func (c *RegionCache) UpdateLeader(regionID RegionVerID, leaderStoreID uint64, c

if leaderStoreID == 0 {
c.switchNextPeer(r, currentPeerIdx)
logutil.Logger(context.Background()).Info("switch region peer to next due to NotLeader with NULL leader",
zap.Int("currIdx", currentPeerIdx),
zap.Uint64("regionID", regionID.GetID()))
return
}

if !c.switchToPeer(r, leaderStoreID) {
logutil.Logger(context.Background()).Debug("regionCache: cannot find peer when updating leader",
logutil.Logger(context.Background()).Info("invalidate region cache due to cannot find peer when updating leader",
zap.Uint64("regionID", regionID.GetID()),
zap.Int("currIdx", currentPeerIdx),
zap.Uint64("leaderStoreID", leaderStoreID))
r.invalidate()
} else {
logutil.Logger(context.Background()).Info("switch region leader to specific leader due to kv return NotLeader",
zap.Uint64("regionID", regionID.GetID()),
zap.Int("currIdx", currentPeerIdx),
zap.Uint64("leaderStoreID", leaderStoreID))
}
}

Expand Down Expand Up @@ -712,7 +727,6 @@ func (c *RegionCache) OnRegionEpochNotMatch(bo *Backoffer, ctx *RPCContext, curr
if needInvalidateOld {
cachedRegion, ok := c.mu.regions[ctx.Region]
if ok {
tikvRegionCacheCounterWithDropRegionFromCacheOK.Inc()
cachedRegion.invalidate()
}
}
Expand Down
8 changes: 4 additions & 4 deletions store/tikv/region_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func (s *testRegionCacheSuite) TestSendFailedButLeaderNotChange(c *C) {
c.Assert(len(ctx.Meta.Peers), Equals, 3)

// send fail leader switch to 2
s.cache.OnSendFail(s.bo, ctx, false)
s.cache.OnSendFail(s.bo, ctx, false, nil)
ctx, err = s.cache.GetRPCContext(s.bo, loc.Region)
c.Assert(err, IsNil)
c.Assert(ctx.Peer.Id, Equals, s.peer2)
Expand Down Expand Up @@ -267,7 +267,7 @@ func (s *testRegionCacheSuite) TestSendFailedInHibernateRegion(c *C) {
c.Assert(len(ctx.Meta.Peers), Equals, 3)

// send fail leader switch to 2
s.cache.OnSendFail(s.bo, ctx, false)
s.cache.OnSendFail(s.bo, ctx, false, nil)
ctx, err = s.cache.GetRPCContext(s.bo, loc.Region)
c.Assert(err, IsNil)
c.Assert(ctx.Peer.Id, Equals, s.peer2)
Expand Down Expand Up @@ -303,13 +303,13 @@ func (s *testRegionCacheSuite) TestSendFailedInMultipleNode(c *C) {
c.Assert(len(ctx.Meta.Peers), Equals, 3)

// send fail leader switch to 2
s.cache.OnSendFail(s.bo, ctx, false)
s.cache.OnSendFail(s.bo, ctx, false, nil)
ctx, err = s.cache.GetRPCContext(s.bo, loc.Region)
c.Assert(err, IsNil)
c.Assert(ctx.Peer.Id, Equals, s.peer2)

// send 2 fail leader switch to 3
s.cache.OnSendFail(s.bo, ctx, false)
s.cache.OnSendFail(s.bo, ctx, false, nil)
ctx, err = s.cache.GetRPCContext(s.bo, loc.Region)
c.Assert(err, IsNil)
c.Assert(ctx.Peer.Id, Equals, peer3)
Expand Down
2 changes: 1 addition & 1 deletion store/tikv/region_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func (s *RegionRequestSender) onSendFail(bo *Backoffer, ctx *RPCContext, err err
}
}

s.regionCache.OnSendFail(bo, ctx, s.needReloadRegion(ctx))
s.regionCache.OnSendFail(bo, ctx, s.needReloadRegion(ctx), err)

// Retry on send request failure when it's not canceled.
// When a store is not available, the leader of related region should be elected quickly.
Expand Down

0 comments on commit ce3e35d

Please sign in to comment.