Skip to content

Commit

Permalink
statistics: add undersized-region-count and `oversized-region-count…
Browse files Browse the repository at this point in the history
…` unhealthy region type (tikv#4784)

close tikv#4783

Signed-off-by: bufferflies <1045931706@qq.com>
  • Loading branch information
bufferflies authored Apr 19, 2022
1 parent 412472c commit 56a015c
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 17 deletions.
34 changes: 34 additions & 0 deletions server/api/region.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,40 @@ func (h *regionsHandler) GetOfflinePeerRegions(w http.ResponseWriter, r *http.Re
h.rd.JSON(w, http.StatusOK, regionsInfo)
}

// @Tags region
// @Summary List all regions that are oversized.
// @Produce json
// @Success 200 {object} RegionsInfo
// @Failure 500 {string} string "PD server failed to proceed the request."
// @Router /regions/check/oversized-region [get]
func (h *regionsHandler) GetOverSizedRegions(w http.ResponseWriter, r *http.Request) {
	// Fetch the regions recorded under the statistics.OversizedRegion type.
	oversized, err := h.svr.GetHandler().GetRegionsByType(statistics.OversizedRegion)
	if err == nil {
		// Success: convert to the API representation and reply with 200.
		h.rd.JSON(w, http.StatusOK, convertToAPIRegions(oversized))
		return
	}
	// Lookup failed: report the error text with a 500 status.
	h.rd.JSON(w, http.StatusInternalServerError, err.Error())
}

// @Tags region
// @Summary List all regions that are undersized.
// @Produce json
// @Success 200 {object} RegionsInfo
// @Failure 500 {string} string "PD server failed to proceed the request."
// @Router /regions/check/undersized-region [get]
func (h *regionsHandler) GetUndersizedRegions(w http.ResponseWriter, r *http.Request) {
	// Fetch the regions recorded under the statistics.UndersizedRegion type.
	undersized, err := h.svr.GetHandler().GetRegionsByType(statistics.UndersizedRegion)
	if err == nil {
		// Success: convert to the API representation and reply with 200.
		h.rd.JSON(w, http.StatusOK, convertToAPIRegions(undersized))
		return
	}
	// Lookup failed: report the error text with a 500 status.
	h.rd.JSON(w, http.StatusInternalServerError, err.Error())
}

// @Tags region
// @Summary List all empty regions.
// @Produce json
Expand Down
4 changes: 3 additions & 1 deletion server/api/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func (s *serviceMiddlewareBuilder) middlewareFunc(next func(http.ResponseWriter,
}

// getFunctionName returns the bare name of the function or method value f.
//
// The runtime reports a fully qualified symbol such as "pkg/path.(*T).Method-fm";
// everything up to the last "." is dropped, as is any "-" suffix (for example the
// "-fm" marker on method values). An empty string is returned when f is not a
// function value or the runtime cannot resolve it, instead of panicking.
func getFunctionName(f interface{}) string {
	v := reflect.ValueOf(f)
	// reflect.ValueOf(nil) yields the zero Value, whose Kind is Invalid, so this
	// check also covers an untyped nil argument.
	if v.Kind() != reflect.Func {
		return ""
	}
	// FuncForPC returns nil for an unknown PC, e.g. a nil function value.
	fn := runtime.FuncForPC(v.Pointer())
	if fn == nil {
		return ""
	}
	strs := strings.Split(fn.Name(), ".")
	return strings.Split(strs[len(strs)-1], "-")[0]
}

Expand Down Expand Up @@ -275,6 +275,8 @@ func createRouter(prefix string, svr *server.Server) *mux.Router {
registerFunc(clusterRouter, "/regions/check/learner-peer", regionsHandler.GetLearnerPeerRegions, setMethods("GET"))
registerFunc(clusterRouter, "/regions/check/empty-region", regionsHandler.GetEmptyRegions, setMethods("GET"))
registerFunc(clusterRouter, "/regions/check/offline-peer", regionsHandler.GetOfflinePeerRegions, setMethods("GET"))
registerFunc(clusterRouter, "/regions/check/oversized-region", regionsHandler.GetOverSizedRegions, setMethods("GET"))
registerFunc(clusterRouter, "/regions/check/undersized-region", regionsHandler.GetUndersizedRegions, setMethods("GET"))

registerFunc(clusterRouter, "/regions/check/hist-size", regionsHandler.GetSizeHistogram, setMethods("GET"))
registerFunc(clusterRouter, "/regions/check/hist-keys", regionsHandler.GetKeysHistogram, setMethods("GET"))
Expand Down
2 changes: 1 addition & 1 deletion server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ func (c *RaftCluster) Start(s Server) error {
}

c.coordinator = newCoordinator(c.ctx, cluster, s.GetHBStreams())
c.regionStats = statistics.NewRegionStatistics(c.opt, c.ruleManager)
c.regionStats = statistics.NewRegionStatistics(c.opt, c.ruleManager, c.storeConfigManager)
c.limiter = NewStoreLimiter(s.GetPersistOptions())
c.unsafeRecoveryController = newUnsafeRecoveryController(cluster)

Expand Down
2 changes: 1 addition & 1 deletion server/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,7 @@ func (s *testClusterInfoSuite) TestOfflineAndMerge(c *C) {
panic(err)
}
}
cluster.regionStats = statistics.NewRegionStatistics(cluster.GetOpts(), cluster.ruleManager)
cluster.regionStats = statistics.NewRegionStatistics(cluster.GetOpts(), cluster.ruleManager, cluster.storeConfigManager)
cluster.coordinator = newCoordinator(s.ctx, cluster, nil)

// Put 3 stores.
Expand Down
2 changes: 1 addition & 1 deletion server/cluster/coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ func dispatchHeartbeat(co *coordinator, region *core.RegionInfo, stream hbstream

func (s *testCoordinatorSuite) TestCollectMetrics(c *C) {
tc, co, cleanup := prepare(nil, func(tc *testCluster) {
tc.regionStats = statistics.NewRegionStatistics(tc.GetOpts(), nil)
tc.regionStats = statistics.NewRegionStatistics(tc.GetOpts(), nil, tc.storeConfigManager)
}, func(co *coordinator) { co.run() }, c)
defer cleanup()

Expand Down
20 changes: 15 additions & 5 deletions server/statistics/region_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ const (
OfflinePeer
LearnerPeer
EmptyRegion
OversizedRegion
UndersizedRegion
)

const nonIsolation = "none"
Expand All @@ -54,12 +56,15 @@ type RegionStatistics struct {
index map[uint64]RegionStatisticType
offlineIndex map[uint64]RegionStatisticType
ruleManager *placement.RuleManager
storeManager *config.StoreConfigManager
}

// NewRegionStatistics creates a new RegionStatistics.
func NewRegionStatistics(opt *config.PersistOptions, ruleManager *placement.RuleManager) *RegionStatistics {
func NewRegionStatistics(opt *config.PersistOptions, ruleManager *placement.RuleManager, storeManager *config.StoreConfigManager) *RegionStatistics {
r := &RegionStatistics{
opt: opt,
ruleManager: ruleManager,
storeManager: storeManager,
stats: make(map[RegionStatisticType]map[uint64]*RegionInfo),
offlineStats: make(map[RegionStatisticType]map[uint64]*core.RegionInfo),
index: make(map[uint64]RegionStatisticType),
Expand All @@ -71,15 +76,15 @@ func NewRegionStatistics(opt *config.PersistOptions, ruleManager *placement.Rule
r.stats[PendingPeer] = make(map[uint64]*RegionInfo)
r.stats[LearnerPeer] = make(map[uint64]*RegionInfo)
r.stats[EmptyRegion] = make(map[uint64]*RegionInfo)
r.stats[OversizedRegion] = make(map[uint64]*RegionInfo)
r.stats[UndersizedRegion] = make(map[uint64]*RegionInfo)

r.offlineStats[MissPeer] = make(map[uint64]*core.RegionInfo)
r.offlineStats[ExtraPeer] = make(map[uint64]*core.RegionInfo)
r.offlineStats[DownPeer] = make(map[uint64]*core.RegionInfo)
r.offlineStats[PendingPeer] = make(map[uint64]*core.RegionInfo)
r.offlineStats[LearnerPeer] = make(map[uint64]*core.RegionInfo)
r.offlineStats[EmptyRegion] = make(map[uint64]*core.RegionInfo)
r.offlineStats[OfflinePeer] = make(map[uint64]*core.RegionInfo)
r.ruleManager = ruleManager
return r
}

Expand Down Expand Up @@ -163,11 +168,15 @@ func (r *RegionStatistics) Observe(region *core.RegionInfo, stores []*core.Store
PendingPeer: len(region.GetPendingPeers()) > 0,
LearnerPeer: len(region.GetLearners()) > 0,
EmptyRegion: region.GetApproximateSize() <= core.EmptyRegionApproximateSize,
OversizedRegion: region.GetApproximateSize() >= int64(r.storeManager.GetStoreConfig().GetRegionMaxSize()) ||
region.GetApproximateKeys() >= int64(r.storeManager.GetStoreConfig().GetRegionMaxKeys()),
// Undersized means below BOTH merge thresholds: approximate size against
// MaxMergeRegionSize and approximate key count against MaxMergeRegionKeys.
UndersizedRegion: region.GetApproximateSize() < int64(r.opt.GetScheduleConfig().MaxMergeRegionSize) &&
region.GetApproximateKeys() < int64(r.opt.GetScheduleConfig().MaxMergeRegionKeys),
}

for typ, c := range conditions {
if c {
if isRemoving {
if isRemoving && typ < EmptyRegion {
r.offlineStats[typ][regionID] = region
offlinePeerTypeIndex |= typ
}
Expand Down Expand Up @@ -232,13 +241,14 @@ func (r *RegionStatistics) Collect() {
regionStatusGauge.WithLabelValues("pending-peer-region-count").Set(float64(len(r.stats[PendingPeer])))
regionStatusGauge.WithLabelValues("learner-peer-region-count").Set(float64(len(r.stats[LearnerPeer])))
regionStatusGauge.WithLabelValues("empty-region-count").Set(float64(len(r.stats[EmptyRegion])))
regionStatusGauge.WithLabelValues("oversized-region-count").Set(float64(len(r.stats[OversizedRegion])))
regionStatusGauge.WithLabelValues("undersized-region-count").Set(float64(len(r.stats[UndersizedRegion])))

offlineRegionStatusGauge.WithLabelValues("miss-peer-region-count").Set(float64(len(r.offlineStats[MissPeer])))
offlineRegionStatusGauge.WithLabelValues("extra-peer-region-count").Set(float64(len(r.offlineStats[ExtraPeer])))
offlineRegionStatusGauge.WithLabelValues("down-peer-region-count").Set(float64(len(r.offlineStats[DownPeer])))
offlineRegionStatusGauge.WithLabelValues("pending-peer-region-count").Set(float64(len(r.offlineStats[PendingPeer])))
offlineRegionStatusGauge.WithLabelValues("learner-peer-region-count").Set(float64(len(r.offlineStats[LearnerPeer])))
offlineRegionStatusGauge.WithLabelValues("empty-region-count").Set(float64(len(r.offlineStats[EmptyRegion])))
offlineRegionStatusGauge.WithLabelValues("offline-peer-region-count").Set(float64(len(r.offlineStats[OfflinePeer])))
}

Expand Down
12 changes: 7 additions & 5 deletions server/statistics/region_collection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,14 @@ func (t *testRegionStatisticsSuite) TestRegionStatistics(c *C) {
r2 := &metapb.Region{Id: 2, Peers: peers[0:2], StartKey: []byte("cc"), EndKey: []byte("dd")}
region1 := core.NewRegionInfo(r1, peers[0])
region2 := core.NewRegionInfo(r2, peers[0])
regionStats := NewRegionStatistics(opt, t.manager)
regionStats := NewRegionStatistics(opt, t.manager, nil)
regionStats.Observe(region1, stores)
c.Assert(regionStats.stats[ExtraPeer], HasLen, 1)
c.Assert(regionStats.stats[LearnerPeer], HasLen, 1)
c.Assert(regionStats.stats[EmptyRegion], HasLen, 1)
c.Assert(regionStats.stats[UndersizedRegion], HasLen, 1)
c.Assert(regionStats.offlineStats[ExtraPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[LearnerPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[EmptyRegion], HasLen, 1)
c.Assert(regionStats.offlineStats[OfflinePeer], HasLen, 1)

region1 = region1.Clone(
core.WithDownPeers(downPeers),
Expand All @@ -102,12 +101,13 @@ func (t *testRegionStatisticsSuite) TestRegionStatistics(c *C) {
c.Assert(regionStats.stats[PendingPeer], HasLen, 1)
c.Assert(regionStats.stats[LearnerPeer], HasLen, 1)
c.Assert(regionStats.stats[EmptyRegion], HasLen, 0)
c.Assert(regionStats.stats[OversizedRegion], HasLen, 1)
c.Assert(regionStats.stats[UndersizedRegion], HasLen, 0)
c.Assert(regionStats.offlineStats[ExtraPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[MissPeer], HasLen, 0)
c.Assert(regionStats.offlineStats[DownPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[PendingPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[LearnerPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[EmptyRegion], HasLen, 0)
c.Assert(regionStats.offlineStats[OfflinePeer], HasLen, 1)

region2 = region2.Clone(core.WithDownPeers(downPeers[0:1]))
Expand All @@ -117,6 +117,8 @@ func (t *testRegionStatisticsSuite) TestRegionStatistics(c *C) {
c.Assert(regionStats.stats[DownPeer], HasLen, 2)
c.Assert(regionStats.stats[PendingPeer], HasLen, 1)
c.Assert(regionStats.stats[LearnerPeer], HasLen, 1)
c.Assert(regionStats.stats[OversizedRegion], HasLen, 1)
c.Assert(regionStats.stats[UndersizedRegion], HasLen, 1)
c.Assert(regionStats.offlineStats[ExtraPeer], HasLen, 1)
c.Assert(regionStats.offlineStats[MissPeer], HasLen, 0)
c.Assert(regionStats.offlineStats[DownPeer], HasLen, 1)
Expand Down Expand Up @@ -171,7 +173,7 @@ func (t *testRegionStatisticsSuite) TestRegionStatisticsWithPlacementRule(c *C)
region2 := core.NewRegionInfo(r2, peers[0])
region3 := core.NewRegionInfo(r3, peers[0])
region4 := core.NewRegionInfo(r4, peers[0])
regionStats := NewRegionStatistics(opt, t.manager)
regionStats := NewRegionStatistics(opt, t.manager, nil)
// r2 didn't match the rules
regionStats.Observe(region2, stores)
c.Assert(regionStats.stats[MissPeer], HasLen, 1)
Expand Down
10 changes: 8 additions & 2 deletions tests/pdctl/region/region_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ func (s *regionTestSuite) TestRegion(c *C) {

downPeer := &metapb.Peer{Id: 8, StoreId: 3}
r1 := pdctl.MustPutRegion(c, cluster, 1, 1, []byte("a"), []byte("b"),
core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1), core.SetApproximateSize(10),
core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1), core.SetApproximateSize(1),
core.SetPeers([]*metapb.Peer{
{Id: 1, StoreId: 1},
{Id: 5, StoreId: 2},
{Id: 6, StoreId: 3},
{Id: 7, StoreId: 4},
}))
r2 := pdctl.MustPutRegion(c, cluster, 2, 1, []byte("b"), []byte("c"),
core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3), core.SetApproximateSize(20))
core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3), core.SetApproximateSize(144))
r3 := pdctl.MustPutRegion(c, cluster, 3, 1, []byte("c"), []byte("d"),
core.SetWrittenBytes(500), core.SetReadBytes(800), core.SetRegionConfVer(3), core.SetRegionVersion(2), core.SetApproximateSize(30),
core.WithDownPeers([]*pdpb.PeerStats{{Peer: downPeer, DownSeconds: 3600}}),
Expand Down Expand Up @@ -144,6 +144,12 @@ func (s *regionTestSuite) TestRegion(c *C) {
{[]string{"region", "check", "down-peer"}, []*core.RegionInfo{r3}},
// region check learner-peer command
{[]string{"region", "check", "learner-peer"}, []*core.RegionInfo{r3}},
// region check empty-region command
{[]string{"region", "check", "empty-region"}, []*core.RegionInfo{r1}},
// region check undersized-region command
{[]string{"region", "check", "undersized-region"}, []*core.RegionInfo{r1, r4}},
// region check oversized-region command
{[]string{"region", "check", "oversized-region"}, []*core.RegionInfo{r2}},
// region keys --format=raw <start_key> <end_key> <limit> command
{[]string{"region", "keys", "--format=raw", "b"}, []*core.RegionInfo{r2, r3, r4}},
// region keys --format=raw <start_key> <end_key> <limit> command
Expand Down
2 changes: 1 addition & 1 deletion tools/pd-ctl/pdctl/command/region_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ func showRegionsByKeysCommandFunc(cmd *cobra.Command, args []string) {
// NewRegionWithCheckCommand returns a region with check subcommand of regionCmd
func NewRegionWithCheckCommand() *cobra.Command {
r := &cobra.Command{
Use: "check [miss-peer|extra-peer|down-peer|learner-peer|pending-peer|offline-peer|empty-region|hist-size|hist-keys]",
Use: "check [miss-peer|extra-peer|down-peer|learner-peer|pending-peer|offline-peer|empty-region|oversized-region|undersized-region|hist-size|hist-keys]",
Short: "show the region with check specific status",
Run: showRegionWithCheckCommandFunc,
}
Expand Down

0 comments on commit 56a015c

Please sign in to comment.