Skip to content

Commit be1faf6

Browse files
authored
Merge branch 'master' into master
2 parents 1d334a3 + b267249 commit be1faf6

7 files changed

+80
-13
lines changed

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ require (
3434
github.com/pingcap/kvproto v0.0.0-20211109071446-a8b4d34474bc
3535
github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7
3636
github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d
37-
github.com/pingcap/tidb-dashboard v0.0.0-20211107164327-80363dfbe884
37+
github.com/pingcap/tidb-dashboard v0.0.0-20211206031355-bcc43a01d537
3838
github.com/prometheus/client_golang v1.1.0
3939
github.com/prometheus/common v0.6.0
4040
github.com/sasha-s/go-deadlock v0.2.0

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,8 @@ github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuR
341341
github.com/pingcap/sysutil v0.0.0-20210315073920-cc0985d983a3/go.mod h1:tckvA041UWP+NqYzrJ3fMgC/Hw9wnmQ/tUkp/JaHly8=
342342
github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g=
343343
github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM=
344-
github.com/pingcap/tidb-dashboard v0.0.0-20211107164327-80363dfbe884 h1:6/yOhY2X0oNidvVK1PdjVoSThfUQ+GK/1UaHeXZfnrM=
345-
github.com/pingcap/tidb-dashboard v0.0.0-20211107164327-80363dfbe884/go.mod h1:OCXbZTBTIMRcIt0jFsuCakZP+goYRv6IjawKbwLS2TQ=
344+
github.com/pingcap/tidb-dashboard v0.0.0-20211206031355-bcc43a01d537 h1:qoKNQJY2hQ9/+q+aD6SxrNX5cZ8A9XZyw6rQg7MbwlE=
345+
github.com/pingcap/tidb-dashboard v0.0.0-20211206031355-bcc43a01d537/go.mod h1:OCXbZTBTIMRcIt0jFsuCakZP+goYRv6IjawKbwLS2TQ=
346346
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
347347
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
348348
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=

server/cluster/cluster.go

+10-7
Original file line numberDiff line numberDiff line change
@@ -1077,10 +1077,9 @@ func (c *RaftCluster) RemoveStore(storeID uint64, physicallyDestroyed bool) erro
10771077
return err
10781078
}
10791079

1080-
// buryStore marks a store as tombstone in cluster.
1081-
// The store should be empty before calling this func
1082-
// State transition: Offline -> Tombstone.
1083-
func (c *RaftCluster) buryStore(storeID uint64) error {
1080+
// BuryStore marks a store as tombstone in cluster.
1081+
// If forceBury is false, the store should be offlined and emptied before calling this func.
1082+
func (c *RaftCluster) BuryStore(storeID uint64, forceBury bool) error {
10841083
c.Lock()
10851084
defer c.Unlock()
10861085

@@ -1095,7 +1094,11 @@ func (c *RaftCluster) buryStore(storeID uint64) error {
10951094
}
10961095

10971096
if store.IsUp() {
1098-
return errs.ErrStoreIsUp.FastGenByArgs()
1097+
if !forceBury {
1098+
return errs.ErrStoreIsUp.FastGenByArgs()
1099+
} else if !store.IsDisconnected() {
1100+
return errors.Errorf("The store %v is not offline nor disconnected", storeID)
1101+
}
10991102
}
11001103

11011104
newStore := store.Clone(core.TombstoneStore())
@@ -1219,7 +1222,7 @@ func (c *RaftCluster) checkStores() {
12191222
// If the store is empty, it can be buried.
12201223
regionCount := c.core.GetStoreRegionCount(offlineStore.GetId())
12211224
if regionCount == 0 {
1222-
if err := c.buryStore(offlineStore.GetId()); err != nil {
1225+
if err := c.BuryStore(offlineStore.GetId(), false); err != nil {
12231226
log.Error("bury store failed",
12241227
zap.Stringer("store", offlineStore),
12251228
errs.ZapError(err))
@@ -1248,7 +1251,7 @@ func (c *RaftCluster) RemoveTombStoneRecords() error {
12481251

12491252
for _, store := range c.GetStores() {
12501253
if store.IsTombstone() {
1251-
if store.GetRegionCount() > 0 {
1254+
if c.core.GetStoreRegionCount(store.GetID()) > 0 {
12521255
log.Warn("skip removing tombstone", zap.Stringer("store", store.GetMeta()))
12531256
continue
12541257
}

server/cluster/cluster_test.go

+18-3
Original file line numberDiff line numberDiff line change
@@ -250,13 +250,28 @@ func (s *testClusterInfoSuite) TestSetOfflineStore(c *C) {
250250
for storeID := uint64(0); storeID <= 4; storeID++ {
251251
store := cluster.GetStore(storeID)
252252
if store == nil || store.IsUp() {
253-
c.Assert(cluster.buryStore(storeID), NotNil)
253+
c.Assert(cluster.BuryStore(storeID, false), NotNil)
254254
} else {
255-
c.Assert(cluster.buryStore(storeID), IsNil)
255+
c.Assert(cluster.BuryStore(storeID, false), IsNil)
256256
}
257257
}
258258
}
259259

260+
// TestForceBuryStore checks BuryStore with forceBury set to true for three
// store IDs: 1 and 2, which are put into the cluster below, and 3, which is
// never added.
func (s *testClusterInfoSuite) TestForceBuryStore(c *C) {
261+
_, opt, err := newTestScheduleConfig()
262+
c.Assert(err, IsNil)
263+
cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, core.NewStorage(kv.NewMemoryKV()), core.NewBasicCluster())
264+
// Put 2 stores.
265+
stores := newTestStores(2, "5.3.0")
266+
// Stamp stores[1] with a last-heartbeat time of "now"; stores[0] is left as
// newTestStores produced it.
// NOTE(review): presumably stores[1] is the store with ID 2 and the fresh
// heartbeat keeps it from counting as disconnected — confirm against
// newTestStores and IsDisconnected.
stores[1] = stores[1].Clone(core.SetLastHeartbeatTS(time.Now()))
267+
for _, store := range stores {
268+
c.Assert(cluster.PutStore(store.GetMeta()), IsNil)
269+
}
270+
// Force-burying store 1 must succeed.
c.Assert(cluster.BuryStore(uint64(1), true), IsNil)
271+
// Force-burying store 2 must be rejected with some error.
c.Assert(cluster.BuryStore(uint64(2), true), NotNil)
272+
// Store 3 was never put, so the error must equal ErrStoreNotFound built for ID 3.
c.Assert(errors.ErrorEqual(cluster.BuryStore(uint64(3), true), errs.ErrStoreNotFound.FastGenByArgs(uint64(3))), IsTrue)
273+
}
274+
260275
func (s *testClusterInfoSuite) TestReuseAddress(c *C) {
261276
_, opt, err := newTestScheduleConfig()
262277
c.Assert(err, IsNil)
@@ -272,7 +287,7 @@ func (s *testClusterInfoSuite) TestReuseAddress(c *C) {
272287
c.Assert(cluster.RemoveStore(3, true), IsNil)
273288
// store 4: tombstone
274289
c.Assert(cluster.RemoveStore(4, true), IsNil)
275-
c.Assert(cluster.buryStore(4), IsNil)
290+
c.Assert(cluster.BuryStore(4, false), IsNil)
276291

277292
for id := uint64(1); id <= 4; id++ {
278293
storeInfo := cluster.GetStore(id)

server/cluster/unsafe_recovery_controller.go

+13
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/pingcap/kvproto/pkg/metapb"
2828
"github.com/pingcap/kvproto/pkg/pdpb"
2929
"github.com/pingcap/log"
30+
"github.com/tikv/pd/pkg/errs"
3031
"github.com/tikv/pd/server/core"
3132
"go.uber.org/zap"
3233
)
@@ -79,6 +80,18 @@ func (u *unsafeRecoveryController) RemoveFailedStores(failedStores map[uint64]st
7980
return errors.Errorf("Another request is working in progress")
8081
}
8182
u.reset()
83+
for failedStore := range failedStores {
84+
store := u.cluster.GetStore(failedStore)
85+
if store != nil && store.IsUp() && !store.IsDisconnected() {
86+
return errors.Errorf("Store %v is up and connected", failedStore)
87+
}
88+
}
89+
for failedStore := range failedStores {
90+
err := u.cluster.BuryStore(failedStore, true)
91+
if !errors.ErrorEqual(err, errs.ErrStoreNotFound.FastGenByArgs(failedStore)) {
92+
return err
93+
}
94+
}
8295
u.failedStores = failedStores
8396
for _, s := range u.cluster.GetStores() {
8497
if s.IsTombstone() || s.IsPhysicallyDestroyed() || core.IsStoreContainLabel(s.GetMeta(), core.EngineKey, core.EngineTiFlash) {

server/cluster/unsafe_recovery_controller_test.go

+26
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package cluster
1717
import (
1818
"bytes"
1919
"context"
20+
"time"
2021

2122
. "github.com/pingcap/check"
2223
"github.com/pingcap/kvproto/pkg/metapb"
@@ -617,3 +618,28 @@ func (s *testUnsafeRecoverSuite) TestPlanExecution(c *C) {
617618
c.Assert(recoveryController.numStoresPlanExecuted, Equals, 2)
618619
c.Assert(recoveryController.stage, Equals, finished)
619620
}
621+
622+
// TestRemoveFailedStores checks RemoveFailedStores on a cluster holding two
// stores. A request naming store 1 plus the never-added store 3 must succeed
// and leave store 1 as a tombstone; a request naming store 2 must fail.
func (s *testUnsafeRecoverSuite) TestRemoveFailedStores(c *C) {
623+
// NOTE(review): the error returned by newTestScheduleConfig is discarded
// here, so a failed config setup would only surface later — consider
// asserting it is nil.
_, opt, _ := newTestScheduleConfig()
624+
cluster := newTestRaftCluster(s.ctx, mockid.NewIDAllocator(), opt, core.NewStorage(kv.NewMemoryKV()), core.NewBasicCluster())
625+
stores := newTestStores(2, "5.3.0")
626+
// Only stores[1] gets a last-heartbeat time of "now".
// NOTE(review): presumably stores[1] is the store with ID 2 — confirm
// against newTestStores.
stores[1] = stores[1].Clone(core.SetLastHeartbeatTS(time.Now()))
627+
for _, store := range stores {
628+
c.Assert(cluster.PutStore(store.GetMeta()), IsNil)
629+
}
630+
recoveryController := newUnsafeRecoveryController(cluster)
631+
// Store 1 was put above; store 3 was not, so this request mixes an
// existing store ID with a missing one.
failedStores := map[uint64]string{
632+
1: "",
633+
3: "",
634+
}
635+
636+
// NOTE(review): the next two assertions only observe the return value and
// store 1's state. They would still pass if RemoveFailedStores returned nil
// immediately after burying store 1, so nothing it does afterwards is
// covered here — consider asserting on the controller's state as well.
c.Assert(recoveryController.RemoveFailedStores(failedStores), IsNil)
637+
c.Assert(cluster.GetStore(uint64(1)).IsTombstone(), IsTrue)
638+
639+
// Store 2's last heartbeat is recent, and is not allowed to be removed.
640+
failedStores = map[uint64]string{
641+
2: "",
642+
}
643+
644+
c.Assert(recoveryController.RemoveFailedStores(failedStores), NotNil)
645+
}

server/handler.go

+10
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ func (h *Handler) AddScheduler(name string, args ...string) error {
217217
log.Error("can not add scheduler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args), errs.ZapError(err))
218218
} else if err = h.opt.Persist(c.GetStorage()); err != nil {
219219
log.Error("can not persist scheduler config", errs.ZapError(err))
220+
} else {
221+
log.Info("add scheduler successfully", zap.String("scheduler-name", name), zap.Strings("scheduler-args", args))
220222
}
221223
return err
222224
}
@@ -229,6 +231,8 @@ func (h *Handler) RemoveScheduler(name string) error {
229231
}
230232
if err = c.RemoveScheduler(name); err != nil {
231233
log.Error("can not remove scheduler", zap.String("scheduler-name", name), errs.ZapError(err))
234+
} else {
235+
log.Info("remove scheduler successfully", zap.String("scheduler-name", name))
232236
}
233237
return err
234238
}
@@ -247,6 +251,12 @@ func (h *Handler) PauseOrResumeScheduler(name string, t int64) error {
247251
} else {
248252
log.Error("can not pause scheduler", zap.String("scheduler-name", name), errs.ZapError(err))
249253
}
254+
} else {
255+
if t == 0 {
256+
log.Info("resume scheduler successfully", zap.String("scheduler-name", name))
257+
} else {
258+
log.Info("pause scheduler successfully", zap.String("scheduler-name", name), zap.Int64("pause-seconds", t))
259+
}
250260
}
251261
return err
252262
}

0 commit comments

Comments
 (0)