gc_worker: Fix redundantly delete placement rules after dropping database (#33082)

close #33069
MyonKeminta authored Mar 16, 2022
1 parent 93d44e2 commit cce729e
Showing 2 changed files with 37 additions and 5 deletions.
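The fix threads a set of already-handled table ids through doGCPlacementRules for one delete-ranges round: an id is skipped if it was seen before, and it is recorded only after its placement rule has been deleted successfully, so failed deletions are retried on a later pass. Below is a minimal, self-contained Go sketch of that dedup pattern; the function names and the deleteRule stub are illustrative only (TiDB keeps the set as a map[int64]interface{} and calls infosync.DeleteTiFlashPlacementRule for the real deletion).

package main

import "fmt"

// deleteRule stands in for the real PD call; here it always succeeds.
func deleteRule(tableID int64) error {
	fmt.Printf("deleting placement rule for table %d\n", tableID)
	return nil
}

// gcPlacementRules deletes each table's rule at most once across repeated
// calls by recording successfully handled ids in done.
func gcPlacementRules(tableIDs []int64, done map[int64]struct{}) {
	for _, id := range tableIDs {
		if _, ok := done[id]; ok {
			continue // already GC-ed by an earlier delete-range task
		}
		if err := deleteRule(id); err != nil {
			continue // leave the id uncached so it is retried later
		}
		done[id] = struct{}{}
	}
}

func main() {
	done := make(map[int64]struct{})
	// Dropping a database yields several delete-range tasks that can point
	// at the same table ids; the second call deletes nothing.
	gcPlacementRules([]int64{10, 11}, done)
	gcPlacementRules([]int64{10, 11}, done)
}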
22 changes: 18 additions & 4 deletions store/gcworker/gc_worker.go
@@ -698,6 +698,9 @@ func (w *GCWorker) deleteRanges(ctx context.Context, safePoint uint64, concurren
return errors.Trace(err)
}

// Cache table ids on which placement rules have been GC-ed, to avoid redundantly GC-ing the same table id multiple times.
gcPlacementRuleCache := make(map[int64]interface{}, len(ranges))

logutil.Logger(ctx).Info("[gc worker] start delete ranges",
zap.String("uuid", w.uuid),
zap.Int("ranges", len(ranges)))
@@ -730,7 +733,7 @@ func (w *GCWorker) deleteRanges(ctx context.Context, safePoint uint64, concurren
metrics.GCUnsafeDestroyRangeFailuresCounterVec.WithLabelValues("save").Inc()
}

if err := w.doGCPlacementRules(r); err != nil {
if err := w.doGCPlacementRules(safePoint, r, gcPlacementRuleCache); err != nil {
logutil.Logger(ctx).Error("[gc worker] gc placement rules failed on range",
zap.String("uuid", w.uuid),
zap.Int64("jobID", r.JobID),
@@ -1862,7 +1865,7 @@ func (w *GCWorker) saveValueToSysTable(key, value string) error {
// GC placement rules when the partitions are removed by the GC worker.
// Placement rules cannot be removed immediately after drop table / truncate table,
// because the tables can be flashed back or recovered.
func (w *GCWorker) doGCPlacementRules(dr util.DelRangeTask) (err error) {
func (w *GCWorker) doGCPlacementRules(safePoint uint64, dr util.DelRangeTask, gcPlacementRuleCache map[int64]interface{}) (err error) {
// Get the job from the job history
var historyJob *model.Job
failpoint.Inject("mockHistoryJobForGC", func(v failpoint.Value) {
@@ -1873,6 +1876,7 @@ func (w *GCWorker) doGCPlacementRules(dr util.DelRangeTask) (err error) {
historyJob = &model.Job{
ID: dr.JobID,
Type: model.ActionDropTable,
TableID: int64(v.(int)),
RawArgs: args,
}
})
@@ -1916,12 +1920,22 @@ func (w *GCWorker) doGCPlacementRules(dr util.DelRangeTask) (err error) {
}

for _, id := range physicalTableIDs {
// Skip table ids that have already been successfully deleted.
if _, ok := gcPlacementRuleCache[id]; ok {
continue
}
// Delete pd rule
logutil.BgLogger().Info("try delete TiFlash pd rule", zap.Int64("tableID", id), zap.String("endKey", string(dr.EndKey)))
failpoint.Inject("gcDeletePlacementRuleCounter", func() {})
logutil.BgLogger().Info("try delete TiFlash pd rule",
zap.Int64("tableID", id), zap.String("endKey", string(dr.EndKey)), zap.Uint64("safePoint", safePoint))
ruleID := fmt.Sprintf("table-%v-r", id)
if err := infosync.DeleteTiFlashPlacementRule(context.Background(), "tiflash", ruleID); err != nil {
// If DeletePlacementRule fails here, the rule will be deleted in `HandlePlacementRuleRoutine`.
logutil.BgLogger().Error("delete TiFlash pd rule failed when gc", zap.Error(err), zap.String("ruleID", ruleID))
logutil.BgLogger().Error("delete TiFlash pd rule failed when gc",
zap.Error(err), zap.String("ruleID", ruleID), zap.Uint64("safePoint", safePoint))
} else {
// Cache the table id if its related rules are deleted successfully.
gcPlacementRuleCache[id] = struct{}{}
}
}
return infosync.PutRuleBundlesWithDefaultRetry(context.TODO(), bundles)
20 changes: 19 additions & 1 deletion store/gcworker/gc_worker_test.go
@@ -1655,6 +1655,16 @@ func TestGCPlacementRules(t *testing.T) {
require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/store/gcworker/mockHistoryJobForGC"))
}()

gcPlacementRuleCache := make(map[int64]interface{})
deletePlacementRuleCounter := 0
require.NoError(t, failpoint.EnableWith("github.com/pingcap/tidb/store/gcworker/gcDeletePlacementRuleCounter", "return", func() error {
deletePlacementRuleCounter++
return nil
}))
defer func() {
require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/store/gcworker/gcDeletePlacementRuleCounter"))
}()

bundleID := "TiDB_DDL_10"
bundle, err := placement.NewBundleFromOptions(&model.PlacementSettings{
PrimaryRegion: "r1",
@@ -1672,14 +1682,22 @@ func (w *GCWorker) doGCPlacementRules(dr util.DelRangeTask) (err error) {

// do gc
dr := util.DelRangeTask{JobID: 1, ElementID: 10}
err = s.gcWorker.doGCPlacementRules(dr)
err = s.gcWorker.doGCPlacementRules(1, dr, gcPlacementRuleCache)
require.NoError(t, err)
require.Equal(t, map[int64]interface{}{10: struct{}{}}, gcPlacementRuleCache)
require.Equal(t, 1, deletePlacementRuleCounter)

// check bundle deleted after gc
got, err = infosync.GetRuleBundle(context.Background(), bundleID)
require.NoError(t, err)
require.NotNil(t, got)
require.True(t, got.IsEmpty())

// gc the same table id repeatedly
err = s.gcWorker.doGCPlacementRules(1, dr, gcPlacementRuleCache)
require.NoError(t, err)
require.Equal(t, map[int64]interface{}{10: struct{}{}}, gcPlacementRuleCache)
require.Equal(t, 1, deletePlacementRuleCounter)
}

func TestGCLabelRules(t *testing.T) {
