From ebca7ba716477d107f218c51f6defdf461413fb1 Mon Sep 17 00:00:00 2001 From: Rustin Liu Date: Sat, 25 Nov 2023 00:19:12 +0800 Subject: [PATCH] statistics: do not directly update global stats when dropping a partition (#48846) ref pingcap/tidb#48182 --- pkg/statistics/handle/ddl/ddl.go | 12 +- pkg/statistics/handle/globalstats/BUILD.bazel | 1 - .../handle/globalstats/global_stats.go | 111 ------------------ .../handle/globalstats/globalstats_test.go | 16 +-- pkg/statistics/handle/types/interfaces.go | 3 - 5 files changed, 5 insertions(+), 138 deletions(-) diff --git a/pkg/statistics/handle/ddl/ddl.go b/pkg/statistics/handle/ddl/ddl.go index e4657614a5219..6fcef63be23dc 100644 --- a/pkg/statistics/handle/ddl/ddl.go +++ b/pkg/statistics/handle/ddl/ddl.go @@ -116,16 +116,8 @@ func (h *ddlHandlerImpl) HandleDDLEvent(t *util.DDLEvent) error { } } case model.ActionDropTablePartition: - pruneMode, err := util.GetCurrentPruneMode(h.statsHandler.SPool()) - if err != nil { - return err - } - globalTableInfo, droppedPartitionInfo := t.GetDropPartitionInfo() - if variable.PartitionPruneMode(pruneMode) == variable.Dynamic && droppedPartitionInfo != nil { - if err := h.globalStatsHandler.UpdateGlobalStats(globalTableInfo); err != nil { - return err - } - } + // TODO: Update the modify count and count for the global table. + _, droppedPartitionInfo := t.GetDropPartitionInfo() for _, def := range droppedPartitionInfo.Definitions { if err := h.statsWriter.ResetTableStats2KVForDrop(def.ID); err != nil { return err diff --git a/pkg/statistics/handle/globalstats/BUILD.bazel b/pkg/statistics/handle/globalstats/BUILD.bazel index 81f237df5ae2c..c629c1c5c1f05 100644 --- a/pkg/statistics/handle/globalstats/BUILD.bazel +++ b/pkg/statistics/handle/globalstats/BUILD.bazel @@ -16,7 +16,6 @@ go_library( "//pkg/parser/model", "//pkg/sessionctx", "//pkg/sessionctx/stmtctx", - "//pkg/sessiontxn", "//pkg/statistics", "//pkg/statistics/handle/logutil", "//pkg/statistics/handle/storage", diff --git a/pkg/statistics/handle/globalstats/global_stats.go b/pkg/statistics/handle/globalstats/global_stats.go index af2eaf52d745f..0d6e52cd06968 100644 --- a/pkg/statistics/handle/globalstats/global_stats.go +++ b/pkg/statistics/handle/globalstats/global_stats.go @@ -22,10 +22,8 @@ import ( "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/sessionctx" - "github.com/pingcap/tidb/pkg/sessiontxn" "github.com/pingcap/tidb/pkg/statistics" statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" - "github.com/pingcap/tidb/pkg/statistics/handle/util" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/logutil" "github.com/tiancaiamao/gp" @@ -57,14 +55,6 @@ func (sg *statsGlobalImpl) MergePartitionStats2GlobalStatsByTableID(sc sessionct return MergePartitionStats2GlobalStatsByTableID(sc, sg.statsHandler, opts, is, physicalID, isIndex, histIDs) } -// UpdateGlobalStats will trigger the merge of global-stats when we drop table partition -func (sg *statsGlobalImpl) UpdateGlobalStats(tblInfo *model.TableInfo) error { - // We need to merge the partition-level stats to global-stats when we drop table partition in dynamic mode. - return util.CallWithSCtx(sg.statsHandler.SPool(), func(sctx sessionctx.Context) error { - return UpdateGlobalStats(sctx, sg.statsHandler, tblInfo) - }) -} - // GlobalStats is used to store the statistics contained in the global-level stats // which is generated by the merge of partition-level stats. // It will both store the column stats and index stats. @@ -170,107 +160,6 @@ var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{ ast.AnalyzeOptNumTopN: 20, } -// UpdateGlobalStats update the global-level stats based on the partition-level stats. -func UpdateGlobalStats( - sctx sessionctx.Context, - statsHandle statstypes.StatsHandle, - tblInfo *model.TableInfo) error { - tableID := tblInfo.ID - is := sessiontxn.GetTxnManager(sctx).GetTxnInfoSchema() - globalStats, err := statsHandle.TableStatsFromStorage(tblInfo, tableID, true, 0) - if err != nil { - return err - } - // If we do not currently have global-stats, no new global-stats will be generated. - if globalStats == nil { - return nil - } - opts := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault)) - for key, val := range analyzeOptionDefault { - opts[key] = val - } - // Use current global-stats related information to construct the opts for `MergePartitionStats2GlobalStats` function. - globalColStatsTopNNum, globalColStatsBucketNum := 0, 0 - for colID := range globalStats.Columns { - globalColStatsTopN := globalStats.Columns[colID].TopN - if globalColStatsTopN != nil && len(globalColStatsTopN.TopN) > globalColStatsTopNNum { - globalColStatsTopNNum = len(globalColStatsTopN.TopN) - } - globalColStats := globalStats.Columns[colID] - if globalColStats != nil && len(globalColStats.Buckets) > globalColStatsBucketNum { - globalColStatsBucketNum = len(globalColStats.Buckets) - } - } - if globalColStatsTopNNum != 0 { - opts[ast.AnalyzeOptNumTopN] = uint64(globalColStatsTopNNum) - } - if globalColStatsBucketNum != 0 { - opts[ast.AnalyzeOptNumBuckets] = uint64(globalColStatsBucketNum) - } - // Generate the new column global-stats - newColGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, false, nil) - if err != nil { - return err - } - if len(newColGlobalStats.MissingPartitionStats) > 0 { - logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L), - zap.String("item", "columns"), zap.Strings("missing", newColGlobalStats.MissingPartitionStats)) - } - for i := 0; i < newColGlobalStats.Num; i++ { - hg, cms, topN := newColGlobalStats.Hg[i], newColGlobalStats.Cms[i], newColGlobalStats.TopN[i] - if hg == nil { - // All partitions have no stats so global stats are not created. - continue - } - // fms for global stats doesn't need to dump to kv. - err = statsHandle.SaveStatsToStorage(tableID, newColGlobalStats.Count, newColGlobalStats.ModifyCount, - 0, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange) - if err != nil { - return err - } - } - - // Generate the new index global-stats - globalIdxStatsTopNNum, globalIdxStatsBucketNum := 0, 0 - for _, idx := range tblInfo.Indices { - globalIdxStatsTopN := globalStats.Indices[idx.ID].TopN - if globalIdxStatsTopN != nil && len(globalIdxStatsTopN.TopN) > globalIdxStatsTopNNum { - globalIdxStatsTopNNum = len(globalIdxStatsTopN.TopN) - } - globalIdxStats := globalStats.Indices[idx.ID] - if globalIdxStats != nil && len(globalIdxStats.Buckets) > globalIdxStatsBucketNum { - globalIdxStatsBucketNum = len(globalIdxStats.Buckets) - } - if globalIdxStatsTopNNum != 0 { - opts[ast.AnalyzeOptNumTopN] = uint64(globalIdxStatsTopNNum) - } - if globalIdxStatsBucketNum != 0 { - opts[ast.AnalyzeOptNumBuckets] = uint64(globalIdxStatsBucketNum) - } - newIndexGlobalStats, err := MergePartitionStats2GlobalStats(sctx, statsHandle, opts, is, tblInfo, true, []int64{idx.ID}) - if err != nil { - return err - } - if len(newIndexGlobalStats.MissingPartitionStats) > 0 { - logutil.BgLogger().Warn("missing partition stats when merging global stats", zap.String("table", tblInfo.Name.L), - zap.String("item", "index "+idx.Name.L), zap.Strings("missing", newIndexGlobalStats.MissingPartitionStats)) - } - for i := 0; i < newIndexGlobalStats.Num; i++ { - hg, cms, topN := newIndexGlobalStats.Hg[i], newIndexGlobalStats.Cms[i], newIndexGlobalStats.TopN[i] - if hg == nil { - // All partitions have no stats so global stats are not created. - continue - } - // fms for global stats doesn't need to dump to kv. - err = statsHandle.SaveStatsToStorage(tableID, newIndexGlobalStats.Count, newIndexGlobalStats.ModifyCount, 1, hg, cms, topN, 2, 1, false, util.StatsMetaHistorySourceSchemaChange) - if err != nil { - return err - } - } - } - return nil -} - // blockingMergePartitionStats2GlobalStats merge the partition-level stats to global-level stats based on the tableInfo. // It is the old algorithm to merge partition-level stats to global-level stats. It will happen the OOM. because it will load all the partition-level stats into memory. func blockingMergePartitionStats2GlobalStats( diff --git a/pkg/statistics/handle/globalstats/globalstats_test.go b/pkg/statistics/handle/globalstats/globalstats_test.go index a996d8b689d1b..1d58856c32484 100644 --- a/pkg/statistics/handle/globalstats/globalstats_test.go +++ b/pkg/statistics/handle/globalstats/globalstats_test.go @@ -903,16 +903,6 @@ func TestDDLPartition4GlobalStats(t *testing.T) { globalStats := h.GetTableStats(tableInfo) require.Equal(t, int64(15), globalStats.RealtimeCount) - tk.MustExec("alter table t drop partition p3, p5;") - require.NoError(t, h.DumpStatsDeltaToKV(true)) - require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) - require.NoError(t, h.Update(is)) - result = tk.MustQuery("show stats_meta where table_name = 't';").Rows() - require.Len(t, result, 5) - // The value of global.count will be updated automatically after we drop the table partition. - globalStats = h.GetTableStats(tableInfo) - require.Equal(t, int64(11), globalStats.RealtimeCount) - tk.MustExec("alter table t truncate partition p2, p4;") require.NoError(t, h.DumpStatsDeltaToKV(true)) require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) @@ -920,15 +910,15 @@ func TestDDLPartition4GlobalStats(t *testing.T) { // The value of global.count will not be updated automatically when we truncate the table partition. // Because the partition-stats in the partition table which have been truncated has not been updated. globalStats = h.GetTableStats(tableInfo) - require.Equal(t, int64(11), globalStats.RealtimeCount) + require.Equal(t, int64(15), globalStats.RealtimeCount) tk.MustExec("analyze table t;") result = tk.MustQuery("show stats_meta where table_name = 't';").Rows() // The truncate operation only delete the data from the partition p2 and p4. It will not delete the partition-stats. - require.Len(t, result, 5) + require.Len(t, result, 7) // The result for the globalStats.count will be right now globalStats = h.GetTableStats(tableInfo) - require.Equal(t, int64(7), globalStats.RealtimeCount) + require.Equal(t, int64(11), globalStats.RealtimeCount) } func TestGlobalStatsNDV(t *testing.T) { diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go index c65de0f68802d..1bec477a7b24f 100644 --- a/pkg/statistics/handle/types/interfaces.go +++ b/pkg/statistics/handle/types/interfaces.go @@ -348,9 +348,6 @@ type StatsGlobal interface { isIndex bool, histIDs []int64, ) (globalStats interface{}, err error) - - // UpdateGlobalStats will trigger the merge of global-stats when we drop table partition - UpdateGlobalStats(tblInfo *model.TableInfo) error } // DDL is used to handle ddl events.