diff --git a/pkg/executor/test/analyzetest/BUILD.bazel b/pkg/executor/test/analyzetest/BUILD.bazel index f3504f59f6e80..e456d85511a68 100644 --- a/pkg/executor/test/analyzetest/BUILD.bazel +++ b/pkg/executor/test/analyzetest/BUILD.bazel @@ -26,7 +26,10 @@ go_test( "//pkg/sessionctx", "//pkg/sessionctx/variable", "//pkg/statistics", +<<<<<<< HEAD "//pkg/statistics/handle/autoanalyze", +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/testkit", "//pkg/util/dbterror/exeerrors", "@com_github_pingcap_errors//:errors", diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go index 59de964d96ace..88b9f61371103 100644 --- a/pkg/executor/test/analyzetest/analyze_test.go +++ b/pkg/executor/test/analyzetest/analyze_test.go @@ -39,7 +39,10 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/statistics" +<<<<<<< HEAD "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" "github.com/stretchr/testify/require" @@ -743,11 +746,19 @@ func TestSavedAnalyzeOptions(t *testing.T) { tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2)) }() tk.MustExec("set global tidb_auto_analyze_ratio = 0.01") +<<<<<<< HEAD originalVal3 := autoanalyze.AutoAnalyzeMinCnt defer func() { autoanalyze.AutoAnalyzeMinCnt = originalVal3 }() autoanalyze.AutoAnalyzeMinCnt = 0 +======= + originalVal3 := statistics.AutoAnalyzeMinCnt + defer func() { + statistics.AutoAnalyzeMinCnt = originalVal3 + }() + statistics.AutoAnalyzeMinCnt = 0 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec("use test") tk.MustExec("set @@session.tidb_analyze_version = 2") @@ -1085,11 +1096,19 @@ func 
TestSavedAnalyzeColumnOptions(t *testing.T) { tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2)) }() tk.MustExec("set global tidb_auto_analyze_ratio = 0.01") +<<<<<<< HEAD originalVal3 := autoanalyze.AutoAnalyzeMinCnt defer func() { autoanalyze.AutoAnalyzeMinCnt = originalVal3 }() autoanalyze.AutoAnalyzeMinCnt = 0 +======= + originalVal3 := statistics.AutoAnalyzeMinCnt + defer func() { + statistics.AutoAnalyzeMinCnt = originalVal3 + }() + statistics.AutoAnalyzeMinCnt = 0 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) originalVal4 := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string) defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal4)) @@ -1966,9 +1985,15 @@ func testKillAutoAnalyze(t *testing.T, ver int) { tk := testkit.NewTestKit(t, store) oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() @@ -2050,9 +2075,15 @@ func TestKillAutoAnalyzeIndex(t *testing.T) { tk := testkit.NewTestKit(t, store) oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 
7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() @@ -2805,12 +2836,21 @@ func TestAutoAnalyzeAwareGlobalVariableChange(t *testing.T) { "3 0", )) +<<<<<<< HEAD originalVal1 := autoanalyze.AutoAnalyzeMinCnt originalVal2 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string) autoanalyze.AutoAnalyzeMinCnt = 0 tk.MustExec("set global tidb_auto_analyze_ratio = 0.001") defer func() { autoanalyze.AutoAnalyzeMinCnt = originalVal1 +======= + originalVal1 := statistics.AutoAnalyzeMinCnt + originalVal2 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string) + statistics.AutoAnalyzeMinCnt = 0 + tk.MustExec("set global tidb_auto_analyze_ratio = 0.001") + defer func() { + statistics.AutoAnalyzeMinCnt = originalVal1 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal2)) }() diff --git a/pkg/executor/test/analyzetest/memorycontrol/BUILD.bazel b/pkg/executor/test/analyzetest/memorycontrol/BUILD.bazel index bf8c263bb6016..adf5bc82f47f4 100644 --- a/pkg/executor/test/analyzetest/memorycontrol/BUILD.bazel +++ b/pkg/executor/test/analyzetest/memorycontrol/BUILD.bazel @@ -13,7 +13,11 @@ go_test( "//pkg/config", "//pkg/executor", "//pkg/sessionctx/variable", +<<<<<<< HEAD "//pkg/statistics/handle/autoanalyze", +======= + "//pkg/statistics", +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/testkit", "//pkg/util", "//pkg/util/memory", diff --git a/pkg/executor/test/analyzetest/memorycontrol/memory_control_test.go b/pkg/executor/test/analyzetest/memorycontrol/memory_control_test.go index a64e517e60d95..1d55edb74b3ca 100644 --- a/pkg/executor/test/analyzetest/memorycontrol/memory_control_test.go +++ 
b/pkg/executor/test/analyzetest/memorycontrol/memory_control_test.go @@ -22,7 +22,11 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/executor" +<<<<<<< HEAD "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +======= + "github.com/pingcap/tidb/pkg/statistics" +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/memory" @@ -144,12 +148,21 @@ func TestGlobalMemoryControlForAutoAnalyze(t *testing.T) { require.Len(t, rs0.Rows(), 0) h := dom.StatsHandle() +<<<<<<< HEAD originalVal4 := autoanalyze.AutoAnalyzeMinCnt originalVal5 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string) autoanalyze.AutoAnalyzeMinCnt = 0 tk.MustExec("set global tidb_auto_analyze_ratio = 0.001") defer func() { autoanalyze.AutoAnalyzeMinCnt = originalVal4 +======= + originalVal4 := statistics.AutoAnalyzeMinCnt + originalVal5 := tk.MustQuery("select @@global.tidb_auto_analyze_ratio").Rows()[0][0].(string) + statistics.AutoAnalyzeMinCnt = 0 + tk.MustExec("set global tidb_auto_analyze_ratio = 0.001") + defer func() { + statistics.AutoAnalyzeMinCnt = originalVal4 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_ratio = %v", originalVal5)) }() diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 20ee8a66d1270..5232508310598 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -86,7 +86,10 @@ go_test( "//pkg/parser/mysql", "//pkg/sessionctx", "//pkg/sessionctx/stmtctx", +<<<<<<< HEAD "//pkg/statistics/handle/autoanalyze", +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/testkit", "//pkg/testkit/testdata", "//pkg/testkit/testmain", diff --git a/pkg/statistics/handle/autoanalyze/BUILD.bazel 
b/pkg/statistics/handle/autoanalyze/BUILD.bazel index e961e88a518de..fab6adfaa0a32 100644 --- a/pkg/statistics/handle/autoanalyze/BUILD.bazel +++ b/pkg/statistics/handle/autoanalyze/BUILD.bazel @@ -40,6 +40,11 @@ go_test( "//pkg/sessionctx", "//pkg/sessionctx/variable", "//pkg/statistics", +<<<<<<< HEAD +======= + "//pkg/statistics/handle/util", + "//pkg/statistics/handle/util/test", +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/testkit", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//oracle", diff --git a/pkg/statistics/handle/autoanalyze/autoanalyze.go b/pkg/statistics/handle/autoanalyze/autoanalyze.go index 29c0c1f9af1c7..5dd37ca18ff55 100644 --- a/pkg/statistics/handle/autoanalyze/autoanalyze.go +++ b/pkg/statistics/handle/autoanalyze/autoanalyze.go @@ -277,11 +277,37 @@ func RandomPickOneTableAndTryAutoAnalyze( // AutoAnalyzeMinCnt means if the count of table is less than this value, we needn't do auto analyze. var AutoAnalyzeMinCnt int64 = 1000 +<<<<<<< HEAD func autoAnalyzeTable(sctx sessionctx.Context, statsHandle statsutil.StatsHandle, tblInfo *model.TableInfo, statsTbl *statistics.Table, ratio float64, sql string, params ...interface{}) bool { if statsTbl.Pseudo || statsTbl.RealtimeCount < AutoAnalyzeMinCnt { +======= + for _, def := range defs { + partitionStats[def.ID] = statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, def.ID) + } + + return partitionStats +} + +// Determine whether the table and index require analysis. +func tryAutoAnalyzeTable( + sctx sessionctx.Context, + statsHandle statstypes.StatsHandle, + sysProcTracker sysproctrack.Tracker, + tblInfo *model.TableInfo, + statsTbl *statistics.Table, + ratio float64, + sql string, + params ...any, +) bool { + // 1. If the statistics are either not loaded or are classified as pseudo, there is no need for analyze + // Pseudo statistics can be created by the optimizer, so we need to double check it. + // 2. 
If the table is too small, we don't want to waste time to analyze it. + // Leave the opportunity to other bigger tables. + if statsTbl == nil || statsTbl.Pseudo || statsTbl.RealtimeCount < statistics.AutoAnalyzeMinCnt { +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) return false } if needAnalyze, reason := NeedAnalyzeTable(statsTbl, 20*statsHandle.Lease(), ratio); needAnalyze { @@ -374,8 +400,17 @@ func autoAnalyzePartitionTableInDynamicMode(sctx sessionctx.Context, analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load()) partitionNames := make([]interface{}, 0, len(partitionDefs)) for _, def := range partitionDefs { +<<<<<<< HEAD partitionStatsTbl := statsHandle.GetPartitionStats(tblInfo, def.ID) if partitionStatsTbl.Pseudo || partitionStatsTbl.RealtimeCount < AutoAnalyzeMinCnt { +======= + partitionStats := partitionStats[def.ID] + // 1. If the statistics are either not loaded or are classified as pseudo, there is no need for analyze. + // Pseudo statistics can be created by the optimizer, so we need to double check it. + // 2. If the table is too small, we don't want to waste time to analyze it. + // Leave the opportunity to other bigger tables. 
+ if partitionStats == nil || partitionStats.Pseudo || partitionStats.RealtimeCount < statistics.AutoAnalyzeMinCnt { +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) continue } if needAnalyze, reason := NeedAnalyzeTable(partitionStatsTbl, 20*statsHandle.Lease(), ratio); needAnalyze { diff --git a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go index b090c416c86fb..1447dacd18255 100644 --- a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go +++ b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go @@ -25,11 +25,41 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/statistics" "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +<<<<<<< HEAD +======= + statsutil "github.com/pingcap/tidb/pkg/statistics/handle/util" + "github.com/pingcap/tidb/pkg/statistics/handle/util/test" +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/testkit" "github.com/stretchr/testify/require" "github.com/tikv/client-go/v2/oracle" ) +<<<<<<< HEAD +======= +func TestEnableAutoAnalyzePriorityQueue(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t (a int)") + tk.MustExec("insert into t values (1)") + // Enable auto analyze priority queue. 
+ tk.MustExec("SET GLOBAL tidb_enable_auto_analyze_priority_queue=ON") + require.True(t, variable.EnableAutoAnalyzePriorityQueue.Load()) + h := dom.StatsHandle() + err := h.HandleDDLEvent(<-h.DDLEventCh()) + require.NoError(t, err) + require.NoError(t, h.DumpStatsDeltaToKV(true)) + is := dom.InfoSchema() + require.NoError(t, h.Update(context.Background(), is)) + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + require.True(t, dom.StatsHandle().HandleAutoAnalyze()) +} + +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) func TestAutoAnalyzeLockedTable(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) @@ -43,10 +73,17 @@ func TestAutoAnalyzeLockedTable(t *testing.T) { // Lock the table. tk.MustExec("lock stats t") is := dom.InfoSchema() +<<<<<<< HEAD require.NoError(t, h.Update(is)) autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + require.NoError(t, h.Update(context.Background(), is)) + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() // Try to analyze the locked table, it should not analyze the table. require.False(t, dom.StatsHandle().HandleAutoAnalyze(dom.InfoSchema())) @@ -54,7 +91,50 @@ func TestAutoAnalyzeLockedTable(t *testing.T) { // Unlock the table. tk.MustExec("unlock stats t") // Try again, it should analyze the table. +<<<<<<< HEAD require.True(t, dom.StatsHandle().HandleAutoAnalyze(dom.InfoSchema())) +======= + require.True(t, dom.StatsHandle().HandleAutoAnalyze()) +} + +func TestAutoAnalyzeWithPredicateColumns(t *testing.T) { + // Create a table and add it to stats cache. 
+ store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int)") + tk.MustExec("insert into t values (1, 1)") + tk.MustQuery("select * from t where a > 0").Check(testkit.Rows("1 1")) + h := dom.StatsHandle() + err := h.HandleDDLEvent(<-h.DDLEventCh()) + require.NoError(t, err) + require.NoError(t, h.DumpColStatsUsageToKV()) + require.NoError(t, h.DumpStatsDeltaToKV(true)) + is := dom.InfoSchema() + require.NoError(t, h.Update(context.Background(), is)) + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + // Check column_stats_usage. + rows := tk.MustQuery( + "show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null", + ).Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "a", rows[0][3]) + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Trigger auto analyze. + require.True(t, dom.StatsHandle().HandleAutoAnalyze()) + + // Check analyze jobs. + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t auto analyze table column a with 256 buckets, 100 topn, 1 samplerate"), + ) +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) } func TestDisableAutoAnalyze(t *testing.T) { @@ -70,11 +150,18 @@ func TestDisableAutoAnalyze(t *testing.T) { is := dom.InfoSchema() require.NoError(t, h.Update(is)) +<<<<<<< HEAD // Set auto analyze ratio to 0. 
tk.MustExec("set @@global.tidb_auto_analyze_ratio = 0") autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + tk.MustExec("set @@global.tidb_enable_auto_analyze = 0") + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() // Even auto analyze ratio is set to 0, we still need to analyze the unanalyzed tables. require.True(t, dom.StatsHandle().HandleAutoAnalyze(dom.InfoSchema())) @@ -97,9 +184,15 @@ func TestAutoAnalyzeOnChangeAnalyzeVer(t *testing.T) { tk.MustExec("insert into t values(1)") tk.MustExec("set @@global.tidb_analyze_version = 1") do := dom +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() h := do.StatsHandle() err := h.HandleDDLEvent(<-h.DDLEventCh()) @@ -286,10 +379,17 @@ func TestAutoAnalyzeSkipColumnTypes(t *testing.T) { require.NoError(t, h.Update(dom.InfoSchema())) tk.MustExec("set @@global.tidb_analyze_skip_column_types = 'json,blob,mediumblob,text,mediumtext'") +<<<<<<< HEAD originalVal := autoanalyze.AutoAnalyzeMinCnt autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = originalVal +======= + originalVal := statistics.AutoAnalyzeMinCnt + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = originalVal +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() require.True(t, h.HandleAutoAnalyze(dom.InfoSchema())) tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate")) @@ -318,7 +418,11 @@ func TestAutoAnalyzeOnEmptyTable(t 
*testing.T) { // to pass the stats.Pseudo check in autoAnalyzeTable tk.MustExec("analyze table t") // to pass the AutoAnalyzeMinCnt check in autoAnalyzeTable +<<<<<<< HEAD tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(autoanalyze.AutoAnalyzeMinCnt))) +======= + tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(statistics.AutoAnalyzeMinCnt))) +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(true)) require.NoError(t, dom.StatsHandle().Update(dom.InfoSchema())) @@ -353,7 +457,11 @@ func TestAutoAnalyzeOutOfSpecifiedTime(t *testing.T) { // to pass the stats.Pseudo check in autoAnalyzeTable tk.MustExec("analyze table t") // to pass the AutoAnalyzeMinCnt check in autoAnalyzeTable +<<<<<<< HEAD tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(autoanalyze.AutoAnalyzeMinCnt))) +======= + tk.MustExec("insert into t values (1)" + strings.Repeat(", (1)", int(statistics.AutoAnalyzeMinCnt))) +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(true)) require.NoError(t, dom.StatsHandle().Update(dom.InfoSchema())) diff --git a/pkg/statistics/handle/autoanalyze/exec/exec.go b/pkg/statistics/handle/autoanalyze/exec/exec.go new file mode 100644 index 0000000000000..8fa9999d525b7 --- /dev/null +++ b/pkg/statistics/handle/autoanalyze/exec/exec.go @@ -0,0 +1,132 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package exec + +import ( + "math" + "strconv" + "time" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/pkg/metrics" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack" + "github.com/pingcap/tidb/pkg/sessionctx/variable" + "github.com/pingcap/tidb/pkg/statistics" + statslogutil "github.com/pingcap/tidb/pkg/statistics/handle/logutil" + statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" + statsutil "github.com/pingcap/tidb/pkg/statistics/handle/util" + "github.com/pingcap/tidb/pkg/util/chunk" + "github.com/pingcap/tidb/pkg/util/sqlescape" + "github.com/pingcap/tidb/pkg/util/sqlexec" + "go.uber.org/zap" +) + +var execOptionForAnalyze = map[int]sqlexec.OptionFuncAlias{ + statistics.Version0: sqlexec.ExecOptionAnalyzeVer1, + statistics.Version1: sqlexec.ExecOptionAnalyzeVer1, + statistics.Version2: sqlexec.ExecOptionAnalyzeVer2, +} + +// AutoAnalyze executes the auto analyze task. +func AutoAnalyze( + sctx sessionctx.Context, + statsHandle statstypes.StatsHandle, + sysProcTracker sysproctrack.Tracker, + statsVer int, + sql string, + params ...any, +) { + startTime := time.Now() + _, _, err := execAnalyzeStmt(sctx, statsHandle, sysProcTracker, statsVer, sql, params...) + dur := time.Since(startTime) + metrics.AutoAnalyzeHistogram.Observe(dur.Seconds()) + if err != nil { + escaped, err1 := sqlescape.EscapeSQL(sql, params...) 
+ if err1 != nil { + escaped = "" + } + statslogutil.StatsLogger().Error( + "auto analyze failed", + zap.String("sql", escaped), + zap.Duration("cost_time", dur), + zap.Error(err), + ) + metrics.AutoAnalyzeCounter.WithLabelValues("failed").Inc() + } else { + metrics.AutoAnalyzeCounter.WithLabelValues("succ").Inc() + } +} + +func execAnalyzeStmt( + sctx sessionctx.Context, + statsHandle statstypes.StatsHandle, + sysProcTracker sysproctrack.Tracker, + statsVer int, + sql string, + params ...any, +) ([]chunk.Row, []*ast.ResultField, error) { + pruneMode := sctx.GetSessionVars().PartitionPruneMode.Load() + analyzeSnapshot := sctx.GetSessionVars().EnableAnalyzeSnapshot + optFuncs := []sqlexec.OptionFuncAlias{ + execOptionForAnalyze[statsVer], + sqlexec.GetAnalyzeSnapshotOption(analyzeSnapshot), + sqlexec.GetPartitionPruneModeOption(pruneMode), + sqlexec.ExecOptionUseCurSession, + sqlexec.ExecOptionWithSysProcTrack(statsHandle.AutoAnalyzeProcID(), sysProcTracker.Track, sysProcTracker.UnTrack), + } + return statsutil.ExecWithOpts(sctx, optFuncs, sql, params...) +} + +// GetAutoAnalyzeParameters gets the auto analyze parameters from mysql.global_variables. +func GetAutoAnalyzeParameters(sctx sessionctx.Context) map[string]string { + sql := "select variable_name, variable_value from mysql.global_variables where variable_name in (%?, %?, %?)" + rows, _, err := statsutil.ExecWithOpts(sctx, nil, sql, variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime) + if err != nil { + return map[string]string{} + } + parameters := make(map[string]string, len(rows)) + for _, row := range rows { + parameters[row.GetString(0)] = row.GetString(1) + } + return parameters +} + +// ParseAutoAnalyzeRatio parses the auto analyze ratio from the string. 
+func ParseAutoAnalyzeRatio(ratio string) float64 { + autoAnalyzeRatio, err := strconv.ParseFloat(ratio, 64) + if err != nil { + return variable.DefAutoAnalyzeRatio + } + return math.Max(autoAnalyzeRatio, 0) +} + +// ParseAutoAnalysisWindow parses the time window for auto analysis. +// It parses the times in UTC location. +func ParseAutoAnalysisWindow(start, end string) (time.Time, time.Time, error) { + if start == "" { + start = variable.DefAutoAnalyzeStartTime + } + if end == "" { + end = variable.DefAutoAnalyzeEndTime + } + s, err := time.ParseInLocation(variable.FullDayTimeFormat, start, time.UTC) + if err != nil { + return s, s, errors.Trace(err) + } + e, err := time.ParseInLocation(variable.FullDayTimeFormat, end, time.UTC) + return s, e, err +} diff --git a/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel new file mode 100644 index 0000000000000..a26587cd67d9b --- /dev/null +++ b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel @@ -0,0 +1,43 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "refresher", + srcs = ["refresher.go"], + importpath = "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/refresher", + visibility = ["//visibility:public"], + deps = [ + "//pkg/infoschema", + "//pkg/parser/model", + "//pkg/sessionctx", + "//pkg/sessionctx/sysproctrack", + "//pkg/sessionctx/variable", + "//pkg/statistics", + "//pkg/statistics/handle/autoanalyze/exec", + "//pkg/statistics/handle/autoanalyze/priorityqueue", + "//pkg/statistics/handle/lockstats", + "//pkg/statistics/handle/logutil", + "//pkg/statistics/handle/types", + "//pkg/statistics/handle/util", + "//pkg/util", + "//pkg/util/timeutil", + "@com_github_tikv_client_go_v2//oracle", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "refresher_test", + timeout = "short", + srcs = ["refresher_test.go"], + flaky = True, + shard_count = 13, + deps = [ + ":refresher", + 
"//pkg/parser/model", + "//pkg/statistics", + "//pkg/statistics/handle/autoanalyze/priorityqueue", + "//pkg/testkit", + "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", + ], +) diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go new file mode 100644 index 0000000000000..aca694e3fa322 --- /dev/null +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -0,0 +1,640 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package refresher + +import ( + "context" + "time" + + "github.com/pingcap/tidb/pkg/infoschema" + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/sessionctx" + "github.com/pingcap/tidb/pkg/sessionctx/sysproctrack" + "github.com/pingcap/tidb/pkg/sessionctx/variable" + "github.com/pingcap/tidb/pkg/statistics" + "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/exec" + "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue" + "github.com/pingcap/tidb/pkg/statistics/handle/lockstats" + statslogutil "github.com/pingcap/tidb/pkg/statistics/handle/logutil" + statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" + statsutil "github.com/pingcap/tidb/pkg/statistics/handle/util" + "github.com/pingcap/tidb/pkg/util" + "github.com/pingcap/tidb/pkg/util/timeutil" + "github.com/tikv/client-go/v2/oracle" + "go.uber.org/zap" +) + +const ( + // unanalyzedTableDefaultChangePercentage is the default change percentage of unanalyzed table. + unanalyzedTableDefaultChangePercentage = 1 + // unanalyzedTableDefaultLastUpdateDuration is the default last update duration of unanalyzed table. + unanalyzedTableDefaultLastUpdateDuration = -30 * time.Minute +) + +// Refresher provides methods to refresh stats info. +// NOTE: Refresher is not thread-safe. +type Refresher struct { + statsHandle statstypes.StatsHandle + sysProcTracker sysproctrack.Tracker + // This will be refreshed every time we rebuild the priority queue. + autoAnalysisTimeWindow + + // Jobs is the priority queue of analysis jobs. + // Exported for testing purposes. + Jobs *priorityqueue.AnalysisPriorityQueue +} + +// NewRefresher creates a new Refresher with a new analysis priority queue; it does not start any goroutine. 
+func NewRefresher( + statsHandle statstypes.StatsHandle, + sysProcTracker sysproctrack.Tracker, +) *Refresher { + r := &Refresher{ + statsHandle: statsHandle, + sysProcTracker: sysProcTracker, + Jobs: priorityqueue.NewAnalysisPriorityQueue(), + } + + return r +} + +// PickOneTableAndAnalyzeByPriority picks one table and analyzes it by priority. +func (r *Refresher) PickOneTableAndAnalyzeByPriority() bool { + if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + return false + } + + se, err := r.statsHandle.SPool().Get() + if err != nil { + statslogutil.StatsLogger().Error( + "Get session context failed", + zap.Error(err), + ) + return false + } + defer r.statsHandle.SPool().Put(se) + sctx := se.(sessionctx.Context) + // Pick the table with the highest weight. + for r.Jobs.Len() > 0 { + job := r.Jobs.Pop() + if valid, failReason := job.IsValidToAnalyze( + sctx, + ); !valid { + statslogutil.SingletonStatsSamplerLogger().Info( + "Table is not ready to analyze", + zap.String("failReason", failReason), + zap.Stringer("job", job), + ) + continue + } + statslogutil.StatsLogger().Info( + "Auto analyze triggered", + zap.Stringer("job", job), + ) + err = job.Analyze( + r.statsHandle, + r.sysProcTracker, + ) + if err != nil { + statslogutil.StatsLogger().Error( + "Execute auto analyze job failed", + zap.Stringer("job", job), + zap.Error(err), + ) + } + // Only analyze one table each time. + return true + } + statslogutil.SingletonStatsSamplerLogger().Info( + "No table to analyze", + ) + return false +} + +// RebuildTableAnalysisJobQueue rebuilds the priority queue of analysis jobs. +func (r *Refresher) RebuildTableAnalysisJobQueue() error { + // Reset the priority queue. 
+ r.Jobs = priorityqueue.NewAnalysisPriorityQueue() + + if err := statsutil.CallWithSCtx( + r.statsHandle.SPool(), + func(sctx sessionctx.Context) error { + parameters := exec.GetAutoAnalyzeParameters(sctx) + autoAnalyzeRatio := exec.ParseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio]) + // Get the available time period for auto analyze and check if the current time is in the period. + start, end, err := exec.ParseAutoAnalysisWindow( + parameters[variable.TiDBAutoAnalyzeStartTime], + parameters[variable.TiDBAutoAnalyzeEndTime], + ) + if err != nil { + statslogutil.StatsLogger().Error( + "parse auto analyze period failed", + zap.Error(err), + ) + return err + } + // We will check it again when we try to execute the job. + // So store the time window for later use. + r.autoAnalysisTimeWindow = autoAnalysisTimeWindow{ + start: start, + end: end, + } + if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + return nil + } + calculator := priorityqueue.NewPriorityCalculator() + pruneMode := variable.PartitionPruneMode(sctx.GetSessionVars().PartitionPruneMode.Load()) + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + // Query locked tables once to minimize overhead. + // Outdated lock info is acceptable as we verify table lock status pre-analysis. + lockedTables, err := lockstats.QueryLockedTables(sctx) + if err != nil { + return err + } + // Get current timestamp from the session context. + currentTs, err := getStartTs(sctx) + if err != nil { + return err + } + + dbs := is.AllSchemaNames() + for _, db := range dbs { + // Sometimes the tables are too many. Auto-analyze will take too much time on it. + // so we need to check the available time. + if !r.autoAnalysisTimeWindow.isWithinTimeWindow(time.Now()) { + return nil + } + // Ignore the memory and system database. 
+ if util.IsMemOrSysDB(db.L) { + continue + } + + tbls, err := is.SchemaTableInfos(context.Background(), db) + if err != nil { + return err + } + // We need to check every partition of every table to see if it needs to be analyzed. + for _, tblInfo := range tbls { + // If the table is locked, skip analyzing all partitions of the table. + if _, ok := lockedTables[tblInfo.ID]; ok { + continue + } + + if tblInfo.IsView() { + continue + } + pi := tblInfo.GetPartitionInfo() + pushJobFunc := func(job priorityqueue.AnalysisJob) { + if job == nil { + return + } + // Calculate the weight of the job. + weight := calculator.CalculateWeight(job) + // We apply a penalty to larger tables, which can potentially result in a negative weight. + // Such jobs are not filtered out: a non-positive weight is only logged as a warning and the job is still pushed. Under normal circumstances, table sizes should not be negative. + if weight <= 0 { + statslogutil.SingletonStatsSamplerLogger().Warn( + "Table gets a negative weight", + zap.Float64("weight", weight), + zap.Stringer("job", job), + ) + } + job.SetWeight(weight) + // Push the job onto the queue. + r.Jobs.Push(job) + } + // No partitions, analyze the whole table. + if pi == nil { + job := CreateTableAnalysisJob( + sctx, + db.O, + tblInfo, + r.statsHandle.GetTableStatsForAutoAnalyze(tblInfo), + autoAnalyzeRatio, + currentTs, + ) + pushJobFunc(job) + // Skip the rest of the loop. + continue + } + + // Only analyze the partition that has not been locked. + partitionDefs := make([]model.PartitionDefinition, 0, len(pi.Definitions)) + for _, def := range pi.Definitions { + if _, ok := lockedTables[def.ID]; !ok { + partitionDefs = append(partitionDefs, def) + } + } + partitionStats := getPartitionStats(r.statsHandle, tblInfo, partitionDefs) + // If the prune mode is static, we need to analyze every partition as a separate table. 
+ if pruneMode == variable.Static { + for pIDAndName, stats := range partitionStats { + job := CreateStaticPartitionAnalysisJob( + sctx, + db.O, + tblInfo, + pIDAndName.ID, + pIDAndName.Name, + stats, + autoAnalyzeRatio, + currentTs, + ) + pushJobFunc(job) + } + } else { + job := createTableAnalysisJobForPartitions( + sctx, + db.O, + tblInfo, + r.statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, tblInfo.ID), + partitionStats, + autoAnalyzeRatio, + currentTs, + ) + pushJobFunc(job) + } + } + } + + return nil + }, + statsutil.FlagWrapTxn, + ); err != nil { + return err + } + + return nil +} + +// CreateTableAnalysisJob creates a TableAnalysisJob for the physical table. +func CreateTableAnalysisJob( + sctx sessionctx.Context, + tableSchema string, + tblInfo *model.TableInfo, + tblStats *statistics.Table, + autoAnalyzeRatio float64, + currentTs uint64, +) priorityqueue.AnalysisJob { + if !tblStats.IsEligibleForAnalysis() { + return nil + } + + tableStatsVer := sctx.GetSessionVars().AnalyzeVersion + statistics.CheckAnalyzeVerOnTable(tblStats, &tableStatsVer) + + changePercentage := CalculateChangePercentage(tblStats, autoAnalyzeRatio) + tableSize := calculateTableSize(tblInfo, tblStats) + lastAnalysisDuration := GetTableLastAnalyzeDuration(tblStats, currentTs) + indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats) + + // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. + if changePercentage == 0 && len(indexes) == 0 { + return nil + } + + job := priorityqueue.NewNonPartitionedTableAnalysisJob( + tableSchema, + tblInfo.Name.O, + tblInfo.ID, + indexes, + tableStatsVer, + changePercentage, + tableSize, + lastAnalysisDuration, + ) + + return job +} + +// CreateStaticPartitionAnalysisJob creates a TableAnalysisJob for the static partition. 
+func CreateStaticPartitionAnalysisJob(
+	sctx sessionctx.Context,
+	tableSchema string,
+	globalTblInfo *model.TableInfo,
+	partitionID int64,
+	partitionName string,
+	partitionStats *statistics.Table,
+	autoAnalyzeRatio float64,
+	currentTs uint64,
+) priorityqueue.AnalysisJob {
+	// A partition whose stats are not eligible for analysis never produces a job.
+	if !partitionStats.IsEligibleForAnalysis() {
+		return nil
+	}
+
+	// Start from the session's analyze version. CheckAnalyzeVerOnTable is handed a pointer to it,
+	// presumably so it can override the version from the existing stats (confirm against its definition).
+	statsVer := sctx.GetSessionVars().AnalyzeVersion
+	statistics.CheckAnalyzeVerOnTable(partitionStats, &statsVer)
+
+	changePct := CalculateChangePercentage(partitionStats, autoAnalyzeRatio)
+	missingIndexes := CheckIndexesNeedAnalyze(globalTblInfo, partitionStats)
+
+	// Nothing to do for this partition.
+	// The two conditions are checked separately because the auto analyze ratio may be 0
+	// (which forces changePct to 0 for analyzed partitions) while newly added indexes
+	// still have to be analyzed.
+	if changePct == 0 && len(missingIndexes) == 0 {
+		return nil
+	}
+
+	return priorityqueue.NewStaticPartitionTableAnalysisJob(
+		tableSchema,
+		globalTblInfo.Name.O,
+		globalTblInfo.ID,
+		partitionName,
+		partitionID,
+		missingIndexes,
+		statsVer,
+		changePct,
+		calculateTableSize(globalTblInfo, partitionStats),
+		GetTableLastAnalyzeDuration(partitionStats, currentTs),
+	)
+}
+
+// CalculateChangePercentage calculates the change percentage of the table
+// based on the change count and the analysis count.
+//
+// It returns:
+//   - unanalyzedTableDefaultChangePercentage when the table has never been analyzed;
+//   - 0 when autoAnalyzeRatio is 0, i.e. ratio-based auto analyze is disabled
+//     (callers check missing index stats separately, so index analysis is unaffected);
+//   - ModifyCount divided by the analyzed row count (or by RealtimeCount when the
+//     analyzed row count is not positive) if that value exceeds autoAnalyzeRatio;
+//   - 0 otherwise.
+//
+// NOTE(review): the denominator is not guarded against 0, so an analyzed table with
+// no rows and a non-zero ModifyCount yields +Inf.
+func CalculateChangePercentage(
+	tblStats *statistics.Table,
+	autoAnalyzeRatio float64,
+) float64 {
+	switch {
+	case !tblStats.IsAnalyzed():
+		return unanalyzedTableDefaultChangePercentage
+	case autoAnalyzeRatio == 0:
+		return 0
+	}
+
+	// Prefer the row count recorded by the last analyze; fall back to the realtime count.
+	denominator := float64(tblStats.RealtimeCount)
+	if analyzedRows := tblStats.GetAnalyzeRowCount(); analyzedRows > 0 {
+		denominator = analyzedRows
+	}
+
+	if ratio := float64(tblStats.ModifyCount) / denominator; ratio > autoAnalyzeRatio {
+		return ratio
+	}
+	return 0
+}
+
+// calculateTableSize returns the size indicator of the table:
+// the realtime row count multiplied by the number of columns.
+func calculateTableSize(
+	tblInfo *model.TableInfo,
+	tblStats *statistics.Table,
+) float64 {
+	// TODO: Ignore unanalyzable columns.
+	return float64(tblStats.RealtimeCount) * float64(len(tblInfo.Columns))
+}
+
+// GetTableLastAnalyzeDuration gets the duration since the last analysis of the table,
+// measured from the time encoded in currentTs.
+func GetTableLastAnalyzeDuration(
+	tblStats *statistics.Table,
+	currentTs uint64,
+) time.Duration {
+	now := oracle.GetTimeFromTS(currentTs)
+	return now.Sub(findLastAnalyzeTime(tblStats, currentTs))
+}
+
+// findLastAnalyzeTime finds the last analyze time of the table.
+// For an analyzed table it is the time encoded in the stats' `LastAnalyzeVersion`.
+// For a table that has never been analyzed there is no such version, so a fake time is
+// composed by offsetting the current time by unanalyzedTableDefaultLastUpdateDuration.
+func findLastAnalyzeTime(
+	tblStats *statistics.Table,
+	currentTs uint64,
+) time.Time {
+	if tblStats.IsAnalyzed() {
+		return oracle.GetTimeFromTS(tblStats.LastAnalyzeVersion)
+	}
+	// Table is not analyzed, compose a fake version.
+	return oracle.GetTimeFromTS(currentTs).Add(unanalyzedTableDefaultLastUpdateDuration)
+}
+
+// CheckIndexesNeedAnalyze checks if the indexes of the table need to be analyzed.
+// It returns the names of the public indexes that have no loaded index stats and are
+// not marked as analyzed in the existence map.
+// It returns nil for a table that has never been analyzed, because the whole table
+// will be analyzed in that case.
+func CheckIndexesNeedAnalyze(
+	tblInfo *model.TableInfo,
+	tblStats *statistics.Table,
+) []string {
+	if !tblStats.IsAnalyzed() {
+		return nil
+	}
+
+	missing := make([]string, 0, len(tblInfo.Indices))
+	for _, idxInfo := range tblInfo.Indices {
+		// Stats for this index are already loaded.
+		if tblStats.GetIdx(idxInfo.ID) != nil {
+			continue
+		}
+		// Stats are not loaded, but the index is recorded as analyzed.
+		if tblStats.ColAndIdxExistenceMap.HasAnalyzed(idxInfo.ID, true) {
+			continue
+		}
+		// Only public indexes are analyzed.
+		if idxInfo.State != model.StatePublic {
+			continue
+		}
+		missing = append(missing, idxInfo.Name.O)
+	}
+
+	return missing
+}
+
+// createTableAnalysisJobForPartitions builds a single analysis job for a partitioned table
+// from the table-level stats and the stats of its partitions.
+// It returns nil when the table-level stats are not eligible for analysis, or when
+// no partition meets the change threshold and no partition is missing index stats.
+func createTableAnalysisJobForPartitions(
+	sctx sessionctx.Context,
+	tableSchema string,
+	tblInfo *model.TableInfo,
+	tblStats *statistics.Table,
+	partitionStats map[PartitionIDAndName]*statistics.Table,
+	autoAnalyzeRatio float64,
+	currentTs uint64,
+) priorityqueue.AnalysisJob {
+	if !tblStats.IsEligibleForAnalysis() {
+		return nil
+	}
+
+	// TODO: figure out how to check the table stats version correctly for partitioned tables.
+	statsVer := sctx.GetSessionVars().AnalyzeVersion
+	statistics.CheckAnalyzeVerOnTable(tblStats, &statsVer)
+
+	avgChangePct, avgSize, avgLastAnalyzeDuration, changedPartitions := CalculateIndicatorsForPartitions(
+		tblInfo,
+		partitionStats,
+		autoAnalyzeRatio,
+		currentTs,
+	)
+	partitionsByIndex := CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(
+		tblInfo,
+		partitionStats,
+	)
+
+	// Nothing to do for this table.
+	// The two conditions are checked separately because the auto analyze ratio may be 0
+	// while newly added indexes still have to be analyzed.
+	if len(changedPartitions) == 0 && len(partitionsByIndex) == 0 {
+		return nil
+	}
+
+	return priorityqueue.NewDynamicPartitionedTableAnalysisJob(
+		tableSchema,
+		tblInfo.Name.O,
+		tblInfo.ID,
+		changedPartitions,
+		partitionsByIndex,
+		statsVer,
+		avgChangePct,
+		avgSize,
+		avgLastAnalyzeDuration,
+	)
+}
+
+// CalculateIndicatorsForPartitions calculates the average change percentage,
+// average size and average last analyze duration for the partitions that meet the threshold.
+// Change percentage is the ratio of the number of modified rows to the total number of rows.
+// Size is the product of the number of rows and the number of columns.
+// Last analyze duration is the duration since the last analyze.
+// Partitions whose change percentage is 0 are left out of every average and of partitionNames.
+// When no partition qualifies, all indicators are 0 and partitionNames is empty (but non-nil).
+// The order of partitionNames follows map iteration order and is therefore unspecified.
+func CalculateIndicatorsForPartitions(
+	tblInfo *model.TableInfo,
+	partitionStats map[PartitionIDAndName]*statistics.Table,
+	autoAnalyzeRatio float64,
+	currentTs uint64,
+) (
+	avgChange float64,
+	avgSize float64,
+	avgLastAnalyzeDuration time.Duration,
+	partitionNames []string,
+) {
+	var (
+		changeSum   float64
+		sizeSum     float64
+		durationSum time.Duration
+	)
+	columnCount := float64(len(tblInfo.Columns))
+	partitionNames = make([]string, 0, len(partitionStats))
+
+	for key, stats := range partitionStats {
+		change := CalculateChangePercentage(stats, autoAnalyzeRatio)
+		// A zero change percentage means the partition is below the threshold,
+		// or the auto analyze ratio is set to 0 by the user: skip it.
+		if change == 0 {
+			continue
+		}
+
+		changeSum += change
+		// size = rows * columns
+		sizeSum += float64(stats.RealtimeCount) * columnCount
+		durationSum += GetTableLastAnalyzeDuration(stats, currentTs)
+		partitionNames = append(partitionNames, key.Name)
+	}
+
+	// The number of collected names is the number of partitions that contributed to the sums.
+	qualified := len(partitionNames)
+	if qualified == 0 {
+		return 0, 0, 0, partitionNames
+	}
+
+	avgChange = changeSum / float64(qualified)
+	avgSize = sizeSum / float64(qualified)
+	avgLastAnalyzeDuration = durationSum / time.Duration(qualified)
+
+	return avgChange, avgSize, avgLastAnalyzeDuration, partitionNames
+}
+
+// CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable checks if the indexes of the partitioned table need to be analyzed.
+// It returns a map from index name to the names of the partitions that need to be analyzed.
+// NOTE: This is only for newly added indexes.
+func CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(
+	tblInfo *model.TableInfo,
+	partitionStats map[PartitionIDAndName]*statistics.Table,
+) map[string][]string {
+	partitionsByIndex := make(map[string][]string, len(tblInfo.Indices))
+
+	for _, idxInfo := range tblInfo.Indices {
+		// Only public indexes are analyzed.
+		if idxInfo.State != model.StatePublic {
+			continue
+		}
+
+		// Collect every partition that has neither loaded stats for this index
+		// nor a record of the index having been analyzed.
+		missing := make([]string, 0, len(partitionStats))
+		for key, stats := range partitionStats {
+			if stats.GetIdx(idxInfo.ID) != nil {
+				continue
+			}
+			if stats.ColAndIdxExistenceMap.HasAnalyzed(idxInfo.ID, true) {
+				continue
+			}
+			missing = append(missing, key.Name)
+		}
+
+		// Indexes that are up to date in all partitions get no entry at all.
+		if len(missing) == 0 {
+			continue
+		}
+		partitionsByIndex[idxInfo.Name.O] = missing
+	}
+
+	return partitionsByIndex
+}
+
+// getStartTs returns the start timestamp of the session's transaction.
+// NOTE(review): Txn is called with `true`, presumably to activate the transaction
+// if it is not active yet; confirm against sessionctx.Context.
+func getStartTs(sctx sessionctx.Context) (uint64, error) {
+	activeTxn, err := sctx.Txn(true)
+	if err != nil {
+		return 0, err
+	}
+
+	return activeTxn.StartTS(), nil
+}
+
+// PartitionIDAndName is a struct that contains the ID and name of a partition.
+// It is used as the key of the per-partition stats map.
+// Exported for testing purposes. Do not use it in other packages.
+type PartitionIDAndName struct {
+	Name string
+	ID   int64
+}
+
+// getPartitionStats loads the auto-analyze stats of the given partition definitions
+// and returns them keyed by partition ID and name.
+// Partitions whose stats are not eligible for analysis are left out of the result.
+func getPartitionStats(
+	statsHandle statstypes.StatsHandle,
+	tblInfo *model.TableInfo,
+	defs []model.PartitionDefinition,
+) map[PartitionIDAndName]*statistics.Table {
+	eligible := make(map[PartitionIDAndName]*statistics.Table, len(defs))
+
+	for i := range defs {
+		def := &defs[i]
+		stats := statsHandle.GetPartitionStatsForAutoAnalyze(tblInfo, def.ID)
+		// Ignore the partition if it's not ready to analyze.
+		if !stats.IsEligibleForAnalysis() {
+			continue
+		}
+		eligible[PartitionIDAndName{ID: def.ID, Name: def.Name.O}] = stats
+	}
+
+	return eligible
+}
+
+// autoAnalysisTimeWindow is a struct that contains the start and end time of the auto analyze time window.
+type autoAnalysisTimeWindow struct {
+	// start is the beginning of the window; the zero time means "not set".
+	start time.Time
+	// end is the end of the window; the zero time means "not set".
+	end time.Time
+}
+
+// isWithinTimeWindow checks if the current time is within the time window.
+// It returns false when the auto analyze time window is not set (either bound is
+// the zero time) or when currentTime is not in the window.
+func (a autoAnalysisTimeWindow) isWithinTimeWindow(currentTime time.Time) bool {
+	// Use IsZero instead of `== (time.Time{})`: the == operator also compares the
+	// Location pointer and the monotonic clock reading, so a zero instant that
+	// carries a non-nil location would wrongly be treated as a configured bound.
+	if a.start.IsZero() || a.end.IsZero() {
+		return false
+	}
+	return timeutil.WithinDayTimePeriod(a.start, a.end, currentTime)
+}
diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go
new file mode 100644
index 0000000000000..121de14df00c0
--- /dev/null
+++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go
@@ -0,0 +1,848 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package refresher_test + +import ( + "context" + "math" + "sort" + "testing" + "time" + + "github.com/pingcap/tidb/pkg/parser/model" + "github.com/pingcap/tidb/pkg/statistics" + "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/priorityqueue" + "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze/refresher" + "github.com/pingcap/tidb/pkg/testkit" + "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" +) + +func TestSkipAnalyzeTableWhenAutoAnalyzeRatioIsZero(t *testing.T) { + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) " + + "partition by range (a) " + + "(partition p0 values less than (2), " + + "partition p1 values less than (4), " + + "partition p2 values less than (16))", + ) + + tk.MustExec("create table t2 (a int, b int, index idx(a)) " + + "partition by range (a) " + + "(partition p0 values less than (2), " + + "partition p1 values less than (4), " + + "partition p2 values less than (16))", + ) + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + // HACK: Set the auto analyze ratio to 0. + // We don't allow users to set the ratio to 0 anymore, but we still need to test this case. + // Because we need to compilable with the old configuration. + tk.MustExec("update mysql.global_variables set variable_value = '0' where variable_name = 'tidb_auto_analyze_ratio'") + handle := dom.StatsHandle() + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // Analyze those tables first. + tk.MustExec("analyze table t1") + tk.MustExec("analyze table t2") + // Insert more data into t1. 
+ tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + // No jobs are added. + require.Equal(t, 0, r.Jobs.Len()) + require.False(t, r.PickOneTableAndAnalyzeByPriority()) + // Enable the auto analyze. + tk.MustExec("set global tidb_auto_analyze_ratio = 0.2") + r.RebuildTableAnalysisJobQueue() + // Jobs are added. + require.Equal(t, 1, r.Jobs.Len()) + require.True(t, r.PickOneTableAndAnalyzeByPriority()) +} + +func TestIgnoreNilOrPseudoStatsOfPartitionedTable(t *testing.T) { + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + handle := dom.StatsHandle() + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + require.Equal(t, 0, r.Jobs.Len(), "No jobs are added because table stats are nil") +} + +func TestIgnoreNilOrPseudoStatsOfNonPartitionedTable(t *testing.T) { + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b 
int, index idx(a))") + tk.MustExec("create table t2 (a int, b int, index idx(a))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + handle := dom.StatsHandle() + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + require.Equal(t, 0, r.Jobs.Len(), "No jobs are added because table stats are nil") +} + +func TestIgnoreTinyTable(t *testing.T) { + statistics.AutoAnalyzeMinCnt = 10 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + handle := dom.StatsHandle() + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // Analyze those tables first. + tk.MustExec("analyze table t1") + tk.MustExec("analyze table t2") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // Make sure table stats are not pseudo. 
+ tbl1, err := dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t1")) + require.NoError(t, err) + pid1 := tbl1.Meta().GetPartitionInfo().Definitions[1].ID + tblStats1 := handle.GetPartitionStats(tbl1.Meta(), pid1) + require.False(t, tblStats1.Pseudo) + tbl2, err := dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t2")) + require.NoError(t, err) + pid2 := tbl2.Meta().GetPartitionInfo().Definitions[1].ID + tblStats2 := handle.GetPartitionStats(tbl2.Meta(), pid2) + require.False(t, tblStats2.Pseudo) + + // Insert more data into t1 and t2, but more data is inserted into t1. + tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13)") + tk.MustExec("insert into t2 values (4, 4)") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + require.Equal(t, 1, r.Jobs.Len(), "Only t1 is added to the job queue, because t2 is a tiny table(not enough data)") +} + +func TestPickOneTableAndAnalyzeByPriority(t *testing.T) { + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + }() + + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + handle := dom.StatsHandle() + require.NoError(t, 
handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // Analyze those tables first. + tk.MustExec("analyze table t1") + tk.MustExec("analyze table t2") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // Insert more data into t1 and t2, but more data is inserted into t1. + tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13)") + tk.MustExec("insert into t2 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + require.Equal(t, 2, r.Jobs.Len()) + // Analyze t1 first. + require.True(t, r.PickOneTableAndAnalyzeByPriority()) + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(context.Background(), dom.InfoSchema())) + // The table is analyzed. + tbl1, err := dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t1")) + require.NoError(t, err) + pid1 := tbl1.Meta().GetPartitionInfo().Definitions[1].ID + tblStats1 := handle.GetPartitionStats(tbl1.Meta(), pid1) + require.Equal(t, int64(0), tblStats1.ModifyCount) + require.Equal(t, int64(12), tblStats1.RealtimeCount) + // t2 is not analyzed. + tbl2, err := dom.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t2")) + require.NoError(t, err) + pid2 := tbl2.Meta().GetPartitionInfo().Definitions[1].ID + tblStats2 := handle.GetPartitionStats(tbl2.Meta(), pid2) + require.Equal(t, int64(6), tblStats2.ModifyCount) + // Do one more round. + require.True(t, r.PickOneTableAndAnalyzeByPriority()) + // t2 is analyzed. 
+ pid2 = tbl2.Meta().GetPartitionInfo().Definitions[1].ID + tblStats2 = handle.GetPartitionStats(tbl2.Meta(), pid2) + require.Equal(t, int64(0), tblStats2.ModifyCount) + require.Equal(t, int64(8), tblStats2.RealtimeCount) +} + +func TestPickOneTableAndAnalyzeByPriorityWithFailedAnalysis(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + + handle := dom.StatsHandle() + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + // No jobs in the queue. + r.PickOneTableAndAnalyzeByPriority() + // The table is not analyzed. + is := dom.InfoSchema() + tbl1, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t1")) + require.NoError(t, err) + pid1 := tbl1.Meta().GetPartitionInfo().Definitions[0].ID + tblStats1 := handle.GetPartitionStats(tbl1.Meta(), pid1) + require.True(t, tblStats1.Pseudo) + + // Add a job to the queue. 
+ job1 := &priorityqueue.NonPartitionedTableAnalysisJob{ + TableID: tbl1.Meta().ID, + TableSchema: "test", + TableName: "t1", + Weight: 1, + Indicators: priorityqueue.Indicators{ + ChangePercentage: 0.5, + }, + } + r.Jobs.Push(job1) + tbl2, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t2")) + require.NoError(t, err) + job2 := &priorityqueue.NonPartitionedTableAnalysisJob{ + TableID: tbl2.Meta().ID, + TableSchema: "test", + TableName: "t2", + Weight: 0.9, + Indicators: priorityqueue.Indicators{ + ChangePercentage: 0.5, + }, + } + r.Jobs.Push(job2) + // Add a failed job to t1. + startTime := tk.MustQuery("select now() - interval 2 second").Rows()[0][0].(string) + insertFailedJobForPartitionWithStartTime(tk, "test", "t1", "p0", startTime) + + r.PickOneTableAndAnalyzeByPriority() + // t2 is analyzed. + pid2 := tbl2.Meta().GetPartitionInfo().Definitions[0].ID + tblStats2 := handle.GetPartitionStats(tbl2.Meta(), pid2) + require.True(t, tblStats2.Pseudo) + // t1 is not analyzed. 
+ tblStats1 = handle.GetPartitionStats(tbl1.Meta(), pid1) + require.False(t, tblStats1.Pseudo) +} + +func insertFailedJobForPartitionWithStartTime( + tk *testkit.TestKit, + dbName string, + tableName string, + partitionName string, + startTime string, +) { + tk.MustExec(` + INSERT INTO mysql.analyze_jobs ( + table_schema, + table_name, + partition_name, + job_info, + start_time, + end_time, + state, + fail_reason, + instance + ) VALUES ( + ?, + ?, + ?, + 'Job information for failed job', + ?, + '2024-01-01 10:00:00', + 'failed', + 'Some reason for failure', + 'example_instance' + ); + `, + dbName, + tableName, + partitionName, + startTime, + ) +} + +func TestRebuildTableAnalysisJobQueue(t *testing.T) { + old := statistics.AutoAnalyzeMinCnt + defer func() { + statistics.AutoAnalyzeMinCnt = old + }() + statistics.AutoAnalyzeMinCnt = 0 + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + handle := dom.StatsHandle() + require.Nil(t, handle.DumpStatsDeltaToKV(true)) + tk.MustExec("analyze table t1") + require.Nil(t, handle.Update(context.Background(), dom.InfoSchema())) + + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + + // Rebuild the job queue. No jobs are added. + err := r.RebuildTableAnalysisJobQueue() + require.NoError(t, err) + require.Equal(t, 0, r.Jobs.Len()) + // Insert more data into t1. 
+ tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6)") + require.Nil(t, handle.DumpStatsDeltaToKV(true)) + require.Nil(t, handle.Update(context.Background(), dom.InfoSchema())) + err = r.RebuildTableAnalysisJobQueue() + require.NoError(t, err) + require.Equal(t, 1, r.Jobs.Len()) + job1 := r.Jobs.Pop() + indicators := job1.GetIndicators() + require.Equal(t, 1.2, math.Round(job1.GetWeight()*10)/10) + require.Equal(t, float64(1), indicators.ChangePercentage) + require.Equal(t, float64(6*2), indicators.TableSize) + require.GreaterOrEqual(t, indicators.LastAnalysisDuration, time.Duration(0)) +} + +func TestCalculateChangePercentage(t *testing.T) { + unanalyzedColumns := map[int64]*statistics.Column{ + 1: {}, + 2: {}, + } + unanalyzedIndices := map[int64]*statistics.Index{ + 1: {}, + 2: {}, + } + analyzedColumns := map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + }, + 2: { + StatsVer: 2, + }, + } + analyzedIndices := map[int64]*statistics.Index{ + 1: { + StatsVer: 2, + }, + 2: { + StatsVer: 2, + }, + } + bothUnanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) + bothAnalyzedMap := statistics.NewColAndIndexExistenceMap(2, 2) + bothAnalyzedMap.InsertCol(1, nil, true) + bothAnalyzedMap.InsertCol(2, nil, true) + bothAnalyzedMap.InsertIndex(1, nil, true) + bothAnalyzedMap.InsertIndex(2, nil, true) + tests := []struct { + name string + tblStats *statistics.Table + autoAnalyzeRatio float64 + want float64 + }{ + { + name: "Test Table not analyzed", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, unanalyzedColumns, unanalyzedIndices), + ColAndIdxExistenceMap: bothUnanalyzedMap, + }, + autoAnalyzeRatio: 0.5, + want: 1, + }, + { + name: "Based on change percentage", + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, (statistics.AutoAnalyzeMinCnt+1)*2, analyzedColumns, analyzedIndices), + ColAndIdxExistenceMap: 
bothAnalyzedMap, + LastAnalyzeVersion: 1, + }, + autoAnalyzeRatio: 0.5, + want: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := refresher.CalculateChangePercentage(tt.tblStats, tt.autoAnalyzeRatio) + require.Equal(t, tt.want, got) + }) + } +} + +func TestGetTableLastAnalyzeDuration(t *testing.T) { + // 2023-12-31 10:00:00 + lastUpdateTime := time.Date(2023, 12, 31, 10, 0, 0, 0, time.UTC) + lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) + tblStats := &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 0, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + LastAnalyzeVersion: lastUpdateTs, + } + // 2024-01-01 10:00:00 + currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) + currentTs := oracle.GoTimeToTS(currentTime) + want := 24 * time.Hour + + got := refresher.GetTableLastAnalyzeDuration(tblStats, currentTs) + require.Equal(t, want, got) +} + +func TestGetTableLastAnalyzeDurationForUnanalyzedTable(t *testing.T) { + tblStats := &statistics.Table{ + HistColl: statistics.HistColl{}, + } + // 2024-01-01 10:00:00 + currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) + currentTs := oracle.GoTimeToTS(currentTime) + want := 1800 * time.Second + + got := refresher.GetTableLastAnalyzeDuration(tblStats, currentTs) + require.Equal(t, want, got) +} + +func TestCheckIndexesNeedAnalyze(t *testing.T) { + analyzedMap := statistics.NewColAndIndexExistenceMap(1, 0) + analyzedMap.InsertCol(1, nil, true) + analyzedMap.InsertIndex(1, nil, false) + tests := []struct { + name string + tblInfo *model.TableInfo + tblStats *statistics.Table + want []string + }{ + { + name: "Test Table not analyzed", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + }, + tblStats: &statistics.Table{ColAndIdxExistenceMap: 
statistics.NewColAndIndexExistenceMap(0, 0)}, + want: nil, + }, + { + name: "Test Index not analyzed", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + }, + tblStats: &statistics.Table{ + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, 0, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + }, + }, map[int64]*statistics.Index{}), + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: 1, + }, + want: []string{"index1"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := refresher.CheckIndexesNeedAnalyze(tt.tblInfo, tt.tblStats) + require.Equal(t, tt.want, got) + }) + } +} + +func TestCalculateIndicatorsForPartitions(t *testing.T) { + // 2024-01-01 10:00:00 + currentTime := time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC) + currentTs := oracle.GoTimeToTS(currentTime) + // 2023-12-31 10:00:00 + lastUpdateTime := time.Date(2023, 12, 31, 10, 0, 0, 0, time.UTC) + lastUpdateTs := oracle.GoTimeToTS(lastUpdateTime) + unanalyzedMap := statistics.NewColAndIndexExistenceMap(0, 0) + analyzedMap := statistics.NewColAndIndexExistenceMap(2, 1) + analyzedMap.InsertCol(1, nil, true) + analyzedMap.InsertCol(2, nil, true) + analyzedMap.InsertIndex(1, nil, true) + tests := []struct { + name string + tblInfo *model.TableInfo + partitionStats map[refresher.PartitionIDAndName]*statistics.Table + defs []model.PartitionDefinition + autoAnalyzeRatio float64 + currentTs uint64 + wantAvgChangePercentage float64 + wantAvgSize float64 + wantAvgLastAnalyzeDuration time.Duration + wantPartitions []string + }{ + { + name: "Test Table not analyzed", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + Columns: []*model.ColumnInfo{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + }, + partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ + { + ID: 1, + 
Name: "p0", + }: { + HistColl: statistics.HistColl{ + Pseudo: false, + RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, + }, + ColAndIdxExistenceMap: unanalyzedMap, + }, + { + ID: 2, + Name: "p1", + }: { + HistColl: statistics.HistColl{ + Pseudo: false, + RealtimeCount: statistics.AutoAnalyzeMinCnt + 1, + }, + ColAndIdxExistenceMap: unanalyzedMap, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: model.NewCIStr("p0"), + }, + { + ID: 2, + Name: model.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 1, + wantAvgSize: 2002, + wantAvgLastAnalyzeDuration: 1800 * time.Second, + wantPartitions: []string{"p0", "p1"}, + }, + { + name: "Test Table analyzed and only one partition meets the threshold", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + Columns: []*model.ColumnInfo{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + }, + partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ + { + ID: 1, + Name: "p0", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, (statistics.AutoAnalyzeMinCnt+1)*2, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + { + ID: 2, + Name: "p1", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: 
analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: model.NewCIStr("p0"), + }, + { + ID: 2, + Name: model.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 2, + wantAvgSize: 2002, + wantAvgLastAnalyzeDuration: 24 * time.Hour, + wantPartitions: []string{"p0"}, + }, + { + name: "No partition meets the threshold", + tblInfo: &model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + }, + Columns: []*model.ColumnInfo{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + }, + partitionStats: map[refresher.PartitionIDAndName]*statistics.Table{ + { + ID: 1, + Name: "p0", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + { + ID: 2, + Name: "p1", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, map[int64]*statistics.Column{ + 1: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + 2: { + StatsVer: 2, + Histogram: statistics.Histogram{ + LastUpdateVersion: lastUpdateTs, + }, + }, + }, nil), + Version: currentTs, + ColAndIdxExistenceMap: analyzedMap, + LastAnalyzeVersion: lastUpdateTs, + }, + }, + defs: []model.PartitionDefinition{ + { + ID: 1, + Name: model.NewCIStr("p0"), + }, + { + ID: 2, + Name: model.NewCIStr("p1"), + }, + }, + autoAnalyzeRatio: 0.5, + currentTs: currentTs, + wantAvgChangePercentage: 0, + wantAvgSize: 0, + wantAvgLastAnalyzeDuration: 0, + wantPartitions: []string{}, + }, + } + + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotAvgChangePercentage, + gotAvgSize, + gotAvgLastAnalyzeDuration, + gotPartitions := + refresher.CalculateIndicatorsForPartitions( + tt.tblInfo, + tt.partitionStats, + tt.autoAnalyzeRatio, + tt.currentTs, + ) + require.Equal(t, tt.wantAvgChangePercentage, gotAvgChangePercentage) + require.Equal(t, tt.wantAvgSize, gotAvgSize) + require.Equal(t, tt.wantAvgLastAnalyzeDuration, gotAvgLastAnalyzeDuration) + // Sort the partitions. + sort.Strings(tt.wantPartitions) + sort.Strings(gotPartitions) + require.Equal(t, tt.wantPartitions, gotPartitions) + }) + } +} + +func TestCheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(t *testing.T) { + tblInfo := model.TableInfo{ + Indices: []*model.IndexInfo{ + { + ID: 1, + Name: model.NewCIStr("index1"), + State: model.StatePublic, + }, + { + ID: 2, + Name: model.NewCIStr("index2"), + State: model.StatePublic, + }, + }, + Columns: []*model.ColumnInfo{ + { + ID: 1, + }, + { + ID: 2, + }, + }, + } + partitionStats := map[refresher.PartitionIDAndName]*statistics.Table{ + { + ID: 1, + Name: "p0", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{}), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 0), + }, + { + ID: 2, + Name: "p1", + }: { + HistColl: *statistics.NewHistCollWithColsAndIdxs(0, false, statistics.AutoAnalyzeMinCnt+1, 0, nil, map[int64]*statistics.Index{ + 2: { + StatsVer: 2, + }, + }), + ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(0, 1), + }, + } + + partitionIndexes := refresher.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(&tblInfo, partitionStats) + expected := map[string][]string{"index1": {"p0", "p1"}, "index2": {"p0"}} + require.Equal(t, len(expected), len(partitionIndexes)) + + for k, v := range expected { + sort.Strings(v) + if val, ok := partitionIndexes[k]; ok { + sort.Strings(val) + require.Equal(t, v, val) + } else { + 
require.Fail(t, "key not found in partitionIndexes: "+k) + } + } +} diff --git a/pkg/statistics/handle/cache/statscache.go b/pkg/statistics/handle/cache/statscache.go index 3f3e8d71017a6..8cb26d710af5f 100644 --- a/pkg/statistics/handle/cache/statscache.go +++ b/pkg/statistics/handle/cache/statscache.go @@ -200,7 +200,14 @@ func (s *StatsCacheImpl) SetStatsCacheCapacity(c int64) { // UpdateStatsHealthyMetrics updates stats healthy distribution metrics according to stats cache. func (s *StatsCacheImpl) UpdateStatsHealthyMetrics() { distribution := make([]int64, 5) + uneligibleAnalyze := 0 for _, tbl := range s.Values() { + distribution[4]++ // total table count + isEligibleForAnalysis := tbl.IsEligibleForAnalysis() + if !isEligibleForAnalysis { + uneligibleAnalyze++ + continue + } healthy, ok := tbl.GetStatsHealthy() if !ok { continue @@ -214,9 +221,9 @@ func (s *StatsCacheImpl) UpdateStatsHealthyMetrics() { } else { distribution[3]++ } - distribution[4]++ } for i, val := range distribution { handle_metrics.StatsHealthyGauges[i].Set(float64(val)) } + handle_metrics.StatsHealthyGauges[5].Set(float64(uneligibleAnalyze)) } diff --git a/pkg/statistics/handle/metrics/metrics.go b/pkg/statistics/handle/metrics/metrics.go index 175fef6359df4..c4fd3a644546d 100644 --- a/pkg/statistics/handle/metrics/metrics.go +++ b/pkg/statistics/handle/metrics/metrics.go @@ -40,6 +40,7 @@ func InitMetricsVars() { metrics.StatsHealthyGauge.WithLabelValues("[100,100]"), // [0,100] should always be the last metrics.StatsHealthyGauge.WithLabelValues("[0,100]"), + metrics.StatsHealthyGauge.WithLabelValues("unneeded analyze"), } DumpHistoricalStatsSuccessCounter = metrics.HistoricalStatsCounter.WithLabelValues("dump", "success") diff --git a/pkg/statistics/handle/updatetest/BUILD.bazel b/pkg/statistics/handle/updatetest/BUILD.bazel index 6b08edca6b991..724f86db08440 100644 --- a/pkg/statistics/handle/updatetest/BUILD.bazel +++ b/pkg/statistics/handle/updatetest/BUILD.bazel @@ -16,7 +16,10 @@ 
go_test( "//pkg/sessionctx", "//pkg/sessionctx/variable", "//pkg/statistics", +<<<<<<< HEAD "//pkg/statistics/handle/autoanalyze", +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/statistics/handle/usage", "//pkg/statistics/handle/util", "//pkg/testkit", diff --git a/pkg/statistics/handle/updatetest/update_test.go b/pkg/statistics/handle/updatetest/update_test.go index 01ef3342f6aea..d9293e5f27b68 100644 --- a/pkg/statistics/handle/updatetest/update_test.go +++ b/pkg/statistics/handle/updatetest/update_test.go @@ -28,7 +28,10 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/statistics" +<<<<<<< HEAD "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/statistics/handle/usage" "github.com/pingcap/tidb/pkg/statistics/handle/util" "github.com/pingcap/tidb/pkg/testkit" @@ -363,11 +366,19 @@ func TestAutoUpdate(t *testing.T) { testKit.MustExec("use test") testKit.MustExec("create table t (a varchar(20))") +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 testKit.MustExec("set global tidb_auto_analyze_ratio = 0.2") defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 testKit.MustExec("set global tidb_auto_analyze_ratio = 0.0") +======= + statistics.AutoAnalyzeMinCnt = 0 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.2") + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.5") +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() do := dom @@ -468,11 +479,19 @@ func TestAutoUpdatePartition(t *testing.T) { testKit.MustExec("create table t (a int) PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN (6))") testKit.MustExec("analyze table t") +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 testKit.MustExec("set 
global tidb_auto_analyze_ratio = 0.6") defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 testKit.MustExec("set global tidb_auto_analyze_ratio = 0.0") +======= + statistics.AutoAnalyzeMinCnt = 0 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.6") + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.5") +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() do := dom @@ -513,7 +532,11 @@ func TestIssue25700(t *testing.T) { tk.MustExec("drop table if exists t") tk.MustExec("CREATE TABLE `t` ( `ldecimal` decimal(32,4) DEFAULT NULL, `rdecimal` decimal(32,4) DEFAULT NULL, `gen_col` decimal(36,4) GENERATED ALWAYS AS (`ldecimal` + `rdecimal`) VIRTUAL, `col_timestamp` timestamp(3) NULL DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;") tk.MustExec("analyze table t") +<<<<<<< HEAD tk.MustExec("INSERT INTO `t` (`ldecimal`, `rdecimal`, `col_timestamp`) VALUES (2265.2200, 9843.4100, '1999-12-31 16:00:00')" + strings.Repeat(", (2265.2200, 9843.4100, '1999-12-31 16:00:00')", int(autoanalyze.AutoAnalyzeMinCnt))) +======= + tk.MustExec("INSERT INTO `t` (`ldecimal`, `rdecimal`, `col_timestamp`) VALUES (2265.2200, 9843.4100, '1999-12-31 16:00:00')" + strings.Repeat(", (2265.2200, 9843.4100, '1999-12-31 16:00:00')", int(statistics.AutoAnalyzeMinCnt))) +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(true)) require.NoError(t, dom.StatsHandle().Update(dom.InfoSchema())) @@ -803,11 +826,19 @@ func TestAutoUpdatePartitionInDynamicOnlyMode(t *testing.T) { testKit.MustExec("set @@tidb_analyze_version = 2") testKit.MustExec("analyze table t") +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 testKit.MustExec("set global tidb_auto_analyze_ratio = 0.1") defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 testKit.MustExec("set global tidb_auto_analyze_ratio = 0.0") +======= + 
statistics.AutoAnalyzeMinCnt = 0 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.1") + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 + testKit.MustExec("set global tidb_auto_analyze_ratio = 0.5") +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() require.NoError(t, h.Update(is)) @@ -849,9 +880,15 @@ func TestAutoAnalyzeRatio(t *testing.T) { oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() @@ -1077,9 +1114,15 @@ func TestStatsLockUnlockForAutoAnalyze(t *testing.T) { oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() @@ -1277,6 +1320,7 @@ func TestNotDumpSysTable(t *testing.T) { func TestAutoAnalyzePartitionTableAfterAddingIndex(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) +<<<<<<< HEAD oriMinCnt := autoanalyze.AutoAnalyzeMinCnt oriStart := 
tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) @@ -1286,6 +1330,17 @@ func TestAutoAnalyzePartitionTableAfterAddingIndex(t *testing.T) { tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() autoanalyze.AutoAnalyzeMinCnt = 0 +======= + oriMinCnt := statistics.AutoAnalyzeMinCnt + oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) + oriEnd := tk.MustQuery("select @@tidb_auto_analyze_end_time").Rows()[0][0].(string) + defer func() { + statistics.AutoAnalyzeMinCnt = oriMinCnt + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) + tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) + }() + statistics.AutoAnalyzeMinCnt = 0 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec("set global tidb_auto_analyze_start_time='00:00 +0000'") tk.MustExec("set global tidb_auto_analyze_end_time='23:59 +0000'") tk.MustExec("set global tidb_analyze_version = 2") diff --git a/pkg/statistics/integration_test.go b/pkg/statistics/integration_test.go index c675cd47f857c..2dfc734e0ab15 100644 --- a/pkg/statistics/integration_test.go +++ b/pkg/statistics/integration_test.go @@ -26,7 +26,10 @@ import ( "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/statistics" +<<<<<<< HEAD "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +======= +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/testkit" "github.com/pingcap/tidb/pkg/testkit/testdata" "github.com/stretchr/testify/require" @@ -307,9 +310,15 @@ func TestOutdatedStatsCheck(t *testing.T) { oriStart := tk.MustQuery("select @@tidb_auto_analyze_start_time").Rows()[0][0].(string) oriEnd := tk.MustQuery("select 
@@tidb_auto_analyze_end_time").Rows()[0][0].(string) +<<<<<<< HEAD autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = 1000 +======= + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = 1000 +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_start_time='%v'", oriStart)) tk.MustExec(fmt.Sprintf("set global tidb_auto_analyze_end_time='%v'", oriEnd)) }() diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index ac8645adbe87b..41beca4b498ce 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -42,6 +42,10 @@ const ( PseudoRowCount = 10000 ) +// AutoAnalyzeMinCnt means if the count of table is less than this value, we don't need to do auto analyze. +// Exported for testing. +var AutoAnalyzeMinCnt int64 = 1000 + var ( // Below functions are used to solve cycle import problem. // Note: all functions below will be removed after finishing moving all estimation functions into the cardinality package. @@ -412,6 +416,28 @@ func (t *Table) GetStatsInfo(id int64, isIndex bool, needCopy bool) (*Histogram, return nil, nil, nil, nil, false } +<<<<<<< HEAD +======= +// IsAnalyzed checks whether the table is analyzed or not by checking its last analyze's timestamp value. +// A valid timestamp must be greater than 0. +func (t *Table) IsAnalyzed() bool { + return t.LastAnalyzeVersion > 0 +} + +// IsEligibleForAnalysis checks whether the table is eligible for analysis. +func (t *Table) IsEligibleForAnalysis() bool { + // 1. If the statistics are either not loaded or are classified as pseudo, there is no need for analyze. + // Pseudo statistics can be created by the optimizer, so we need to double check it. + // 2. If the table is too small, we don't want to waste time to analyze it. + // Leave the opportunity to other bigger tables. 
+ if t == nil || t.Pseudo || t.RealtimeCount < AutoAnalyzeMinCnt { + return false + } + + return true +} + +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) // GetAnalyzeRowCount tries to get the row count of a column or an index if possible. // This method is useful because this row count doesn't consider the modify count. func (coll *HistColl) GetAnalyzeRowCount() float64 { diff --git a/pkg/ttl/ttlworker/BUILD.bazel b/pkg/ttl/ttlworker/BUILD.bazel index 89927a4ca96cb..b416ed33b63f6 100644 --- a/pkg/ttl/ttlworker/BUILD.bazel +++ b/pkg/ttl/ttlworker/BUILD.bazel @@ -81,7 +81,11 @@ go_test( "//pkg/session", "//pkg/sessionctx", "//pkg/sessionctx/variable", +<<<<<<< HEAD "//pkg/statistics/handle/autoanalyze", +======= + "//pkg/statistics", +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "//pkg/store/mockstore", "//pkg/testkit", "//pkg/timer/api", diff --git a/pkg/ttl/ttlworker/job_manager_integration_test.go b/pkg/ttl/ttlworker/job_manager_integration_test.go index 00ec19e8dfddd..21c865095a345 100644 --- a/pkg/ttl/ttlworker/job_manager_integration_test.go +++ b/pkg/ttl/ttlworker/job_manager_integration_test.go @@ -32,7 +32,11 @@ import ( "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" dbsession "github.com/pingcap/tidb/pkg/session" +<<<<<<< HEAD "github.com/pingcap/tidb/pkg/statistics/handle/autoanalyze" +======= + "github.com/pingcap/tidb/pkg/statistics" +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) "github.com/pingcap/tidb/pkg/testkit" timerapi "github.com/pingcap/tidb/pkg/timer/api" timertable "github.com/pingcap/tidb/pkg/timer/tablestore" @@ -178,10 +182,17 @@ func TestTTLAutoAnalyze(t *testing.T) { failpoint.Enable("github.com/pingcap/tidb/pkg/ttl/ttlworker/task-manager-loop-interval", fmt.Sprintf("return(%d)", time.Second)) defer failpoint.Disable("github.com/pingcap/tidb/pkg/ttl/ttlworker/task-manager-loop-interval") 
+<<<<<<< HEAD originAutoAnalyzeMinCnt := autoanalyze.AutoAnalyzeMinCnt autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = originAutoAnalyzeMinCnt +======= + originAutoAnalyzeMinCnt := statistics.AutoAnalyzeMinCnt + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = originAutoAnalyzeMinCnt +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() store, dom := testkit.CreateMockStoreAndDomain(t) @@ -364,10 +375,17 @@ func TestTTLJobDisable(t *testing.T) { failpoint.Enable("github.com/pingcap/tidb/pkg/ttl/ttlworker/resize-workers-interval", fmt.Sprintf("return(%d)", time.Second)) defer failpoint.Disable("github.com/pingcap/tidb/pkg/ttl/ttlworker/resize-workers-interval") +<<<<<<< HEAD originAutoAnalyzeMinCnt := autoanalyze.AutoAnalyzeMinCnt autoanalyze.AutoAnalyzeMinCnt = 0 defer func() { autoanalyze.AutoAnalyzeMinCnt = originAutoAnalyzeMinCnt +======= + originAutoAnalyzeMinCnt := statistics.AutoAnalyzeMinCnt + statistics.AutoAnalyzeMinCnt = 0 + defer func() { + statistics.AutoAnalyzeMinCnt = originAutoAnalyzeMinCnt +>>>>>>> 7e73ddc91b5 (statistics: add metrics for unneeded analyze table (#54822)) }() store, dom := testkit.CreateMockStoreAndDomain(t)