diff --git a/planner/core/stats.go b/planner/core/stats.go index 19650e9b753ca..d8ba0ce69b848 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -65,7 +65,7 @@ func (p *LogicalMemTable) DeriveStats(childStats []*property.StatsInfo, selfSche stats := &property.StatsInfo{ RowCount: float64(statsTable.Count), ColNDVs: make(map[int64]float64, len(p.TableInfo.Columns)), - HistColl: statsTable.GenerateHistCollFromColumnInfo(p.TableInfo.Columns, p.schema.Columns), + HistColl: statsTable.GenerateHistCollFromColumnInfo(p.TableInfo, p.schema.Columns), StatsVersion: statistics.PseudoVersion, } for _, col := range selfSchema.Columns { @@ -232,7 +232,7 @@ func (ds *DataSource) initStats(colGroups [][]*expression.Column) { tableStats := &property.StatsInfo{ RowCount: float64(ds.statisticTable.Count), ColNDVs: make(map[int64]float64, ds.schema.Len()), - HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns), + HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.tableInfo, ds.schema.Columns), StatsVersion: ds.statisticTable.Version, } if ds.statisticTable.Pseudo { diff --git a/statistics/integration_test.go b/statistics/integration_test.go index 3a858b1b22fcc..4219af8712fba 100644 --- a/statistics/integration_test.go +++ b/statistics/integration_test.go @@ -667,3 +667,151 @@ func TestUpdateNotLoadIndexFMSketch(t *testing.T) { require.Nil(t, h.GetPartitionStats(tblInfo, p0.ID).Indices[idxInfo.ID].FMSketch) require.Nil(t, h.GetPartitionStats(tblInfo, p1.ID).Indices[idxInfo.ID].FMSketch) } +<<<<<<< HEAD +======= + +func TestIndexJoinInnerRowCountUpperBound(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + testKit := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int, b int, index idx(b))") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + is := dom.InfoSchema() + tb, err := 
is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblInfo := tb.Meta() + + // Mock the stats: + // The two columns are the same. + // From 0 to 499, each value has 1000 rows. Therefore, NDV is 500 and total row count is 500000. + mockStatsTbl := mockStatsTable(tblInfo, 500000) + colValues, err := generateIntDatum(1, 500) + require.NoError(t, err) + for i := 1; i <= 2; i++ { + mockStatsTbl.Columns[int64(i)] = &statistics.Column{ + Histogram: *mockStatsHistogram(int64(i), colValues, 1000, types.NewFieldType(mysql.TypeLonglong)), + Info: tblInfo.Columns[i-1], + StatsLoadedStatus: statistics.NewStatsFullLoadStatus(), + StatsVer: 2, + } + } + generateMapsForMockStatsTbl(mockStatsTbl) + stat := h.GetTableStats(tblInfo) + stat.HistColl = mockStatsTbl.HistColl + + testKit.MustQuery("explain format = 'brief' " + + "select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"). + Check(testkit.Rows( + "IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)", + "├─TableReader(Build) 1000.00 root data:Selection", + "│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))", + "│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 1000000.00 root ", + " ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))", + " │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo", + " └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)", + " └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + )) +} + +func TestOrderingIdxSelectivityThreshold(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + testKit := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + sc := &stmtctx.StatementContext{TimeZone: time.UTC} + + 
testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int primary key , b int, c int, index ib(b), index ic(c))") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + is := dom.InfoSchema() + tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblInfo := tb.Meta() + + // Mock the stats: + // total row count 100000 + // column a: PK, from 0 to 100000, NDV 100000 + // column b, c: from 0 to 10000, each value has 10 rows, NDV 10000 + // indexes are created on (b), (c) respectively + mockStatsTbl := mockStatsTable(tblInfo, 100000) + pkColValues, err := generateIntDatum(1, 100000) + require.NoError(t, err) + mockStatsTbl.Columns[1] = &statistics.Column{ + Histogram: *mockStatsHistogram(1, pkColValues, 1, types.NewFieldType(mysql.TypeLonglong)), + Info: tblInfo.Columns[0], + StatsLoadedStatus: statistics.NewStatsFullLoadStatus(), + StatsVer: 2, + } + colValues, err := generateIntDatum(1, 10000) + require.NoError(t, err) + idxValues := make([]types.Datum, 0) + for _, val := range colValues { + b, err := codec.EncodeKey(sc, nil, val) + require.NoError(t, err) + idxValues = append(idxValues, types.NewBytesDatum(b)) + } + + for i := 2; i <= 3; i++ { + mockStatsTbl.Columns[int64(i)] = &statistics.Column{ + Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)), + Info: tblInfo.Columns[i-1], + StatsLoadedStatus: statistics.NewStatsFullLoadStatus(), + StatsVer: 2, + } + } + for i := 1; i <= 2; i++ { + mockStatsTbl.Indices[int64(i)] = &statistics.Index{ + Histogram: *mockStatsHistogram(int64(i), idxValues, 10, types.NewFieldType(mysql.TypeBlob)), + Info: tblInfo.Indices[i-1], + StatsLoadedStatus: statistics.NewStatsFullLoadStatus(), + StatsVer: 2, + } + } + generateMapsForMockStatsTbl(mockStatsTbl) + stat := h.GetTableStats(tblInfo) + stat.HistColl = mockStatsTbl.HistColl + + var ( + input []string + output []struct { + Query 
string + Result []string + } + ) + integrationSuiteData := statistics.GetIntegrationSuiteData() + integrationSuiteData.LoadTestCases(t, &input, &output) + for i := 0; i < len(input); i++ { + testdata.OnRecord(func() { + output[i].Query = input[i] + }) + if !strings.HasPrefix(input[i], "explain") { + testKit.MustExec(input[i]) + continue + } + testdata.OnRecord(func() { + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows()) + }) + testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...)) + } +} + +func TestIssue44369(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + h := dom.StatsHandle() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int, index iab(a,b));") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + tk.MustExec("insert into t value(1,1);") + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + tk.MustExec("analyze table t;") + is := dom.InfoSchema() + require.NoError(t, h.Update(is)) + tk.MustExec("alter table t rename column b to bb;") + tk.MustExec("select * from t where a = 10 and bb > 20;") +} +>>>>>>> 282c753cfbc (statistics, planner: use the correct `IndexInfo` in `GenerateHistCollFromColumnInfo()` (#44441)) diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 5bd41dc1ae41f..ba0bf888bfe3b 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -582,7 +582,7 @@ func TestSelectivity(t *testing.T) { sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) ds := sel.Children()[0].(*plannercore.DataSource) - histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns) + histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.TableInfo(), ds.Schema().Columns) ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil) require.NoErrorf(t, err, "for %s", tt.exprs) @@ -698,7 +698,7 @@ func TestDNFCondSelectivity(t 
*testing.T) { sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) ds := sel.Children()[0].(*plannercore.DataSource) - histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns) + histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.TableInfo(), ds.Schema().Columns) ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil) require.NoErrorf(t, err, "error %v, for expr %s", err, tt) diff --git a/statistics/table.go b/statistics/table.go index 90351b3d0c78e..288e77faf9412 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -598,20 +598,19 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { return newColl } +<<<<<<< HEAD // GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and IdxID2ColIDs is built from the given parameter. func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl { +======= +// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and IdxID2ColIDs are built from the given parameters. +func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl { +>>>>>>> 282c753cfbc (statistics, planner: use the correct `IndexInfo` in `GenerateHistCollFromColumnInfo()` (#44441)) newColHistMap := make(map[int64]*Column) colInfoID2UniqueID := make(map[int64]int64, len(columns)) - colNames2UniqueID := make(map[string]int64) + idxID2idxInfo := make(map[int64]*model.IndexInfo) for _, col := range columns { colInfoID2UniqueID[col.ID] = col.UniqueID } - for _, colInfo := range infos { - uniqueID, ok := colInfoID2UniqueID[colInfo.ID] - if ok { - colNames2UniqueID[colInfo.Name.L] = uniqueID - } - } for id, colHist := range coll.Columns { uniqueID, ok := colInfoID2UniqueID[id] // Collect the statistics by the given columns. 
@@ -619,13 +618,28 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, newColHistMap[uniqueID] = colHist } } + for _, idxInfo := range tblInfo.Indices { + idxID2idxInfo[idxInfo.ID] = idxInfo + } newIdxHistMap := make(map[int64]*Index) idx2Columns := make(map[int64][]int64) +<<<<<<< HEAD colID2IdxID := make(map[int64]int64) for _, idxHist := range coll.Indices { ids := make([]int64, 0, len(idxHist.Info.Columns)) for _, idxCol := range idxHist.Info.Columns { uniqueID, ok := colNames2UniqueID[idxCol.Name.L] +======= + colID2IdxIDs := make(map[int64][]int64) + for id, idxHist := range coll.Indices { + idxInfo := idxID2idxInfo[id] + if idxInfo == nil { + continue + } + ids := make([]int64, 0, len(idxInfo.Columns)) + for _, idxCol := range idxInfo.Columns { + uniqueID, ok := colInfoID2UniqueID[tblInfo.Columns[idxCol.Offset].ID] +>>>>>>> 282c753cfbc (statistics, planner: use the correct `IndexInfo` in `GenerateHistCollFromColumnInfo()` (#44441)) if !ok { break } diff --git a/statistics/trace_test.go b/statistics/trace_test.go index 03d3c71e7f8ca..75ab5e88cfc84 100644 --- a/statistics/trace_test.go +++ b/statistics/trace_test.go @@ -100,3 +100,184 @@ func TestTraceCE(t *testing.T) { require.ElementsMatch(t, resultJSON, out[i].Trace) } } +<<<<<<< HEAD +======= + +func TestTraceCEPartitionTable(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int, b int, d varchar(10), index idx(a, b)) PARTITION BY RANGE (a) (PARTITION p0 VALUES LESS THAN MAXVALUE);") + tk.MustExec(`insert into t values(1, 1, "aaa"), + (1, 1, "bbb"), + (1, 2, "ccc"), + (1, 2, "ddd"), + (2, 2, "aaa"), + (2, 3, "bbb")`) + tk.MustExec("analyze table t") + result := tk.MustQuery("trace plan target='estimation' select * from t where a >=1") + require.Len(t, result.Rows(), 1) + resultStr := result.Rows()[0][0].(string) + var resultJSON 
[]*tracing.CETraceRecord + err := json.Unmarshal([]byte(resultStr), &resultJSON) + require.NoError(t, err) + for _, r := range resultJSON { + require.Equal(t, "t", r.TableName) + } +} + +func TestTraceDebugSelectivity(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + statsHandle := dom.StatsHandle() + + // Make the v1 analyze result stable + // 1. make sure all rows are always collected as samples + originalSampleSize := executor.MaxRegionSampleSize + executor.MaxRegionSampleSize = 10000 + defer func() { + executor.MaxRegionSampleSize = originalSampleSize + }() + // 2. make the order of samples for building TopN stable + // (the earlier TopN entry will modify the CMSketch, therefore influence later TopN entry's row count, + // see (*SampleCollector).ExtractTopN() for details) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/statistics/StabilizeV1AnalyzeTopN", `return(true)`)) + defer func() { + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/statistics/StabilizeV1AnalyzeTopN")) + }() + + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int, b int, index iab(a, b), index ib(b))") + require.NoError(t, statsHandle.HandleDDLEvent(<-statsHandle.DDLEventCh())) + + // Prepare the data. + + // For column a, from -1000 to 999, each value appears 1 time, + // but if it's divisible by 100, make this value appear 50 times. + // For column b, it's always a+500. 
+ start := -1000 + for i := 0; i < 2000; i += 50 { + sql := "insert into t values " + // 50 rows as a batch + values := make([]string, 0, 50) + for j := 0; j < 50; j++ { + values = append(values, fmt.Sprintf("(%d,%d)", start+i+j, start+i+j+500)) + } + sql = sql + strings.Join(values, ",") + tk.MustExec(sql) + + if i%100 == 0 { + sql := "insert into t values " + topNValue := fmt.Sprintf("(%d,%d) ,", start+i, start+i+500) + sql = sql + strings.Repeat(topNValue, 49) + sql = sql[0 : len(sql)-1] + tk.MustExec(sql) + } + } + require.Nil(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll)) + tk.MustExec("analyze table t with 1 samplerate, 20 topn") + require.Nil(t, statsHandle.Update(dom.InfoSchema())) + // Add 100 modify count + sql := "insert into t values " + topNValue := fmt.Sprintf("(%d,%d) ,", 5000, 5000) + sql = sql + strings.Repeat(topNValue, 100) + sql = sql[0 : len(sql)-1] + tk.MustExec(sql) + require.Nil(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll)) + require.Nil(t, statsHandle.Update(dom.InfoSchema())) + + var ( + in []string + out []struct { + ResultForV1 interface{} + ResultForV2 interface{} + } + ) + traceSuiteData := statistics.GetTraceSuiteData() + traceSuiteData.LoadTestCases(t, &in, &out) + + // Trigger loading needed statistics. + for _, tt := range in { + sql := "explain " + tt + tk.MustExec(sql) + } + err := statsHandle.LoadNeededHistograms() + require.NoError(t, err) + + sctx := tk.Session().(sessionctx.Context) + tb, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblInfo := tb.Meta() + statsTbl := statsHandle.GetTableStats(tblInfo) + stmtCtx := sctx.GetSessionVars().StmtCtx + stmtCtx.EnableOptimizerDebugTrace = true + + // Collect common information for the following tests. 
+ p := parser.New() + dsSchemaCols := make([][]*expression.Column, 0, len(in)) + selConditions := make([][]expression.Expression, 0, len(in)) + tblInfos := make([]*model.TableInfo, 0, len(in)) + for _, sql := range in { + stmt, err := p.ParseOneStmt(sql, "", "") + require.NoError(t, err) + ret := &plannercore.PreprocessorReturn{} + err = plannercore.Preprocess(context.Background(), sctx, stmt, plannercore.WithPreprocessorReturn(ret)) + require.NoError(t, err) + p, _, err := plannercore.BuildLogicalPlanForTest(context.Background(), sctx, stmt, ret.InfoSchema) + require.NoError(t, err) + + sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) + ds := sel.Children()[0].(*plannercore.DataSource) + + dsSchemaCols = append(dsSchemaCols, ds.Schema().Columns) + selConditions = append(selConditions, sel.Conditions) + tblInfos = append(tblInfos, ds.TableInfo()) + } + var buf bytes.Buffer + encoder := json.NewEncoder(&buf) + encoder.SetEscapeHTML(false) + + // Test using ver2 stats. + for i, sql := range in { + stmtCtx.OptimizerDebugTrace = nil + histColl := statsTbl.GenerateHistCollFromColumnInfo(tblInfos[i], dsSchemaCols[i]) + _, _, err = histColl.Selectivity(sctx, selConditions[i], nil) + require.NoError(t, err, sql, "For ver2") + traceInfo := stmtCtx.OptimizerDebugTrace + buf.Reset() + require.NoError(t, encoder.Encode(traceInfo), sql, "For ver2") + var res interface{} + require.NoError(t, json.Unmarshal(buf.Bytes(), &res), sql, "For ver2") + testdata.OnRecord(func() { + out[i].ResultForV2 = res + }) + require.Equal(t, out[i].ResultForV2, res, sql, "For ver2") + } + + tk.MustExec("set tidb_analyze_version = 1") + tk.MustExec("analyze table t with 20 topn") + require.Nil(t, statsHandle.Update(dom.InfoSchema())) + statsTbl = statsHandle.GetTableStats(tblInfo) + + // Test using ver1 stats. 
+ stmtCtx = sctx.GetSessionVars().StmtCtx + stmtCtx.EnableOptimizerDebugTrace = true + for i, sql := range in { + stmtCtx.OptimizerDebugTrace = nil + histColl := statsTbl.GenerateHistCollFromColumnInfo(tblInfos[i], dsSchemaCols[i]) + _, _, err = histColl.Selectivity(sctx, selConditions[i], nil) + require.NoError(t, err, sql, "For ver1") + traceInfo := stmtCtx.OptimizerDebugTrace + buf.Reset() + require.NoError(t, encoder.Encode(traceInfo), sql, "For ver1") + var res interface{} + require.NoError(t, json.Unmarshal(buf.Bytes(), &res), sql, "For ver1") + testdata.OnRecord(func() { + out[i].ResultForV1 = res + }) + require.Equal(t, out[i].ResultForV1, res, sql, "For ver1") + } +} +>>>>>>> 282c753cfbc (statistics, planner: use the correct `IndexInfo` in `GenerateHistCollFromColumnInfo()` (#44441))