Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#44441
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
time-and-fate authored and ti-chi-bot committed Sep 6, 2023
1 parent a420763 commit e21ac87
Show file tree
Hide file tree
Showing 5 changed files with 531 additions and 11 deletions.
4 changes: 2 additions & 2 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (p *LogicalMemTable) DeriveStats(childStats []*property.StatsInfo, selfSche
stats := &property.StatsInfo{
RowCount: float64(statsTable.Count),
ColNDVs: make(map[int64]float64, len(p.TableInfo.Columns)),
HistColl: statsTable.GenerateHistCollFromColumnInfo(p.TableInfo.Columns, p.schema.Columns),
HistColl: statsTable.GenerateHistCollFromColumnInfo(p.TableInfo, p.schema.Columns),
StatsVersion: statistics.PseudoVersion,
}
for _, col := range selfSchema.Columns {
Expand Down Expand Up @@ -232,7 +232,7 @@ func (ds *DataSource) initStats(colGroups [][]*expression.Column) {
tableStats := &property.StatsInfo{
RowCount: float64(ds.statisticTable.Count),
ColNDVs: make(map[int64]float64, ds.schema.Len()),
HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns),
HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.tableInfo, ds.schema.Columns),
StatsVersion: ds.statisticTable.Version,
}
if ds.statisticTable.Pseudo {
Expand Down
325 changes: 325 additions & 0 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,328 @@ func hasPseudoStats(rows [][]interface{}) bool {
}
return false
}
<<<<<<< HEAD
=======

// TestNotLoadedStatsOnAllNULLCol verifies that the stats of a column holding only NULLs are still used for
// estimation when they have not been loaded, since loading them would not change the result.
//
// Two tables containing nothing but NULLs are analyzed, and the plans of the four left/right outer join
// combinations are compared against the expected row counts.
func TestNotLoadedStatsOnAllNULLCol(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	statsHandle := dom.StatsHandle()
	// NOTE(review): the non-zero lease presumably keeps column stats from being loaded eagerly — confirm.
	prevLease := statsHandle.Lease()
	statsHandle.SetLease(1000)
	defer func() {
		statsHandle.SetLease(prevLease)
	}()

	tk := testkit.NewTestKit(t, store)
	setupStmts := []string{
		"use test",
		"drop table if exists t1",
		"drop table if exists t2",
		"create table t1(a int)",
		"create table t2(a int)",
		"insert into t1 values(null), (null), (null), (null)",
		"insert into t2 values(null), (null)",
		"analyze table t1;",
		"analyze table t2;",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	planCases := []struct {
		query string
		plan  []string
	}{
		{
			query: "explain format = 'brief' select * from t1 left join t2 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 4.00 root test.t1.a, test.t2.a",
				"└─HashJoin 4.00 root left outer join, equal:[eq(test.t1.a, test.t2.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				// If we are not using stats on this column (which means we use pseudo estimation), the row count for the Selection will become 2.
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
				" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
				" └─TableReader(Probe) 4.00 root data:TableFullScan",
				" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t2 left join t1 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 2.00 root test.t1.a, test.t2.a",
				"└─HashJoin 2.00 root left outer join, equal:[eq(test.t2.a, test.t1.a)]",
				// If we are not using stats on this column, the build side will become t2 because of smaller row count.
				" ├─TableReader(Build) 0.00 root data:Selection",
				// If we are not using stats on this column, the row count for the Selection will become 4.
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
				" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
				" └─TableReader(Probe) 2.00 root data:TableFullScan",
				" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t1 right join t2 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 2.00 root test.t1.a, test.t2.a",
				"└─HashJoin 2.00 root right outer join, equal:[eq(test.t1.a, test.t2.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
				" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
				" └─TableReader(Probe) 2.00 root data:TableFullScan",
				" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
			},
		},
		{
			query: "explain format = 'brief' select * from t2 right join t1 on t1.a=t2.a order by t1.a, t2.a",
			plan: []string{
				"Sort 4.00 root test.t1.a, test.t2.a",
				"└─HashJoin 4.00 root right outer join, equal:[eq(test.t2.a, test.t1.a)]",
				" ├─TableReader(Build) 0.00 root data:Selection",
				" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
				" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
				" └─TableReader(Probe) 4.00 root data:TableFullScan",
				" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
			},
		},
	}
	for _, pc := range planCases {
		res := tk.MustQuery(pc.query)
		res.Check(testkit.Rows(pc.plan...))
	}
}

// TestCrossValidationSelectivity analyzes a small table with a clustered composite primary key (a, b)
// under analyze version 1 and checks the estimated row counts of a query that combines a range on the
// primary key with a filter on the non-key column c.
func TestCrossValidationSelectivity(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("set @@tidb_analyze_version = 1")
	tk.MustExec("create table t (a int, b int, c int, primary key (a, b) clustered)")
	// Consume the DDL event produced by the table creation before inserting data.
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))

	tk.MustExec("insert into t values (1,2,3), (1,4,5)")
	dumpErr := statsHandle.DumpStatsDeltaToKV(handle.DumpAll)
	require.NoError(t, dumpErr)
	tk.MustExec("analyze table t")

	const explainQuery = "explain format = 'brief' select * from t where a = 1 and b > 0 and b < 1000 and c > 1000"
	result := tk.MustQuery(explainQuery)
	result.Check(testkit.Rows(
		"TableReader 0.00 root data:Selection",
		"└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)",
		" └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false",
	))
}

// TestShowHistogramsLoadStatus checks that, with a non-zero stats lease, every row returned by
// `show stats_histograms` for a freshly analyzed table reports the load status "allEvicted".
func TestShowHistogramsLoadStatus(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	origLease := h.Lease()
	h.SetLease(time.Second)
	defer func() { h.SetLease(origLease) }()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int primary key, b int, c int, index idx(b, c))")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
	tk.MustExec("insert into t values (1,2,3), (4,5,6)")
	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
	tk.MustExec("analyze table t")
	require.NoError(t, h.Update(dom.InfoSchema()))
	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Rows()
	// Guard against a vacuous pass: without this, an empty result set would skip every assertion below.
	require.NotEmpty(t, rows)
	for _, row := range rows {
		// The value at index 10 is the one this test compares against the load status string.
		require.Equal(t, "allEvicted", row[10].(string))
	}
}

// TestSingleColumnIndexNDV checks the distinct count and null count reported by `show stats_histograms`
// for four columns and their single-column indexes after the table has been analyzed.
func TestSingleColumnIndexNDV(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, c varchar(20), d varchar(20), index idx_a(a), index idx_b(b), index idx_c(c), index idx_d(d))")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
	tk.MustExec("insert into t values (1, 1, 'xxx', 'zzz'), (2, 2, 'yyy', 'zzz'), (1, 3, null, 'zzz')")
	// Each iteration doubles the table: 3 rows become 3 * 2^5 = 96 rows, 32 of which have NULL in column c.
	for i := 0; i < 5; i++ {
		tk.MustExec("insert into t select * from t")
	}
	tk.MustExec("analyze table t")
	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Sort().Rows()
	// Each entry is {column_name, distinct_count, null_count}, in the sorted order of the result rows.
	expectedResults := [][]string{
		{"a", "2", "0"}, {"b", "3", "0"}, {"c", "2", "32"}, {"d", "1", "0"},
		{"idx_a", "2", "0"}, {"idx_b", "3", "0"}, {"idx_c", "2", "32"}, {"idx_d", "1", "0"},
	}
	// Fail explicitly on a row-count mismatch: too few rows would otherwise pass silently, and too many
	// would panic with an index-out-of-range on expectedResults.
	require.Len(t, rows, len(expectedResults))
	for i, row := range rows {
		require.Equal(t, expectedResults[i][0], row[3]) // column_name
		require.Equal(t, expectedResults[i][1], row[6]) // distinct_count
		require.Equal(t, expectedResults[i][2], row[7]) // null_count
	}
}

// TestColumnStatsLazyLoad checks that, with lazy loading of column stats enabled, the stats of both
// columns of an analyzed table report IsAllEvicted, and that this still holds after a second analyze.
func TestColumnStatsLazyLoad(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()
	prevLease := statsHandle.Lease()
	defer statsHandle.SetLease(prevLease)
	// Set `Lease` to `Millisecond` to enable column stats lazy load.
	statsHandle.SetLease(time.Millisecond)

	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int)")
	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	tk.MustExec("analyze table t")

	infoSchema := dom.InfoSchema()
	tbl, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()
	firstCol, secondCol := tblInfo.Columns[0], tblInfo.Columns[1]

	// requireBothEvicted fetches the table stats afresh for each column, exactly as two inline checks would.
	requireBothEvicted := func() {
		require.True(t, statsHandle.GetTableStats(tblInfo).Columns[firstCol.ID].IsAllEvicted())
		require.True(t, statsHandle.GetTableStats(tblInfo).Columns[secondCol.ID].IsAllEvicted())
	}

	requireBothEvicted()
	tk.MustExec("analyze table t")
	requireBothEvicted()
}

// TestUpdateNotLoadIndexFMSketch checks that the index stats of both partitions of a range-partitioned
// table carry a nil FMSketch after analyze, and still do after the stats cache is cleared and updated.
func TestUpdateNotLoadIndexFMSketch(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (10),partition p1 values less than maxvalue)")
	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	tk.MustExec("analyze table t")

	infoSchema := dom.InfoSchema()
	tbl, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()
	idxInfo := tblInfo.Indices[0]
	firstPart := tblInfo.Partition.Definitions[0]
	secondPart := tblInfo.Partition.Definitions[1]

	// requireNoFMSketch asserts that neither partition's index stats hold an FMSketch.
	requireNoFMSketch := func() {
		require.Nil(t, statsHandle.GetPartitionStats(tblInfo, firstPart.ID).Indices[idxInfo.ID].FMSketch)
		require.Nil(t, statsHandle.GetPartitionStats(tblInfo, secondPart.ID).Indices[idxInfo.ID].FMSketch)
	}

	requireNoFMSketch()
	statsHandle.Clear()
	require.NoError(t, statsHandle.Update(infoSchema))
	requireNoFMSketch()
}

// TestIndexJoinInnerRowCountUpperBound installs mocked column stats on table t and checks the row
// counts estimated for an index join whose inner side is probed through index idx(b).
func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a int, b int, index idx(b))")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	infoSchema := dom.InfoSchema()
	tbl, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()

	// Mock the stats:
	// Both columns get the same histogram.
	// From 0 to 499, each value has 1000 rows. Therefore, NDV is 500 and total row count is 500000.
	mockTbl := mockStatsTable(tblInfo, 500000)
	datums, err := generateIntDatum(1, 500)
	require.NoError(t, err)
	for colID := int64(1); colID <= 2; colID++ {
		hist := mockStatsHistogram(colID, datums, 1000, types.NewFieldType(mysql.TypeLonglong))
		mockTbl.Columns[colID] = &statistics.Column{
			Histogram:         *hist,
			Info:              tblInfo.Columns[colID-1],
			StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
			StatsVer:          2,
		}
	}
	generateMapsForMockStatsTbl(mockTbl)
	// Replace the histogram collection of the handle's table stats with the mocked one.
	cachedStats := statsHandle.GetTableStats(tblInfo)
	cachedStats.HistColl = mockTbl.HistColl

	const explainQuery = "explain format = 'brief' " +
		"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"
	result := tk.MustQuery(explainQuery)
	result.Check(testkit.Rows(
		"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
		"├─TableReader(Build) 1000.00 root data:Selection",
		"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
		"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
		"└─IndexLookUp(Probe) 1000000.00 root ",
		" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
		" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
		" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
		" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
	))
}

// TestOrderingIdxSelectivityThreshold installs mocked column and index stats on table t and then runs
// the statements from the integration suite test data: statements that do not start with "explain" are
// simply executed, while explain statements are checked against (or, in record mode, written to) the
// stored results.
func TestOrderingIdxSelectivityThreshold(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}

	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int primary key , b int, c int, index ib(b), index ic(c))")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
	is := dom.InfoSchema()
	tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tb.Meta()

	// Mock the stats:
	// total row count 100000
	// column a: PK, from 0 to 100000, NDV 100000
	// column b, c: from 0 to 10000, each value has 10 rows, NDV 10000
	// indexes are created on (b), (c) respectively
	mockStatsTbl := mockStatsTable(tblInfo, 100000)
	pkColValues, err := generateIntDatum(1, 100000)
	require.NoError(t, err)
	mockStatsTbl.Columns[1] = &statistics.Column{
		Histogram:         *mockStatsHistogram(1, pkColValues, 1, types.NewFieldType(mysql.TypeLonglong)),
		Info:              tblInfo.Columns[0],
		StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
		StatsVer:          2,
	}
	colValues, err := generateIntDatum(1, 10000)
	require.NoError(t, err)
	// The index histograms are built from the key-encoded form of each column value.
	// The final length is known up front, so allocate the backing array once.
	idxValues := make([]types.Datum, 0, len(colValues))
	for _, val := range colValues {
		b, err := codec.EncodeKey(sc, nil, val)
		require.NoError(t, err)
		idxValues = append(idxValues, types.NewBytesDatum(b))
	}

	// Columns b and c (IDs 2 and 3) share the same mocked histogram.
	for i := 2; i <= 3; i++ {
		mockStatsTbl.Columns[int64(i)] = &statistics.Column{
			Histogram:         *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)),
			Info:              tblInfo.Columns[i-1],
			StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
			StatsVer:          2,
		}
	}
	// Both indexes (IDs 1 and 2) share the same mocked histogram.
	for i := 1; i <= 2; i++ {
		mockStatsTbl.Indices[int64(i)] = &statistics.Index{
			Histogram:         *mockStatsHistogram(int64(i), idxValues, 10, types.NewFieldType(mysql.TypeBlob)),
			Info:              tblInfo.Indices[i-1],
			StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
			StatsVer:          2,
		}
	}
	generateMapsForMockStatsTbl(mockStatsTbl)
	// Replace the histogram collection of the handle's table stats with the mocked one.
	stat := h.GetTableStats(tblInfo)
	stat.HistColl = mockStatsTbl.HistColl

	var (
		input  []string
		output []struct {
			Query  string
			Result []string
		}
	)
	integrationSuiteData := statistics.GetIntegrationSuiteData()
	integrationSuiteData.LoadTestCases(t, &input, &output)
	for i := 0; i < len(input); i++ {
		testdata.OnRecord(func() {
			output[i].Query = input[i]
		})
		// Statements other than explain are executed only for their effect.
		if !strings.HasPrefix(input[i], "explain") {
			testKit.MustExec(input[i])
			continue
		}
		testdata.OnRecord(func() {
			output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
		})
		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
	}
}

// TestIssue44369 analyzes a table with a composite index, refreshes the stats cache, renames one of
// the indexed columns, and then runs a query filtering on both indexed columns. The final query has no
// result assertion; it only has to execute successfully.
func TestIssue44369(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	statsHandle := dom.StatsHandle()
	tk := testkit.NewTestKit(t, store)

	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, index iab(a,b));")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))

	tk.MustExec("insert into t value(1,1);")
	dumpErr := statsHandle.DumpStatsDeltaToKV(handle.DumpAll)
	require.NoError(t, dumpErr)
	tk.MustExec("analyze table t;")

	// Refresh the stats cache before the rename so the query below sees stats collected under the old column name.
	infoSchema := dom.InfoSchema()
	updateErr := statsHandle.Update(infoSchema)
	require.NoError(t, updateErr)

	tk.MustExec("alter table t rename column b to bb;")
	tk.MustExec("select * from t where a = 10 and bb > 20;")
}
>>>>>>> 282c753cfbc (statistics, planner: use the correct `IndexInfo` in `GenerateHistCollFromColumnInfo()` (#44441))
4 changes: 2 additions & 2 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ func TestSelectivity(t *testing.T) {
sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection)
ds := sel.Children()[0].(*plannercore.DataSource)

histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns)
histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.TableInfo(), ds.Schema().Columns)

ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil)
require.NoErrorf(t, err, "for %s", tt.exprs)
Expand Down Expand Up @@ -675,7 +675,7 @@ func TestDNFCondSelectivity(t *testing.T) {
sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection)
ds := sel.Children()[0].(*plannercore.DataSource)

histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns)
histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.TableInfo(), ds.Schema().Columns)

ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil)
require.NoErrorf(t, err, "error %v, for expr %s", err, tt)
Expand Down
Loading

0 comments on commit e21ac87

Please sign in to comment.