Skip to content

Commit

Permalink
statistics: fix estimation error when ranges are too many and modify count is large (#40472)
Browse files Browse the repository at this point in the history

* fix

* fmt

* bazel update

* update test result

Co-authored-by: Weizhen Wang <wangweizhen@pingcap.com>
Co-authored-by: Ti Chi Robot <ti-community-prow-bot@tidb.io>
  • Loading branch information
3 people authored Jan 11, 2023
1 parent 2cf328b commit 2f13578
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 5 deletions.
1 change: 1 addition & 0 deletions statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ go_test(
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_log//:log",
"@com_github_stretchr_testify//require",
"@org_golang_x_exp//slices",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_zap//:zap",
],
Expand Down
10 changes: 6 additions & 4 deletions statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
totalCount := float64(0)
isSingleCol := len(idx.Info.Columns) == 1
for _, indexRange := range indexRanges {
var count float64
lb, err := codec.EncodeKey(sc, nil, indexRange.LowVal...)
if err != nil {
return 0, err
Expand All @@ -242,7 +243,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
totalCount++
continue
}
count := idx.equalRowCount(lb, realtimeRowCount)
count = idx.equalRowCount(lb, realtimeRowCount)
// If the current table row count has changed, we should scale the row count accordingly.
count *= idx.GetIncreaseFactor(realtimeRowCount)
totalCount += count
Expand All @@ -262,7 +263,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
r := types.NewBytesDatum(rb)
lowIsNull := bytes.Equal(lb, nullKeyBytes)
if isSingleCol && lowIsNull {
totalCount += float64(idx.Histogram.NullCount)
count += float64(idx.Histogram.NullCount)
}
expBackoffSuccess := false
// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
Expand Down Expand Up @@ -301,16 +302,17 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
}
}
if !expBackoffSuccess {
totalCount += idx.BetweenRowCount(l, r)
count += idx.BetweenRowCount(l, r)
}

// If the current table row count has changed, we should scale the row count accordingly.
totalCount *= idx.GetIncreaseFactor(realtimeRowCount)
count *= idx.GetIncreaseFactor(realtimeRowCount)

// handling the out-of-range part
if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) {
totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, modifyCount)
}
totalCount += count
}
totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount))
return totalCount, nil
Expand Down
84 changes: 83 additions & 1 deletion statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tidb/util/ranger"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)

func TestCollationColumnEstimate(t *testing.T) {
Expand Down Expand Up @@ -891,7 +892,7 @@ func prepareSelectivity(testKit *testkit.TestKit, dom *domain.Domain) (*statisti
return statsTbl, nil
}

func getRange(start, end int64) []*ranger.Range {
func getRange(start, end int64) ranger.Ranges {
ran := &ranger.Range{
LowVal: []types.Datum{types.NewIntDatum(start)},
HighVal: []types.Datum{types.NewIntDatum(end)},
Expand All @@ -900,6 +901,21 @@ func getRange(start, end int64) []*ranger.Range {
return []*ranger.Range{ran}
}

// getRanges builds one single-value-column range per position i, using
// start[i] as the low bound and end[i] as the high bound. Every range gets
// the collator slice returned by collate.GetBinaryCollatorSlice(1).
//
// It returns nil when start and end have different lengths, and also when
// both are empty (nothing is ever appended to the named result).
func getRanges(start, end []int64) (res ranger.Ranges) {
	if len(start) != len(end) {
		return nil
	}
	for i, low := range start {
		res = append(res, &ranger.Range{
			LowVal:    []types.Datum{types.NewIntDatum(low)},
			HighVal:   []types.Datum{types.NewIntDatum(end[i])},
			Collators: collate.GetBinaryCollatorSlice(1),
		})
	}
	return res
}

func TestSelectivityGreedyAlgo(t *testing.T) {
nodes := make([]*statistics.StatsNode, 3)
nodes[0] = statistics.MockStatsNode(1, 3, 2)
Expand Down Expand Up @@ -1075,3 +1091,69 @@ func TestGlobalStatsOutOfRangeEstimationAfterDelete(t *testing.T) {
testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
}
}

// generateMapsForMockStatsTbl fills statsTbl.Idx2ColumnIDs and
// statsTbl.ColID2IdxIDs from the indices already present in statsTbl.Indices.
//
// For each index, the Offset of every index column is recorded as that
// column's id, and the index ID is registered under the id of its first
// column. The per-column index ID lists are sorted so the result does not
// depend on the iteration order of statsTbl.Indices.
//
// NOTE(review): column Offset is used as the column id here; this presumably
// matches how the mocked stats table keys its columns — confirm against the
// mock helper. An index with no columns would panic on the first-column lookup.
func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
	idxToCols := make(map[int64][]int64)
	colToIdxs := make(map[int64][]int64)
	for _, idx := range statsTbl.Indices {
		cols := idx.Info.Columns
		colIDs := make([]int64, len(cols))
		for i, c := range cols {
			colIDs[i] = int64(c.Offset)
		}
		first := colIDs[0]
		colToIdxs[first] = append(colToIdxs[first], idx.ID)
		idxToCols[idx.ID] = colIDs
	}
	// Sorting in place is enough: the slice stored in the map shares the
	// same backing array.
	for colID := range colToIdxs {
		slices.Sort(colToIdxs[colID])
	}
	statsTbl.Idx2ColumnIDs = idxToCols
	statsTbl.ColID2IdxIDs = colToIdxs
}

// TestIssue39593 estimates the row count of twenty point ranges on a
// two-column index backed by mocked statistics, first with the mocked table
// count as created and then after multiplying statsTbl.Count by 10. The
// second estimate is expected to be exactly ten times the first.
func TestIssue39593(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table t(a int, b int, index idx(a, b))",
	} {
		tk.MustExec(stmt)
	}
	tb, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tb.Meta()

	// Mock the statistics.Table: both columns share the same histogram data.
	statsTbl := mockStatsTable(tblInfo, 540)
	colValues, err := generateIntDatum(1, 54)
	require.NoError(t, err)
	longlongTp := types.NewFieldType(mysql.TypeLonglong)
	for colID := int64(1); colID <= 2; colID++ {
		statsTbl.Columns[colID] = &statistics.Column{
			Histogram:         *mockStatsHistogram(colID, colValues, 10, longlongTp),
			Info:              tblInfo.Columns[colID-1],
			StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
			StatsVer:          2,
		}
	}

	// Mock the index statistics.
	idxValues, err := generateIntDatum(2, 3)
	require.NoError(t, err)
	blobTp := types.NewFieldType(mysql.TypeBlob)
	statsTbl.Indices[1] = &statistics.Index{
		Histogram: *mockStatsHistogram(1, idxValues, 60, blobTp),
		Info:      tblInfo.Indices[0],
		StatsVer:  2,
	}
	generateMapsForMockStatsTbl(statsTbl)

	sctx := tk.Session()
	idxID := tblInfo.Indices[0].ID

	// Point values 1..20; each one is used as both low and high bound.
	vals := make([]int64, 0, 20)
	for v := int64(1); v <= 20; v++ {
		vals = append(vals, v)
	}

	// estimate runs the index row-count estimation against the current statsTbl.
	estimate := func() float64 {
		count, err := statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals))
		require.NoError(t, err)
		return count
	}

	// Estimated row count without any changes.
	require.Equal(t, float64(360), estimate())

	// Estimated row count after mocking modifications on the table.
	statsTbl.Count *= 10
	require.Equal(t, float64(3600), estimate())
}

0 comments on commit 2f13578

Please sign in to comment.