Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: fix wrong count when is out of range with datetime #58661

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
tmp fix
Signed-off-by: Weizhen Wang <wangweizhen@pingcap.com>
  • Loading branch information
hawkingrei committed Jan 7, 2025
commit 367487eb97d14788f02fe5e1674cd462cc588b80
3 changes: 2 additions & 1 deletion build/nogo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,8 @@
"/cgo/": "ignore cgo code",
"external/": "no need to vet third party code",
"$GOROOT/": "ignore GOROOT code",
".*_generated\\.go$": "ignore generated code"
".*_generated\\.go$": "ignore generated code",
".*_test\\.go$": "ignore test code"
}
},
"nilfunc": {
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func GetColumnRowCount(sctx planctx.PlanContext, c *statistics.Column, ranges []
if c.StatsVer == statistics.Version2 {
histNDV -= int64(c.TopN.Num())
}
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, realtimeRowCount, modifyCount, histNDV)
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, realtimeRowCount, modifyCount, histNDV, false)
}

if debugTrace {
Expand Down
14 changes: 2 additions & 12 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package cardinality

import (
"bytes"
"fmt"
"math"
"slices"
"strings"
Expand Down Expand Up @@ -215,19 +214,9 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
return false
}

func Shortening(lb []byte, rb []byte, kind byte) {
if kind != types.KindMysqlTime {
return
}

}

// It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table.
func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) {
sc := sctx.GetSessionVars().StmtCtx
if !sctx.GetSessionVars().InRestrictedSQL {
fmt.Println("here")
}
debugTrace := sc.EnableOptimizerDebugTrace
if debugTrace {
debugtrace.EnterContextCommon(sctx)
Expand All @@ -250,6 +239,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
if debugTrace {
debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount)
}
isMysqlTime := indexRange.LowVal[0].Kind() == types.KindMysqlTime && indexRange.HighVal[0].Kind() == types.KindMysqlTime
fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns)
if bytes.Equal(lb, rb) {
// case 1: it's a point
Expand Down Expand Up @@ -361,7 +351,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
histNDV -= int64(idx.TopN.Num())
}
}
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, realtimeRowCount, modifyCount, histNDV)
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, realtimeRowCount, modifyCount, histNDV, isMysqlTime)
}

if debugTrace {
Expand Down
4 changes: 3 additions & 1 deletion pkg/planner/core/issuetest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ go_test(
data = glob(["testdata/**"]),
flaky = True,
race = "on",
shard_count = 6,
shard_count = 7,
deps = [
"//pkg/domain",
"//pkg/parser",
"//pkg/planner",
"//pkg/planner/core",
"//pkg/planner/core/base",
"//pkg/planner/core/resolve",
"//pkg/statistics/util",
"//pkg/testkit",
"//pkg/testkit/testdata",
"//pkg/testkit/testmain",
Expand Down
14 changes: 13 additions & 1 deletion pkg/planner/core/issuetest/planner_issue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,17 @@ func TestIssue50080(t *testing.T) {
tk.MustExec("use test;")
tk.MustExec("CREATE TABLE `test` (`ecif_party_no` varchar(20) DEFAULT NULL,`busi_cust_no` varchar(20) DEFAULT NULL,`busi_series_cd` varchar(2) DEFAULT NULL,`org_belong` varchar(15) DEFAULT NULL,`party_no` varchar(20) DEFAULT NULL,`rela_status_cd` varchar(2) DEFAULT NULL,`rela_status_desc` varchar(20) DEFAULT NULL,`created_by` varchar(100) DEFAULT 'ecifdata',`created_date` datetime DEFAULT CURRENT_TIMESTAMP,`updated_by` varchar(100) DEFAULT 'ecifdata',`updated_date` datetime DEFAULT CURRENT_TIMESTAMP,`id_tp00_cust_no_rela` varchar(40) NOT NULL DEFAULT uuid(),KEY `IX_CUST_RELA_DATE` (`updated_date`),KEY `IX_TPCNR_BCN` (`busi_cust_no`),KEY `IX_TPCNR_EPN` (`ecif_party_no`),KEY `IX_TPCNR_PAN` (`party_no`),PRIMARY KEY (`id_tp00_cust_no_rela`) /*T![clustered_index] NONCLUSTERED */);")
require.NoError(t, loadTableStats("test.json", dom))
tk.MustQuery("explain select * from test where updated_date > '2023-12-31 23:59:00' and updated_date<'2024-01-01 00:00:01';").Check(testkit.Rows())
tk.MustQuery("explain select * from test where updated_date > '2023-12-31 23:59:00' and updated_date<'2023-12-31 23:59:59';").Check(testkit.Rows(
"IndexLookUp_7 229.43 root ",
"├─IndexRangeScan_5(Build) 229.43 cop[tikv] table:test, index:IX_CUST_RELA_DATE(updated_date) range:(2023-12-31 23:59:00,2023-12-31 23:59:59), keep order:false",
"└─TableRowIDScan_6(Probe) 229.43 cop[tikv] table:test keep order:false"))
tk.MustQuery("explain select * from test where updated_date > '2023-12-31 23:59:00' and updated_date<'2024-01-01 00:00:01';").Check(testkit.Rows(
"IndexLookUp_7 237.21 root ",
"├─IndexRangeScan_5(Build) 237.21 cop[tikv] table:test, index:IX_CUST_RELA_DATE(updated_date) range:(2023-12-31 23:59:00,2024-01-01 00:00:01), keep order:false",
"└─TableRowIDScan_6(Probe) 237.21 cop[tikv] table:test keep order:false"))
tk.MustExec("drop index IX_CUST_RELA_DATE ON test")
tk.MustQuery("explain select * from test where updated_date > '2023-12-31 23:59:00' and updated_date<'2024-01-01 00:00:01';").Check(testkit.Rows(
"TableReader_7 237.21 root data:Selection_6",
"└─Selection_6 237.21 cop[tikv] gt(test.test.updated_date, 2023-12-31 23:59:00.000000), lt(test.test.updated_date, 2024-01-01 00:00:01.000000)",
" └─TableFullScan_5 859718933.00 cop[tikv] table:test keep order:false"))
}
71 changes: 59 additions & 12 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/pingcap/tidb/pkg/util/codec"
"github.com/pingcap/tidb/pkg/util/collate"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/ranger"
"github.com/pingcap/tipb/go-tipb"
"github.com/twmb/murmur3"
Expand Down Expand Up @@ -896,6 +897,22 @@ func (hg *Histogram) OutOfRange(val types.Datum) bool {
chunk.Compare(hg.Bounds.GetRow(hg.Bounds.NumRows()-1), 0, &val) < 0
}

func convertTime(d *types.Datum) (*types.Datum, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add some comments to explain why we need the extra conversion.

_, packedUint, err := codec.DecodeUint(d.GetBytes()[1:])
if err != nil {
logutil.BgLogger().Warn("fail to decode uint", zap.Error(err))
return nil, err
}
var t types.Time
err = t.FromPackedUint(packedUint)
if err != nil {
logutil.BgLogger().Warn("get time by FromPackedUint", zap.Error(err))
return nil, err
}
result := types.NewDatum(t)
return &result, nil
}

// OutOfRangeRowCount estimate the row count of part of [lDatum, rDatum] which is out of range of the histogram.
// Here we assume the density of data is decreasing from the lower/upper bound of the histogram toward outside.
// The maximum row count it can get is the modifyCount. It reaches the maximum when out-of-range width reaches histogram range width.
Expand All @@ -921,7 +938,7 @@ func (hg *Histogram) OutOfRange(val types.Datum) bool {
func (hg *Histogram) OutOfRangeRowCount(
sctx planctx.PlanContext,
lDatum, rDatum *types.Datum,
realtimeRowCount, modifyCount, histNDV int64,
realtimeRowCount, modifyCount, histNDV int64, isDatetime bool,
) (result float64) {
debugTrace := sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace
if debugTrace {
Expand Down Expand Up @@ -951,18 +968,33 @@ func (hg *Histogram) OutOfRangeRowCount(

// For bytes and string type, we need to cut the common prefix when converting them to scalar value.
// Here we calculate the length of common prefix.
var l, r float64
commonPrefix := 0
if hg.GetLower(0).Kind() == types.KindBytes || hg.GetLower(0).Kind() == types.KindString {
// Calculate the common prefix length among the lower and upper bound of histogram and the range we want to estimate.
commonPrefix = commonPrefixLength(hg.GetLower(0).GetBytes(),
hg.GetUpper(hg.Len()-1).GetBytes(),
lDatum.GetBytes(),
rDatum.GetBytes())
if isDatetime {
ltime, err := convertTime(lDatum)
if err != nil {
return 0
}
rtime, err := convertTime(rDatum)
if err != nil {
return 0
}
l = convertMysqlTimeDatumToScalar(ltime)
r = convertMysqlTimeDatumToScalar(rtime)
} else {
if hg.GetLower(0).Kind() == types.KindBytes || hg.GetLower(0).Kind() == types.KindString {
// Calculate the common prefix length among the lower and upper bound of histogram and the range we want to estimate.
commonPrefix = commonPrefixLength(hg.GetLower(0).GetBytes(),
hg.GetUpper(hg.Len()-1).GetBytes(),
lDatum.GetBytes(),
rDatum.GetBytes())
}

// Convert the range we want to estimate to scalar value(float64)
l = convertDatumToScalar(lDatum, commonPrefix)
r = convertDatumToScalar(rDatum, commonPrefix)
}

// Convert the range we want to estimate to scalar value(float64)
l := convertDatumToScalar(lDatum, commonPrefix)
r := convertDatumToScalar(rDatum, commonPrefix)
unsigned := mysql.HasUnsignedFlag(hg.Tp.GetFlag())
// If this is an unsigned column, we need to make sure values are not negative.
// Normal negative value should have become 0. But this still might happen when met MinNotNull here.
Expand Down Expand Up @@ -993,8 +1025,23 @@ func (hg *Histogram) OutOfRangeRowCount(
allowUseModifyCount := sctx.GetSessionVars().GetOptObjective() != variable.OptObjectiveDeterminate

// Convert the lower and upper bound of the histogram to scalar value(float64)
histL := convertDatumToScalar(hg.GetLower(0), commonPrefix)
histR := convertDatumToScalar(hg.GetUpper(hg.Len()-1), commonPrefix)
var histL, histR float64
if isDatetime {
ltime, err := convertTime(hg.GetLower(0))
if err != nil {
return 0
}
rtime, err := convertTime(hg.GetUpper(hg.Len() - 1))
if err != nil {
return 0
}
histL = convertMysqlTimeDatumToScalar(ltime)
histR = convertMysqlTimeDatumToScalar(rtime)
} else {
histL = convertDatumToScalar(hg.GetLower(0), commonPrefix)
histR = convertDatumToScalar(hg.GetUpper(hg.Len()-1), commonPrefix)
}

histWidth := histR - histL
// If we find that the histogram width is too small or too large - we still may need to consider
// the impact of modifications to the table
Expand Down
26 changes: 15 additions & 11 deletions pkg/statistics/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,7 @@ func convertDatumToScalar(value *types.Datum, commonPfxLen int) float64 {
}
return scalar
case types.KindMysqlTime:
valueTime := value.GetMysqlTime()
var minTime types.Time
switch valueTime.Type() {
case mysql.TypeDate:
minTime = types.NewTime(types.MinDatetime, mysql.TypeDate, types.DefaultFsp)
case mysql.TypeDatetime:
minTime = types.NewTime(types.MinDatetime, mysql.TypeDatetime, types.DefaultFsp)
case mysql.TypeTimestamp:
minTime = types.MinTimestamp
}
return float64(valueTime.Sub(UTCWithAllowInvalidDateCtx, &minTime).Duration)
Comment on lines -71 to -81
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need to change this.
We can directly use convertDatumToScalar() to replace convertMysqlTimeDatumToScalar() in this PR since the input is definitely of KindMysqlTime.

return convertMysqlTimeDatumToScalar(value)
case types.KindString, types.KindBytes:
bytes := value.GetBytes()
if len(bytes) <= commonPfxLen {
Expand All @@ -95,6 +85,20 @@ func convertDatumToScalar(value *types.Datum, commonPfxLen int) float64 {
}
}

func convertMysqlTimeDatumToScalar(value *types.Datum) float64 {
valueTime := value.GetMysqlTime()
var minTime types.Time
switch valueTime.Type() {
case mysql.TypeDate:
minTime = types.NewTime(types.MinDatetime, mysql.TypeDate, types.DefaultFsp)
case mysql.TypeDatetime:
minTime = types.NewTime(types.MinDatetime, mysql.TypeDatetime, types.DefaultFsp)
case mysql.TypeTimestamp:
minTime = types.MinTimestamp
}
return float64(valueTime.Sub(UTCWithAllowInvalidDateCtx, &minTime).Duration)
}

// PreCalculateScalar converts the lower and upper to scalar. When the datum type is KindString or KindBytes, we also
// calculate their common prefix length, because when a value falls between lower and upper, the common prefix
// of lower and upper equals to the common prefix of the lower, upper and the value. For some simple types like `Int64`,
Expand Down