Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: out of range for sampling | tidb-test=pr/2381 #55512

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
7 changes: 5 additions & 2 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,16 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
c := coll.GetCol(idx.Histogram.ID)
// If this is a single column of a multi-column index, use the column's NDV rather than the index NDV
isSingleColRange := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == 1
if isSingleColRange && !isSingleColIdx && c != nil && c.Histogram.NDV > 0 {
if isSingleColRange && c != nil && c.Histogram.NDV > 0 {
histNDV = c.Histogram.NDV - int64(c.TopN.Num())
count += c.Histogram.OutOfRangeRowCount(sctx, &indexRange.LowVal[0], &indexRange.HighVal[0], modifyCount, histNDV, increaseFactor)
} else {
histNDV -= int64(idx.TopN.Num())
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
}
} else {
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
}
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
}

if debugTrace {
Expand Down
32 changes: 17 additions & 15 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -976,14 +976,16 @@ func (hg *Histogram) OutOfRangeRowCount(
// Convert the lower and upper bound of the histogram to scalar value(float64)
histL := convertDatumToScalar(hg.GetLower(0), commonPrefix)
histR := convertDatumToScalar(hg.GetUpper(hg.Len()-1), commonPrefix)
lowerVal := hg.GetLower(0).GetInt64()
upperVal := hg.GetUpper(hg.Len() - 1).GetInt64()
histWidth := histR - histL
if histWidth <= 0 {
return 0
}
boundL := histL - histWidth
boundR := histR + histWidth

var leftPercent, rightPercent, rowCount float64
var leftPercent, rightPercent, rowCount, upperBound, sampleOutOfRange float64
if debugTrace {
defer func() {
debugtrace.RecordAnyValuesWithNames(sctx,
Expand Down Expand Up @@ -1030,12 +1032,17 @@ func (hg *Histogram) OutOfRangeRowCount(
}

totalPercent := min(leftPercent*0.5+rightPercent*0.5, 1.0)
rowCount = totalPercent * hg.NotNullCount()

// Upper & lower bound logic.
upperBound := rowCount
if histNDV > 0 {
upperBound = hg.NotNullCount() / float64(histNDV)
// Calculate any out-of-range portion attributed to sampling of the original histogram buckets
if upperVal > lowerVal && float64(histNDV) > histWidth && totalPercent > 0 {
sampleOutOfRange = (float64(histNDV) - histWidth)
if leftPercent == 0 || rightPercent == 0 {
sampleOutOfRange *= 0.5
}
rowCount += sampleOutOfRange
}
}

allowUseModifyCount := sctx.GetSessionVars().GetOptObjective() != variable.OptObjectiveDeterminate
Expand All @@ -1044,20 +1051,15 @@ func (hg *Histogram) OutOfRangeRowCount(
// In OptObjectiveDeterminate mode, we can't rely on the modify count anymore.
// An upper bound is necessary to make the estimation make sense for predicates with bound on only one end, like a > 1.
// We use 1/NDV here (only the Histogram part is considered) and it seems reasonable and good enough for now.
return min(rowCount, upperBound)
return math.Max(rowCount, upperBound)
terry1purcell marked this conversation as resolved.
Show resolved Hide resolved
}

// If the modifyCount is large (compared to original table rows), then any out of range estimate is unreliable.
// Assume at least 1/NDV is returned
if float64(modifyCount) > hg.NotNullCount() && rowCount < upperBound {
rowCount = upperBound
} else if rowCount < upperBound {
// Adjust by increaseFactor if our estimate is low
rowCount *= increaseFactor
}
rowCount += totalPercent * math.Min(float64(modifyCount), hg.NotNullCount())
terry1purcell marked this conversation as resolved.
Show resolved Hide resolved

// Adjust by increaseFactor if our estimate is low
rowCount *= increaseFactor

// Use modifyCount as a final bound
return min(rowCount, float64(modifyCount))
return rowCount
}

// Copy deep copies the histogram.
Expand Down