Skip to content

Commit

Permalink
statistics: fix difference between BinarySearchRemoveVal and RemoveVals (#47878) (#47938)
Browse files Browse the repository at this point in the history

close #47887
  • Loading branch information
ti-chi-bot authored Oct 26, 2023
1 parent 189aeb5 commit c0d0981
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pkg/statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":statistics"],
flaky = True,
shard_count = 32,
shard_count = 33,
deps = [
"//pkg/config",
"//pkg/parser/ast",
Expand Down
3 changes: 3 additions & 0 deletions pkg/statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,9 @@ func (c *TopN) FindTopN(d []byte) int {
if bytes.Compare(c.TopN[len(c.TopN)-1].Encoded, d) < 0 {
return -1
}
if bytes.Compare(c.TopN[0].Encoded, d) > 0 {
return -1
}
idx, match := slices.BinarySearchFunc(c.TopN, d, func(a TopNMeta, b []byte) int {
return bytes.Compare(a.Encoded, b)
})
Expand Down
17 changes: 14 additions & 3 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,16 +296,18 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
return
}
}
var midIdx = 0
var found bool
for lowIdx <= highIdx {
midIdx := (lowIdx + highIdx) / 2
midIdx = (lowIdx + highIdx) / 2
cmpResult := bytes.Compare(hg.Bounds.Column(0).GetRaw(midIdx*2), valCntPairs.Encoded)
if cmpResult > 0 {
lowIdx = midIdx + 1
highIdx = midIdx - 1
continue
}
cmpResult = bytes.Compare(hg.Bounds.Column(0).GetRaw(midIdx*2+1), valCntPairs.Encoded)
if cmpResult < 0 {
highIdx = midIdx - 1
lowIdx = midIdx + 1
continue
}
if hg.Buckets[midIdx].NDV > 0 {
Expand All @@ -318,8 +320,17 @@ func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
if hg.Buckets[midIdx].Count < 0 {
hg.Buckets[midIdx].Count = 0
}
found = true
break
}
if found {
for midIdx++; midIdx <= hg.Len()-1; midIdx++ {
hg.Buckets[midIdx].Count -= int64(valCntPairs.Count)
if hg.Buckets[midIdx].Count < 0 {
hg.Buckets[midIdx].Count = 0
}
}
}
}

// RemoveVals remove the given values from the histogram.
Expand Down
34 changes: 34 additions & 0 deletions pkg/statistics/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -532,3 +532,37 @@ func TestStandardizeForV2AnalyzeIndex(t *testing.T) {
fmt.Sprintf("testData[%d].inputHist:%s", i, test.inputHistToStr))
}
}

// generateData builds a histogram fixture for tests. It creates 99 buckets
// whose lower bounds run 100, 200, ..., 9900; each bucket spans
// [lower, lower+100], carries a cumulative count that grows by 100 per
// bucket, and uses repeat = 10 and ndv = 10. The bucket list is handed to
// genHist4Test to produce the resulting *Histogram.
func generateData(t *testing.T) *Histogram {
	const (
		step  = 100
		limit = 10000
	)
	var buckets []*bucket4Test
	var cumulative int64
	for lower := int64(step); lower < limit; lower += step {
		// Counts are cumulative: every bucket adds another `step` rows.
		cumulative += step
		b := &bucket4Test{
			lower:  lower,
			upper:  lower + step,
			count:  cumulative,
			repeat: 10,
			ndv:    10,
		}
		buckets = append(buckets, b)
	}
	return genHist4Test(t, buckets, 0)
}

// TestVerifyHistsBinarySearchRemoveValAndRemoveVals checks that removing the
// same encoded value through RemoveVals and through BinarySearchRemoveVal
// leaves two identically generated histograms equal to each other.
func TestVerifyHistsBinarySearchRemoveValAndRemoveVals(t *testing.T) {
	// Two independent fixtures built the same way must start out equal.
	viaRemoveVals, viaBinarySearch := generateData(t), generateData(t)
	require.Equal(t, viaRemoveVals, viaBinarySearch)

	// Encode the integer 150 as the key to remove from both histograms.
	stmtCtx := mock.NewContext().GetSessionVars().StmtCtx
	encoded, err := codec.EncodeKey(stmtCtx, nil, types.NewIntDatum(150))
	require.NoError(t, err)

	target := TopNMeta{Encoded: encoded, Count: 2}

	// Apply each removal path to its own histogram, then compare the results.
	viaRemoveVals.RemoveVals([]TopNMeta{target})
	viaBinarySearch.BinarySearchRemoveVal(target)
	require.Equal(t, viaRemoveVals, viaBinarySearch)
}

0 comments on commit c0d0981

Please sign in to comment.