Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: store the topn by slice instead of map #20818

Merged
merged 7 commits into from
Nov 6, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
address comment
  • Loading branch information
winoros committed Nov 4, 2020
commit 72eb90f68b604ef360ee11519a5b5e06ccb45dfd
12 changes: 4 additions & 8 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ func buildCMSAndTopN(helper *topNHelper, d, w int32, scaleRatio uint64, defaultV
data, cnt := helper.sorted[i].data, helper.sorted[i].cnt
t.AppendTopN(data, cnt*scaleRatio)
}
t.Sort()
helper.sorted = helper.sorted[helper.actualNumTop:]
}
c.defaultValue = defaultVal
Expand Down Expand Up @@ -200,7 +201,7 @@ func (c *CMSketch) considerDefVal(cnt uint64) bool {
return (cnt == 0 || (cnt > c.defaultValue && cnt < 2*(c.count/uint64(c.width)))) && c.defaultValue > 0
}

func updateValueBytesNew(c *CMSketch, t *TopN, d []byte, count uint64) {
func updateValueBytes(c *CMSketch, t *TopN, d []byte, count uint64) {
h1, h2 := murmur3.Sum128(d)
if oriCount, ok := t.QueryTopN(d); ok {
deltaCount := count - oriCount
Expand Down Expand Up @@ -329,6 +330,7 @@ func MergeTopN(dst, src *TopN, c *CMSketch, numTop uint32, usingMax bool) {
c.insertBytesByCount(data, cnt)
}
}
dst.Sort()
}

// MergeCMSketch merges two CM Sketches.
Expand Down Expand Up @@ -411,6 +413,7 @@ func TopNFromProto(protoTopN []*tipb.CMSketchTopN) *TopN {
copy(d, e.Data)
topN.AppendTopN(d, e.Count)
}
topN.Sort()
return topN
}

Expand Down Expand Up @@ -470,7 +473,6 @@ func (c *CMSketch) Copy() *CMSketch {

// AppendTopN adds one (data, count) entry to the end of the TopN slice.
// The entry references data directly instead of copying it, and the TopN is
// flagged as unsorted because the new entry is appended without regard to order.
func (c *TopN) AppendTopN(data []byte, count uint64) {
	entry := TopNMeta{data, count}
	c.TopN = append(c.TopN, entry)
	c.sorted = false
}

Expand All @@ -488,7 +490,6 @@ func (c *CMSketch) CalcDefaultValForAnalyze(NDV uint64) {
// TopN stores most-common values, which are used to estimate point queries.
type TopN struct {
// TopN is the list of stored entries; AppendTopN adds to its end.
TopN []TopNMeta
// sorted records whether TopN is currently in the order produced by Sort.
// Sort sets it to true and AppendTopN resets it to false.
sorted bool
}

// Copy makes a copy of the current TopN.
Expand All @@ -504,7 +505,6 @@ func (c *TopN) Copy() *TopN {
}
return &TopN{
TopN: topN,
sorted: c.sorted,
}
}

Expand All @@ -527,9 +527,6 @@ func (c *TopN) QueryTopN(d []byte) (uint64, bool) {
}

func (c *TopN) findTopN(d []byte) int {
if c.sorted == false {
c.Sort()
}
match := false
idx := sort.Search(len(c.TopN), func(i int) bool {
cmp := bytes.Compare(c.TopN[i].Encoded, d)
Expand All @@ -549,7 +546,6 @@ func (c *TopN) Sort() {
sort.Slice(c.TopN, func(i, j int) bool {
return bytes.Compare(c.TopN[i].Encoded, c.TopN[j].Encoded) < 0
})
c.sorted = true
}

// TotalCount returns how much data is stored in TopN.
Expand Down
4 changes: 2 additions & 2 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ func UpdateCMSketchAndTopN(c *CMSketch, t *TopN, eqFeedbacks []Feedback) (*CMSke
newCMSketch := c.Copy()
newTopN := t.Copy()
for _, fb := range eqFeedbacks {
updateValueBytesNew(newCMSketch, newTopN, fb.Lower.GetBytes(), uint64(fb.Count))
updateValueBytes(newCMSketch, newTopN, fb.Lower.GetBytes(), uint64(fb.Count))
}
return newCMSketch, newTopN
}
Expand Down Expand Up @@ -871,7 +871,7 @@ func decodeFeedbackForIndex(q *QueryFeedback, pb *queryFeedback, c *CMSketch, t
return
}
for i := 0; i < len(pb.IndexPoints); i++ {
updateValueBytesNew(c, t, pb.IndexPoints[i], uint64(pb.Counts[start+i]))
updateValueBytes(c, t, pb.IndexPoints[i], uint64(pb.Counts[start+i]))
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ func (h *Handle) initCMSketch4Indices(is infoschema.InfoSchema, tables map[int64
}

func (h *Handle) initStatsTopN4Chunk(tables map[int64]*statistics.Table, iter *chunk.Iterator4Chunk) {
affectedIndexes := make(map[int64]*statistics.Index)
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
table, ok := tables[row.GetInt64(0)]
if !ok {
Expand All @@ -251,10 +252,14 @@ func (h *Handle) initStatsTopN4Chunk(tables map[int64]*statistics.Table, iter *c
if idx.TopN == nil {
idx.TopN = statistics.NewTopN(32)
}
affectedIndexes[row.GetInt64(1)] = idx
data := make([]byte, len(row.GetBytes(2)))
copy(data, row.GetBytes(2))
idx.TopN.AppendTopN(data, row.GetUint64(3))
}
for _, idx := range affectedIndexes {
idx.TopN.Sort()
}
}

func (h *Handle) initStatsTopN(tables map[int64]*statistics.Table) error {
Expand Down
1 change: 1 addition & 0 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1275,5 +1275,6 @@ func (hg *Histogram) ExtractTopN(cms *CMSketch, topN *TopN, numCols int, numTopN
cms.subValue(h1, h2, realCnt)
topN.AppendTopN(dataCnt.data, realCnt)
}
topN.Sort()
return nil
}
1 change: 1 addition & 0 deletions statistics/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,5 +322,6 @@ func (c *SampleCollector) ExtractTopN(numTop uint32, sc *stmtctx.StatementContex
cms.subValue(h1, h2, realCnt)
c.TopN.AppendTopN(data, realCnt)
}
c.TopN.Sort()
return nil
}