Skip to content

Commit

Permalink
statistics: fix the correlation estimation for version 2 (#23057)
Browse files (browse the repository at this point in the history)
  • Loading branch information
winoros authored Mar 3, 2021
1 parent b5582e0 commit a4b4d7a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
4 changes: 3 additions & 1 deletion statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ func BuildColumnHistAndTopN(ctx sessionctx.Context, numBuckets, numTopN int, id
cur, curCnt = sampleBytes, 1
}

// Calculate the correlation between this column and the handle column.
hg.Correlation = calcCorrelation(sampleNum, corrXYSum)

// Handle the count of the last value. This is basically the same as case 2 above.
// If topn is empty at this point, append the "current" count directly.
if len(topNList) == 0 {
Expand Down Expand Up @@ -340,6 +343,5 @@ func BuildColumnHistAndTopN(ctx sessionctx.Context, numBuckets, numTopN int, id
}
}

hg.Correlation = calcCorrelation(int64(len(samples)), corrXYSum)
return hg, topn, nil
}
20 changes: 20 additions & 0 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,15 @@ func (s *testStatsSuite) TestCorrelation(c *C) {
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "0")
c.Assert(result.Rows()[1][9], Equals, "0.8285714285714286")
testKit.MustExec("set @@session.tidb_analyze_version=2")
testKit.MustExec("analyze table t")
result = testKit.MustQuery("show stats_histograms where Table_name = 't'").Sort()
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "0")
c.Assert(result.Rows()[1][9], Equals, "0.8285714285714286")

testKit.MustExec("truncate table t")
testKit.MustExec("set @@session.tidb_analyze_version=1")
result = testKit.MustQuery("show stats_histograms where Table_name = 't'").Sort()
c.Assert(len(result.Rows()), Equals, 0)
testKit.MustExec("insert into t values(1,21),(3,12),(4,7),(2,20),(5,1)")
Expand All @@ -637,8 +644,15 @@ func (s *testStatsSuite) TestCorrelation(c *C) {
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "0")
c.Assert(result.Rows()[1][9], Equals, "-0.9428571428571428")
testKit.MustExec("set @@session.tidb_analyze_version=2")
testKit.MustExec("analyze table t")
result = testKit.MustQuery("show stats_histograms where Table_name = 't'").Sort()
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "0")
c.Assert(result.Rows()[1][9], Equals, "-0.9428571428571428")

testKit.MustExec("truncate table t")
testKit.MustExec("set @@session.tidb_analyze_version=1")
testKit.MustExec("insert into t values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1),(11,1),(12,1),(13,1),(14,1),(15,1),(16,1),(17,1),(18,1),(19,1),(20,2),(21,2),(22,2),(23,2),(24,2),(25,2)")
testKit.MustExec("analyze table t")
result = testKit.MustQuery("show stats_histograms where Table_name = 't'").Sort()
Expand All @@ -654,6 +668,12 @@ func (s *testStatsSuite) TestCorrelation(c *C) {
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "1")
c.Assert(result.Rows()[1][9], Equals, "0.8285714285714286")
testKit.MustExec("set @@session.tidb_analyze_version=2")
testKit.MustExec("analyze table t")
result = testKit.MustQuery("show stats_histograms where Table_name = 't'").Sort()
c.Assert(len(result.Rows()), Equals, 2)
c.Assert(result.Rows()[0][9], Equals, "1")
c.Assert(result.Rows()[1][9], Equals, "0.8285714285714286")

testKit.MustExec("truncate table t")
testKit.MustExec("insert into t values(1,1),(2,7),(3,12),(8,18),(4,20),(5,21)")
Expand Down

0 comments on commit a4b4d7a

Please sign in to comment.