Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: add cache for statistics table #2398

Merged
merged 10 commits into from
Jan 7, 2017
1 change: 1 addition & 0 deletions executor/executor_simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ func (e *SimpleExec) buildStatisticsAndSaveToKV(tn *ast.TableName, count int64,
if err != nil {
return errors.Trace(err)
}
statistics.SetStatisticsTableCache(tn.TableInfo.ID, t)
tpb, err := t.ToPB()
if err != nil {
return errors.Trace(err)
Expand Down
36 changes: 34 additions & 2 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/ngaut/log"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/meta"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/plan/statistics"
Expand Down Expand Up @@ -908,8 +909,39 @@ func (b *planBuilder) buildTableDual() LogicalPlan {
}

func (b *planBuilder) getTableStats(table *model.TableInfo) *statistics.Table {
// TODO: Currently we always return a pseudo table for good performance. We will use a cache in future.
return statistics.PseudoTable(table)
tbl := statistics.GetStatisticsTableCache(table)
if tbl != nil {
return tbl
}
txn := b.ctx.Txn()
if txn == nil {
return statistics.PseudoTable(table)
}
m := meta.NewMeta(txn)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to move the cache refill logic into the statistics package.
Also, if the stats for a table are always nil, we don't want to fetch them from KV every time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried, but it would introduce a circular dependency, because meta/meta.go imports the statistics package. And if the stats table is nil, I will save a pseudo one in the cache, so we will not fetch it from KV every time.

Copy link
Member

@coocood coocood Jan 5, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache can be implemented in another package.
like statscache

Copy link
Contributor Author

@alivxxx alivxxx Jan 5, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plan/statscache or plan/statistics/statscache?

tpb, err := m.GetTableStats(table.ID)
if err != nil {
return statistics.PseudoTable(table)
}
// This table has no statistics table, we give it a pseudo one and save in cache.
if tpb == nil {
tbl = statistics.PseudoTable(table)
tbl.TS = int64(txn.StartTS())
statistics.SetStatisticsTableCache(table.ID, tbl)
return tbl
}
tbl, err = statistics.TableFromPB(table, tpb)
// Error is not nil may mean that there are some ddl changes on this table, so the origin
// statistics can not be used any more, we give it a pseudo one and save in cache.
if err != nil {
log.Errorf("Error occured when convert pb table for %s", table.Name.O)
tbl = statistics.PseudoTable(table)
tbl.TS = int64(txn.StartTS())
statistics.SetStatisticsTableCache(table.ID, tbl)
return statistics.PseudoTable(table)
}
tbl.TS = int64(txn.StartTS())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not use the build time field to record lookup time.

statistics.SetStatisticsTableCache(table.ID, tbl)
return tbl
}

func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
Expand Down
1 change: 1 addition & 0 deletions plan/statistics/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ func NewTable(sc *variable.StatementContext, ti *model.TableInfo, ts, count, num

// TableFromPB creates a table statistics from protobuffer.
func TableFromPB(ti *model.TableInfo, tpb *TablePB) (*Table, error) {
// TODO: The following error may mean that there is a ddl change on this table. Currently, The caller simply drop the statistics table. Maybe we can have better solution.
if tpb.GetId() != ti.ID {
return nil, errors.Errorf("table id not match, expected %d, got %d", ti.ID, tpb.GetId())
}
Expand Down
60 changes: 60 additions & 0 deletions plan/statistics/statistics_cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package statistics

import (
"sync"
"time"

"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/store/tikv/oracle"
)

// tableCache holds statistics tables keyed by table ID, together with the
// read-write mutex that guards access to the map.
type tableCache struct {
// m guards cache: GetStatisticsTableCache takes the read lock and
// SetStatisticsTableCache takes the write lock.
m sync.RWMutex
// cache maps a table ID to the statistics table stored for it.
cache map[int64]*Table
}

// tblCache is the package-level cache instance, created with an empty map so
// that it can be written to without further initialisation.
var tblCache = tableCache{cache: map[int64]*Table{}}

// expireDuration is 1 hour, written as 60 * 60 * 1000 (presumably milliseconds,
// to match the physical-time values it is compared against in
// tableCacheExpired; confirm against the oracle package).
var expireDuration int64 = 60 * 60 * 1000

func tableCacheExpired(tbl *Table) bool {
duration := oracle.GetPhysical(time.Now()) - oracle.ExtractPhysical(uint64(tbl.TS))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbl.TS is the stats build time. Table stats may not be updated frequently, so we should use the last lookup time to determine whether the cached stats have expired.

if duration >= expireDuration {
return true
}
return false
}

// GetStatisticsTableCache looks up the cached statistics table for
// tblInfo.ID. It returns nil when there is no entry for that ID, when the
// entry was stored for a different TableInfo, or when the entry has expired
// according to tableCacheExpired; otherwise it returns the cached table.
func GetStatisticsTableCache(tblInfo *model.TableInfo) *Table {
	// Hold the read lock only for the map lookup itself.
	tblCache.m.RLock()
	cached, found := tblCache.cache[tblInfo.ID]
	tblCache.m.RUnlock()

	switch {
	case !found:
		return nil
	case cached.info != tblInfo:
		// A DDL change may have happened since the entry was stored. The
		// check relies on the TableInfo differing if and only if there has
		// been a DDL change.
		return nil
	case tableCacheExpired(cached):
		return nil
	}
	return cached
}

// SetStatisticsTableCache stores tbl in the statistics table cache under the
// given table id, replacing any entry already present for that id. The write
// lock is held for the duration of the update.
func SetStatisticsTableCache(id int64, tbl *Table) {
	tblCache.m.Lock()
	defer tblCache.m.Unlock()
	tblCache.cache[id] = tbl
}