Skip to content

Commit

Permalink
support projection elimination (pingcap#1740)
Browse files Browse the repository at this point in the history
  • Loading branch information
XuHuaiyu authored Oct 9, 2016
1 parent a33240d commit b2bf4f5
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 1 deletion.
94 changes: 94 additions & 0 deletions plan/eliminate_projection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package plan

import (
"github.com/pingcap/tidb/expression"
)

// EliminateProjection walks the logical plan rooted at p, removes every
// Projection operator for which projectionCanBeEliminated reports true, and
// returns the root of the rewritten plan. Removing such projections avoids the
// cost of the memory copy done in the projection iterator.
//
// The returned plan may differ from p: when p itself is an eliminated
// Projection, the (recursively rewritten) child is returned instead.
func EliminateProjection(p LogicalPlan) LogicalPlan {
	switch plan := p.(type) {
	case *Projection:
		if !projectionCanBeEliminated(plan) {
			break
		}
		child := p.GetChildByIndex(0).(LogicalPlan)
		// The schema pointer of the Projection may be referenced by a parent
		// operator, and attributes of the child operator may be used later.
		// So the child's column attributes are copied into the Projection's
		// schema columns in place, and the child then adopts that schema.
		for i, col := range plan.GetSchema() {
			plan.GetSchema()[i] = shallowCopyColumn(col, child.GetSchema()[i])
		}
		child.SetSchema(plan.GetSchema())
		// NOTE(review): any result returned by RemovePlan is not inspected
		// here — confirm whether it can fail for a single-child Projection.
		RemovePlan(p)
		// The recursive call already rewrites the whole subtree below child,
		// so return directly instead of falling through and walking the same
		// children a second time.
		return EliminateProjection(child)
	case *DataSource:
		// Predicates may be pushed down when building the physical plan, and
		// the schema of a Selection is always the same as its child's, so the
		// Selection's schema is copied to the DataSource and the conditions
		// are re-resolved against it.
		if sel, ok := plan.GetParentByIndex(0).(*Selection); ok {
			plan.SetSchema(sel.GetSchema())
			for i, cond := range sel.Conditions {
				newCond, err := retrieveColumnsInExpression(cond, plan.GetSchema())
				if err != nil {
					// This function cannot report errors; keep the existing
					// condition rather than overwriting it with the result of
					// a failed resolution.
					continue
				}
				sel.Conditions[i] = newCond
			}
		}
	}
	if len(p.GetChildren()) == 1 {
		child := p.GetChildByIndex(0)
		p.ReplaceChild(child, EliminateProjection(child.(LogicalPlan)))
	} else {
		children := make([]Plan, 0, len(p.GetChildren()))
		for _, child := range p.GetChildren() {
			children = append(children, EliminateProjection(child.(LogicalPlan)))
		}
		p.SetChildren(children...)
	}
	return p
}

// shallowCopyColumn overwrites a fixed set of fields on colDest (FromID,
// Position, ID, Correlated, IsAggOrSubq and RetType) with the values taken
// from colSrc and returns colDest. Fields of colDest that are not listed are
// left untouched, and colDest is modified in place.
func shallowCopyColumn(colDest, colSrc *expression.Column) *expression.Column {
	// Identity of the column.
	colDest.FromID, colDest.Position, colDest.ID = colSrc.FromID, colSrc.Position, colSrc.ID
	// Flags.
	colDest.Correlated, colDest.IsAggOrSubq = colSrc.Correlated, colSrc.IsAggOrSubq
	// Result type.
	colDest.RetType = colSrc.RetType

	return colDest
}

// projectionCanBeEliminated reports whether the Projection p is redundant and
// may be removed. That is the case only when all of the following hold:
//  1. p's schema has as many columns as the schema of its child;
//  2. every expression of p is a plain, non-correlated column;
//  3. the i-th expression has the same FromID and Position as the i-th
//     column of the child's schema (same columns, same order).
//
// Projections in statements like the following can not be eliminated:
//
//	"SELECT b, a from t",
//	"SELECT c AS a, c AS b FROM t WHERE d = 1",
//	"select t1.a, t2.b, t1.b, t2.a from t1, t2 where t1.a < 0 and t2.b > 0".
func projectionCanBeEliminated(p *Projection) bool {
	childSchema := p.GetChildByIndex(0).(LogicalPlan).GetSchema()
	if len(p.GetSchema()) != len(childSchema) {
		return false
	}
	for idx, e := range p.Exprs {
		switch c := e.(type) {
		case *expression.Column:
			if c.Correlated {
				return false
			}
			want := childSchema[idx]
			if !(c.FromID == want.FromID && c.Position == want.Position) {
				return false
			}
		default:
			// Anything that is not a bare column needs real evaluation.
			return false
		}
	}
	return true
}
1 change: 1 addition & 0 deletions plan/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func Optimize(ctx context.Context, node ast.Node, is infoschema.InfoSchema) (Pla
if err != nil {
return nil, errors.Trace(err)
}
logic = EliminateProjection(logic)
info, err := logic.convert2PhysicalPlan(&requiredProperty{})
if err != nil {
return nil, errors.Trace(err)
Expand Down
110 changes: 109 additions & 1 deletion plan/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ func (s *testPlanSuite) TestPredicatePushDown(c *C) {
c.Assert(builder.err, IsNil)
lp := p.(LogicalPlan)
c.Assert(ToString(lp), Equals, ca.first, Commentf("for %s", ca.sql))

_, lp, err = lp.PredicatePushDown(nil)
c.Assert(err, IsNil)
_, err = lp.PruneColumnsAndResolveIndices(lp.GetSchema())
Expand Down Expand Up @@ -1021,6 +1020,115 @@ func (s *testPlanSuite) TestConstantFolding(c *C) {
}
}

// TestProjectionElimination builds a logical plan for each SQL statement, runs
// predicate push-down and column pruning on it, applies EliminateProjection,
// and compares the string form of the resulting plan with the expected one.
func (s *testPlanSuite) TestProjectionElimination(c *C) {
	defer testleak.AfterTest(c)()
	cases := []struct {
		sql string
		ans string
	}{
		// Statements whose projection is expected to be eliminated.
		{"select a from t", "DataScan(t)"},
		{"select a from t where a > 1", "DataScan(t)->Selection"},
		{"select a from t where a is null", "DataScan(t)->Selection"},
		{"select a, b from t where b > 0", "DataScan(t)->Selection"},
		{"select a as c1, b as c2 from t where a = 3", "DataScan(t)->Selection"},
		{"select a as c1, b as c2 from t as t1 where t1.a = 0", "DataScan(t)->Selection"},
		{
			"select a from t where exists(select 1 from t as x where x.a < t.a)",
			"Join{DataScan(t)->DataScan(t)}",
		},
		{
			"select a from (select d as a from t where d = 0) k where k.a = 5",
			"DataScan(t)->Selection",
		},
		{
			"select t1.a from t t1 where t1.a in (select t2.a from t t2 where t2.a > 1)",
			"Join{DataScan(t)->DataScan(t)->Selection}",
		},
		{
			"select t1.a, t2.b from t t1, t t2 where t1.a > 0 and t2.b < 0",
			"Join{DataScan(t)->Selection->DataScan(t)->Selection}",
		},
		{
			"select t1.a, t1.b, t2.a, t2.b from t t1, t t2 where t1.a > 0 and t2.b < 0",
			"Join{DataScan(t)->Selection->DataScan(t)->Selection}",
		},
		{
			"select * from (t t1 join t t2) join (t t3 join t t4)",
			"Join{Join{DataScan(t)->DataScan(t)}->Join{DataScan(t)->DataScan(t)}}",
		},
		// Statements whose projection is expected to be kept.
		{
			"select t1.b, t1.a, t2.b, t2.a from t t1, t t2 where t1.a > 0 and t2.b < 0",
			"Join{DataScan(t)->Selection->DataScan(t)->Selection}->Projection",
		},
		{
			"select d, c, b, a from t where a = b and b = 1",
			"DataScan(t)->Selection->Projection",
		},
		{
			"select d as a, b as c from t as t1 where d > 0 and b < 0",
			"DataScan(t)->Selection->Projection",
		},
		{"select c as a, c as b from t", "DataScan(t)->Projection"},
		{"select c as a, c as b from t where d > 0", "DataScan(t)->Selection->Projection"},
		{
			"select t1.a, t2.b, t2.a, t1.b from t t1, t t2 where t1.a > 0 and t2.b < 0",
			"Join{DataScan(t)->Selection->DataScan(t)->Selection}->Projection",
		},
		{
			"select t1.a from t t1 where t1.a in (select t2.a from t t2 where t1.a > 1)",
			"DataScan(t)->Apply(DataScan(t)->Selection->Projection)->Selection->Projection",
		},
	}
	for _, tc := range cases {
		comment := Commentf("for %s", tc.sql)

		// Parse and resolve the statement.
		stmt, err := s.ParseOneStmt(tc.sql, "", "")
		c.Assert(err, IsNil, comment)
		ast.SetFlag(stmt)
		c.Assert(mockResolve(stmt), IsNil)

		// Build the logical plan.
		builder := &planBuilder{
			allocator: new(idAllocator),
			ctx:       mock.NewContext(),
		}
		built := builder.build(stmt)
		c.Assert(builder.err, IsNil)

		// Run the same rewrites that precede projection elimination.
		logical := built.(LogicalPlan)
		_, logical, err = logical.PredicatePushDown(nil)
		c.Assert(err, IsNil)
		_, err = logical.PruneColumnsAndResolveIndices(logical.GetSchema())
		c.Assert(err, IsNil)

		built = EliminateProjection(logical)
		c.Assert(ToString(built), Equals, tc.ans, comment)
	}
}
func (s *testPlanSuite) TestCoveringIndex(c *C) {
cases := []struct {
columnNames []string
Expand Down
4 changes: 4 additions & 0 deletions tidb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ func (s *testMainSuite) TestCaseInsensitive(c *C) {
c.Assert(err, IsNil)
c.Assert(fields[0].ColumnAsName.O, Equals, "A")
c.Assert(fields[1].ColumnAsName.O, Equals, "b")
rs = mustExecSQL(c, se, "select a as A from t where A > 0")
fields, err = rs.Fields()
c.Assert(err, IsNil)
c.Assert(fields[0].ColumnAsName.O, Equals, "A")
mustExecSQL(c, se, "update T set b = B + 1")
mustExecSQL(c, se, "update T set B = b + 1")
rs = mustExecSQL(c, se, "select b from T")
Expand Down

0 comments on commit b2bf4f5

Please sign in to comment.