From 286e8521acf9b898b1d117daf7ed8ffdfbad4719 Mon Sep 17 00:00:00 2001 From: Jinlong Liu <50897894+King-Dylan@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:18:41 +0800 Subject: [PATCH] planner: eliminate useless scalar subqueries in some scenarios of aggregate queries (#47550) close pingcap/tidb#45822 --- pkg/executor/test/executor/executor_test.go | 4 +- pkg/planner/cascades/optimize.go | 2 +- .../physicalplantest/physical_plan_test.go | 29 ++++++ .../testdata/plan_suite_in.json | 15 +++ .../testdata/plan_suite_out.json | 45 +++++++++ .../testdata/integration_suite_out.json | 25 ++--- pkg/planner/core/plan.go | 6 +- pkg/planner/core/rule_column_pruning.go | 91 +++++++++++-------- pkg/planner/core/rule_max_min_eliminate.go | 3 +- .../testdata/plan_suite_unexported_out.json | 2 +- pkg/planner/util/fixcontrol/get.go | 2 + .../planner/core/casetest/integration.result | 12 +-- 12 files changed, 166 insertions(+), 70 deletions(-) diff --git a/pkg/executor/test/executor/executor_test.go b/pkg/executor/test/executor/executor_test.go index 88abe9d20fae0..dfb7488e9615b 100644 --- a/pkg/executor/test/executor/executor_test.go +++ b/pkg/executor/test/executor/executor_test.go @@ -2352,7 +2352,7 @@ func TestApplyCache(t *testing.T) { tk.MustExec("create table t(a int);") tk.MustExec("insert into t values (1),(1),(1),(1),(1),(1),(1),(1),(1);") tk.MustExec("analyze table t;") - result := tk.MustQuery("explain analyze SELECT count(1) FROM (SELECT (SELECT min(a) FROM t as t2 WHERE t2.a > t1.a) AS a from t as t1) t;") + result := tk.MustQuery("explain analyze SELECT count(a) FROM (SELECT (SELECT min(a) FROM t as t2 WHERE t2.a > t1.a) AS a from t as t1) t;") require.Contains(t, result.Rows()[1][0], "Apply") var ( ind int @@ -2372,7 +2372,7 @@ func TestApplyCache(t *testing.T) { tk.MustExec("create table t(a int);") tk.MustExec("insert into t values (1),(2),(3),(4),(5),(6),(7),(8),(9);") tk.MustExec("analyze table t;") - result = tk.MustQuery("explain analyze SELECT count(1) 
FROM (SELECT (SELECT min(a) FROM t as t2 WHERE t2.a > t1.a) AS a from t as t1) t;") + result = tk.MustQuery("explain analyze SELECT count(a) FROM (SELECT (SELECT min(a) FROM t as t2 WHERE t2.a > t1.a) AS a from t as t1) t;") require.Contains(t, result.Rows()[1][0], "Apply") flag = false value = (result.Rows()[1][5]).(string) diff --git a/pkg/planner/cascades/optimize.go b/pkg/planner/cascades/optimize.go index d16ab3a7c7113..c1dde8f5ad7bc 100644 --- a/pkg/planner/cascades/optimize.go +++ b/pkg/planner/cascades/optimize.go @@ -116,7 +116,7 @@ func (opt *Optimizer) FindBestPlan(sctx sessionctx.Context, logical plannercore. } func (*Optimizer) onPhasePreprocessing(_ sessionctx.Context, plan plannercore.LogicalPlan) (plannercore.LogicalPlan, error) { - err := plan.PruneColumns(plan.Schema().Columns, nil) + err := plan.PruneColumns(plan.Schema().Columns, nil, plan) if err != nil { return nil, err } diff --git a/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go b/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go index d05180be31f25..1b01d7e6e8343 100644 --- a/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go +++ b/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go @@ -167,6 +167,35 @@ func TestAggEliminator(t *testing.T) { } } +// Fix Issue #45822 +func TestRuleColumnPruningLogicalApply(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + var input []string + var output []struct { + SQL string + Best string + } + planSuiteData := GetPlanSuiteData() + planSuiteData.LoadTestCases(t, &input, &output) + p := parser.New() + is := infoschema.MockInfoSchema([]*model.TableInfo{core.MockSignedTable(), core.MockUnsignedTable()}) + tk.MustExec("use test") + tk.MustExec("set @@tidb_opt_fix_control = '45822:ON';") + for i, tt := range input { + comment := fmt.Sprintf("input: %s", tt) + stmt, err := p.ParseOneStmt(tt, "", "") + require.NoError(t, err, comment) + p, _, err := 
planner.Optimize(context.TODO(), tk.Session(), stmt, is) + require.NoError(t, err) + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Best = core.ToString(p) + }) + require.Equal(t, output[i].Best, core.ToString(p), fmt.Sprintf("input: %s", tt)) + } +} + func TestINMJHint(t *testing.T) { var ( input []string diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json index ec0cedcc67ec8..501f734a66527 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json @@ -354,6 +354,21 @@ "select max(a) from (select t1.a from t t1 join t t2 on t1.a=t2.a) t" ] }, + { + "name": "TestRuleColumnPruningLogicalApply", + "cases": [ + "SELECT COUNT(*) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "SELECT COUNT(a) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "SELECT COUNT(t) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "SELECT COUNT(a) FROM t t1 WHERE t1.a IN (SELECT t2.a FROM t t2, t t3 WHERE t2.b = t3.b)", + "SELECT a FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "SELECT a FROM t WHERE b IN (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a)", + "SELECT a FROM t WHERE EXISTS (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t2.b=t.b)", + "SELECT a FROM t WHERE NOT EXISTS (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t2.b=t.b)", + "SELECT a FROM t WHERE b IN (SELECT b FROM t WHERE b = 1 AND a IN (SELECT a FROM t WHERE a > 0))", + "SELECT a FROM t WHERE b IN (SELECT b FROM t WHERE b = 1 AND a IN (SELECT t2.a FROM (SELECT t1.a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t2))" + ] + }, { 
"name": "TestUnmatchedTableInHint", "cases": [ diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json index 819ee1a8a225d..3329243c823f6 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json @@ -2320,6 +2320,51 @@ } ] }, + { + "Name": "TestRuleColumnPruningLogicalApply", + "Cases": [ + { + "SQL": "SELECT COUNT(*) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "Best": "IndexReader(Index(t.f)[[NULL,+inf]]->HashAgg)->HashAgg" + }, + { + "SQL": "SELECT COUNT(a) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "Best": "IndexReader(Index(t.f)[[NULL,+inf]]->HashAgg)->HashAgg" + }, + { + "SQL": "SELECT COUNT(t) FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "Best": "Apply{IndexReader(Index(t.f)[[NULL,+inf]])->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Limit}->HashAgg" + }, + { + "SQL": "SELECT COUNT(a) FROM t t1 WHERE t1.a IN (SELECT t2.a FROM t t2, t t3 WHERE t2.b = t3.b)", + "Best": "LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]])->LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.b,test.t.b)->HashAgg}(test.t.a,test.t.a)->HashAgg" + }, + { + "SQL": "SELECT a FROM (SELECT a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t", + "Best": "IndexReader(Index(t.f)[[NULL,+inf]])" + }, + { + "SQL": "SELECT a FROM t WHERE b IN (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a)", + "Best": "LeftHashJoin{TableReader(Table(t))->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->HashAgg}(test.t.b,test.t.b)" + }, + { + "SQL": "SELECT a FROM t WHERE EXISTS (SELECT t2.b FROM t t2, t t3 
WHERE t2.a = t3.a AND t2.b=t.b)", + "Best": "LeftHashJoin{TableReader(Table(t))->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)}(test.t.b,test.t.b)" + }, + { + "SQL": "SELECT a FROM t WHERE NOT EXISTS (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t2.b=t.b)", + "Best": "LeftHashJoin{TableReader(Table(t))->MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)}(test.t.b,test.t.b)" + }, + { + "SQL": "SELECT a FROM t WHERE b IN (SELECT b FROM t WHERE b = 1 AND a IN (SELECT a FROM t WHERE a > 0))", + "Best": "RightHashJoin{IndexJoin{TableReader(Table(t)->Sel([eq(test.t.b, 1)]))->TableReader(Table(t)->Sel([gt(test.t.a, 0)]))}(test.t.a,test.t.a)->HashAgg->TableReader(Table(t))}(test.t.b,test.t.b)" + }, + { + "SQL": "SELECT a FROM t WHERE b IN (SELECT b FROM t WHERE b = 1 AND a IN (SELECT t2.a FROM (SELECT t1.a, (SELECT t2.b FROM t t2, t t3 WHERE t2.a = t3.a AND t1.a = t2.a LIMIT 1) t FROM t t1) t2))", + "Best": "LeftHashJoin{TableReader(Table(t))->IndexJoin{TableReader(Table(t)->Sel([eq(test.t.b, 1)]))->TableReader(Table(t))}(test.t.a,test.t.a)->HashAgg}(test.t.b,test.t.b)" + } + ] + }, { "Name": "TestUnmatchedTableInHint", "Cases": [ diff --git a/pkg/planner/core/casetest/testdata/integration_suite_out.json b/pkg/planner/core/casetest/testdata/integration_suite_out.json index 399eb07db7d23..7df363a498e8b 100644 --- a/pkg/planner/core/casetest/testdata/integration_suite_out.json +++ b/pkg/planner/core/casetest/testdata/integration_suite_out.json @@ -356,22 +356,15 @@ { "SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 ) from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed", "Plan": [ - "HashJoin_19 3.00 162366.01 root CARTESIAN left outer semi join", - "├─Selection_36(Build) 0.80 31149.25 root eq(2, Column#18)", - "│ └─StreamAgg_43 
1.00 31099.35 root funcs:count(1)->Column#18", - "│ └─TableReader_55 3.00 30949.65 root MppVersion: 2, data:ExchangeSender_54", - "│ └─ExchangeSender_54 3.00 464139.20 mpp[tiflash] ExchangeType: PassThrough", - "│ └─TableFullScan_53 3.00 464139.20 mpp[tiflash] table:t1 keep order:false", - "└─Projection_20(Probe) 3.00 129648.62 root 1->Column#28", - " └─Apply_22 3.00 129648.32 root CARTESIAN left outer join", - " ├─IndexReader_26(Build) 3.00 53.37 root index:IndexFullScan_25", - " │ └─IndexFullScan_25 3.00 610.50 cop[tikv] table:t, index:c(b) keep order:false", - " └─Projection_27(Probe) 3.00 43198.32 root 1->Column#26", - " └─Limit_30 3.00 43198.22 root offset:0, count:1", - " └─TableReader_35 3.00 43198.22 root MppVersion: 2, data:ExchangeSender_34", - " └─ExchangeSender_34 3.00 647920.44 mpp[tiflash] ExchangeType: PassThrough", - " └─Limit_33 3.00 647920.44 mpp[tiflash] offset:0, count:1", - " └─TableFullScan_32 3.00 647920.44 mpp[tiflash] table:t2 keep order:false" + "HashJoin_17 3.00 32771.06 root CARTESIAN left outer semi join", + "├─Selection_23(Build) 0.80 31149.25 root eq(2, Column#18)", + "│ └─StreamAgg_30 1.00 31099.35 root funcs:count(1)->Column#18", + "│ └─TableReader_42 3.00 30949.65 root MppVersion: 2, data:ExchangeSender_41", + "│ └─ExchangeSender_41 3.00 464139.20 mpp[tiflash] ExchangeType: PassThrough", + "│ └─TableFullScan_40 3.00 464139.20 mpp[tiflash] table:t1 keep order:false", + "└─Projection_18(Probe) 3.00 53.67 root 1->Column#24", + " └─IndexReader_22 3.00 53.37 root index:IndexFullScan_21", + " └─IndexFullScan_21 3.00 610.50 cop[tikv] table:t, index:c(b) keep order:false" ] }, { diff --git a/pkg/planner/core/plan.go b/pkg/planner/core/plan.go index 2a3ade525dece..f99e95dc6e3ab 100644 --- a/pkg/planner/core/plan.go +++ b/pkg/planner/core/plan.go @@ -254,7 +254,7 @@ type LogicalPlan interface { PredicatePushDown([]expression.Expression, *logicalOptimizeOp) ([]expression.Expression, LogicalPlan) // PruneColumns prunes the unused columns. 
- PruneColumns([]*expression.Column, *logicalOptimizeOp) error + PruneColumns([]*expression.Column, *logicalOptimizeOp, LogicalPlan) error // findBestTask converts the logical plan to the physical plan. It's a new interface. // It is called recursively from the parent to the children to create the result physical plan. @@ -756,11 +756,11 @@ func (*baseLogicalPlan) ExtractCorrelatedCols() []*expression.CorrelatedColumn { } // PruneColumns implements LogicalPlan interface. -func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *baseLogicalPlan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { if len(p.children) == 0 { return nil } - return p.children[0].PruneColumns(parentUsedCols, opt) + return p.children[0].PruneColumns(parentUsedCols, opt, p) } // Schema implements Plan Schema interface. diff --git a/pkg/planner/core/rule_column_pruning.go b/pkg/planner/core/rule_column_pruning.go index ae7be6306f01c..93d05d20ad9ce 100644 --- a/pkg/planner/core/rule_column_pruning.go +++ b/pkg/planner/core/rule_column_pruning.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/planner/util" + "github.com/pingcap/tidb/pkg/planner/util/fixcontrol" ) type columnPruner struct { @@ -33,7 +34,7 @@ type columnPruner struct { func (*columnPruner) optimize(_ context.Context, lp LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, bool, error) { planChanged := false - err := lp.PruneColumns(lp.Schema().Columns, opt) + err := lp.PruneColumns(lp.Schema().Columns, opt, lp) return lp, planChanged, err } @@ -70,7 +71,7 @@ func exprHasSetVarOrSleep(expr expression.Expression) bool { // the level projection expressions construction is left to the last logical optimize rule) // // so when do the rule_column_pruning here, we just prune the schema is enough. 
-func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := p.children[0] // Expand need those extra redundant distinct group by columns projected from underlying projection. // distinct GroupByCol must be used by aggregate above, to make sure this, append distinctGroupByCol again. @@ -86,12 +87,12 @@ func (p *LogicalExpand) PruneColumns(parentUsedCols []*expression.Column, opt *l } appendColumnPruneTraceStep(p, prunedColumns, opt) // Underlying still need to keep the distinct group by columns and parent used columns. - return child.PruneColumns(parentUsedCols, opt) + return child.PruneColumns(parentUsedCols, opt, p) } // PruneColumns implements LogicalPlan interface. // If any expression has SetVar function or Sleep function, we do not prune it. -func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := p.children[0] used := expression.GetUsedList(parentUsedCols, p.schema) prunedColumns := make([]*expression.Column, 0) @@ -107,18 +108,18 @@ func (p *LogicalProjection) PruneColumns(parentUsedCols []*expression.Column, op appendColumnPruneTraceStep(p, prunedColumns, opt) selfUsedCols := make([]*expression.Column, 0, len(p.Exprs)) selfUsedCols = expression.ExtractColumnsFromExpressions(selfUsedCols, p.Exprs, nil) - return child.PruneColumns(selfUsedCols, opt) + return child.PruneColumns(selfUsedCols, opt, p) } // PruneColumns implements LogicalPlan interface. 
-func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalSelection) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := p.children[0] parentUsedCols = expression.ExtractColumnsFromExpressions(parentUsedCols, p.Conditions, nil) - return child.PruneColumns(parentUsedCols, opt) + return child.PruneColumns(parentUsedCols, opt, p) } // PruneColumns implements LogicalPlan interface. -func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := la.children[0] used := expression.GetUsedList(parentUsedCols, la.Schema()) prunedColumns := make([]*expression.Column, 0) @@ -191,7 +192,7 @@ func (la *LogicalAggregation) PruneColumns(parentUsedCols []*expression.Column, } } appendGroupByItemsPruneTraceStep(la, prunedGroupByItems, opt) - err := child.PruneColumns(selfUsedCols, opt) + err := child.PruneColumns(selfUsedCols, opt, la) if err != nil { return err } @@ -242,27 +243,27 @@ func pruneByItems(p LogicalPlan, old []*util.ByItems, opt *logicalOptimizeOp) (b // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). -func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (ls *LogicalSort) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := ls.children[0] var cols []*expression.Column ls.ByItems, cols = pruneByItems(ls, ls.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) 
- return child.PruneColumns(parentUsedCols, opt) + return child.PruneColumns(parentUsedCols, opt, ls) } // PruneColumns implements LogicalPlan interface. // If any expression can view as a constant in execution stage, such as correlated column, constant, // we do prune them. Note that we can't prune the expressions contain non-deterministic functions, such as rand(). -func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (lt *LogicalTopN) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { child := lt.children[0] var cols []*expression.Column lt.ByItems, cols = pruneByItems(lt, lt.ByItems, opt) parentUsedCols = append(parentUsedCols, cols...) - return child.PruneColumns(parentUsedCols, opt) + return child.PruneColumns(parentUsedCols, opt, lt) } // PruneColumns implements LogicalPlan interface. -func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { used := expression.GetUsedList(parentUsedCols, p.schema) hasBeenUsed := false for i := range used { @@ -276,7 +277,7 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt copy(parentUsedCols, p.schema.Columns) } for _, child := range p.Children() { - err := child.PruneColumns(parentUsedCols, opt) + err := child.PruneColumns(parentUsedCols, opt, p) if err != nil { return err } @@ -315,7 +316,7 @@ func (p *LogicalUnionAll) PruneColumns(parentUsedCols []*expression.Column, opt } // PruneColumns implements LogicalPlan interface. 
-func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { for i := 0; i < p.handleCols.NumCols(); i++ { parentUsedCols = append(parentUsedCols, p.handleCols.GetCol(i)) } @@ -326,11 +327,11 @@ func (p *LogicalUnionScan) PruneColumns(parentUsedCols []*expression.Column, opt } condCols := expression.ExtractColumnsFromExpressions(nil, p.conditions, nil) parentUsedCols = append(parentUsedCols, condCols...) - return p.children[0].PruneColumns(parentUsedCols, opt) + return p.children[0].PruneColumns(parentUsedCols, opt, p) } // PruneColumns implements LogicalPlan interface. -func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { used := expression.GetUsedList(parentUsedCols, ds.schema) exprCols := expression.ExtractColumnsFromExpressions(nil, ds.allConds, nil) @@ -382,7 +383,7 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log } // PruneColumns implements LogicalPlan interface. -func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { switch p.TableInfo.Name.O { case infoschema.TableStatementsSummary, infoschema.TableStatementsSummaryHistory, @@ -413,7 +414,7 @@ func (p *LogicalMemTable) PruneColumns(parentUsedCols []*expression.Column, opt } // PruneColumns implements LogicalPlan interface. 
-func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalTableDual) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { used := expression.GetUsedList(parentUsedCols, p.Schema()) prunedColumns := make([]*expression.Column, 0) for i := len(used) - 1; i >= 0; i-- { @@ -459,16 +460,16 @@ func (p *LogicalJoin) mergeSchema() { } // PruneColumns implements LogicalPlan interface. -func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { leftCols, rightCols := p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(leftCols, opt) + err := p.children[0].PruneColumns(leftCols, opt, p) if err != nil { return err } addConstOneForEmptyProjection(p.children[0]) - err = p.children[1].PruneColumns(rightCols, opt) + err = p.children[1].PruneColumns(rightCols, opt, p) if err != nil { return err } @@ -484,10 +485,14 @@ func (p *LogicalJoin) PruneColumns(parentUsedCols []*expression.Column, opt *log } // PruneColumns implements LogicalPlan interface. 
-func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, parentLp LogicalPlan) error { leftCols, rightCols := la.extractUsedCols(parentUsedCols) - - err := la.children[1].PruneColumns(rightCols, opt) + allowEliminateApply := fixcontrol.GetBoolWithDefault(la.SCtx().GetSessionVars().GetOptimizerFixControlMap(), fixcontrol.Fix45822, true) + if allowEliminateApply && rightCols == nil && la.JoinType == LeftOuterJoin { + applyEliminateTraceStep(la.Children()[1], opt) + parentLp.SetChildren(la.Children()[0]) + } + err := la.children[1].PruneColumns(rightCols, opt, la) if err != nil { return err } @@ -498,7 +503,7 @@ func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *l leftCols = append(leftCols, &col.Column) } - err = la.children[0].PruneColumns(leftCols, opt) + err = la.children[0].PruneColumns(leftCols, opt, la) if err != nil { return err } @@ -509,9 +514,9 @@ func (la *LogicalApply) PruneColumns(parentUsedCols []*expression.Column, opt *l } // PruneColumns implements LogicalPlan interface. -func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { if !IsSelectForUpdateLockType(p.Lock.LockType) { - return p.baseLogicalPlan.PruneColumns(parentUsedCols, opt) + return p.baseLogicalPlan.PruneColumns(parentUsedCols, opt, p) } for tblID, cols := range p.tblID2Handle { @@ -525,11 +530,11 @@ func (p *LogicalLock) PruneColumns(parentUsedCols []*expression.Column, opt *log parentUsedCols = append(parentUsedCols, physTblIDCol) } } - return p.children[0].PruneColumns(parentUsedCols, opt) + return p.children[0].PruneColumns(parentUsedCols, opt, p) } // PruneColumns implements LogicalPlan interface. 
-func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { windowColumns := p.GetWindowResultColumns() cnt := 0 for _, col := range parentUsedCols { @@ -547,7 +552,7 @@ func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *l } parentUsedCols = parentUsedCols[:cnt] parentUsedCols = p.extractUsedCols(parentUsedCols) - err := p.children[0].PruneColumns(parentUsedCols, opt) + err := p.children[0].PruneColumns(parentUsedCols, opt, p) if err != nil { return err } @@ -573,14 +578,14 @@ func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []* } // PruneColumns implements LogicalPlan interface. -func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { +func (p *LogicalLimit) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { if len(parentUsedCols) == 0 { // happens when LIMIT appears in UPDATE. return nil } savedUsedCols := make([]*expression.Column, len(parentUsedCols)) copy(savedUsedCols, parentUsedCols) - if err := p.children[0].PruneColumns(parentUsedCols, opt); err != nil { + if err := p.children[0].PruneColumns(parentUsedCols, opt, p); err != nil { return err } p.schema = nil @@ -706,11 +711,23 @@ func preferKeyColumnFromTable(dataSource *DataSource, originColumns []*expressio // PruneColumns implements the interface of LogicalPlan. // LogicalCTE just do a empty function call. It's logical optimize is indivisual phase. -func (*LogicalCTE) PruneColumns(_ []*expression.Column, _ *logicalOptimizeOp) error { +func (*LogicalCTE) PruneColumns(_ []*expression.Column, _ *logicalOptimizeOp, _ LogicalPlan) error { return nil } // PruneColumns implements the interface of LogicalPlan. 
-func (p *LogicalSequence) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp) error { - return p.children[len(p.children)-1].PruneColumns(parentUsedCols, opt) +func (p *LogicalSequence) PruneColumns(parentUsedCols []*expression.Column, opt *logicalOptimizeOp, _ LogicalPlan) error { + return p.children[len(p.children)-1].PruneColumns(parentUsedCols, opt, p) +} + +func applyEliminateTraceStep(lp LogicalPlan, opt *logicalOptimizeOp) { + action := func() string { + buffer := bytes.NewBufferString( + fmt.Sprintf("%v_%v is eliminated.", lp.TP(), lp.ID())) + return buffer.String() + } + reason := func() string { + return fmt.Sprintf("%v_%v can be eliminated because it hasn't been used by it's parent.", lp.TP(), lp.ID()) + } + opt.appendStepToCurrent(lp.ID(), lp.TP(), reason, action) } diff --git a/pkg/planner/core/rule_max_min_eliminate.go b/pkg/planner/core/rule_max_min_eliminate.go index 03cc81a60067d..b2b2bc2fabdd8 100644 --- a/pkg/planner/core/rule_max_min_eliminate.go +++ b/pkg/planner/core/rule_max_min_eliminate.go @@ -155,7 +155,8 @@ func (a *maxMinEliminator) splitAggFuncAndCheckIndices(agg *LogicalAggregation, newAgg := LogicalAggregation{AggFuncs: []*aggregation.AggFuncDesc{f}}.Init(agg.SCtx(), agg.SelectBlockOffset()) newAgg.SetChildren(a.cloneSubPlans(agg.children[0])) newAgg.schema = expression.NewSchema(agg.schema.Columns[i]) - if err := newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt); err != nil { + // Since LogicalAggregation doesn’t use the parent LogicalPlan, passing an incorrect parameter here won’t affect subsequent optimizations. 
+ if err := newAgg.PruneColumns([]*expression.Column{newAgg.schema.Columns[0]}, opt, newAgg); err != nil { return nil, false } aggs = append(aggs, newAgg) diff --git a/pkg/planner/core/testdata/plan_suite_unexported_out.json b/pkg/planner/core/testdata/plan_suite_unexported_out.json index ecc854447351e..dd58f8c94714f 100644 --- a/pkg/planner/core/testdata/plan_suite_unexported_out.json +++ b/pkg/planner/core/testdata/plan_suite_unexported_out.json @@ -137,7 +137,7 @@ "Join{Join{DataScan(t)->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->Projection", "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection", "Apply{DataScan(t2)->DataScan(t1)->Sel([eq(test.t.a, test.t.a)])->Projection}->Projection", - "Join{DataScan(t2)->DataScan(t1)->Aggr(firstrow(test.t.c),count(1))}(test.t.c,test.t.c)->Projection->Aggr(count(1))->Projection" + "DataScan(t2)->Aggr(count(1))->Projection" ] }, { diff --git a/pkg/planner/util/fixcontrol/get.go b/pkg/planner/util/fixcontrol/get.go index 6c53b344b5f15..1c2706fbb83a2 100644 --- a/pkg/planner/util/fixcontrol/get.go +++ b/pkg/planner/util/fixcontrol/get.go @@ -34,6 +34,8 @@ const ( Fix44855 uint64 = 44855 // Fix45132 controls whether to use access range row count to determine access path on the Skyline pruning. Fix45132 uint64 = 45132 + // Fix45822 controls whether column pruning may eliminate a left outer join Apply operator when no columns from its inner (right) side are used by its parent. + Fix45822 uint64 = 45822 // Fix45798 controls whether to cache plans that access generated columns. Fix45798 uint64 = 45798 // Fix46177 controls whether to explore enforced plans for DataSource if it has already found an unenforced plan. 
diff --git a/tests/integrationtest/r/planner/core/casetest/integration.result b/tests/integrationtest/r/planner/core/casetest/integration.result index 7f4553d8c0f2d..ec0588ee10a8d 100644 --- a/tests/integrationtest/r/planner/core/casetest/integration.result +++ b/tests/integrationtest/r/planner/core/casetest/integration.result @@ -376,15 +376,9 @@ id estRows task access object operator info HashJoin 10000.00 root CARTESIAN left outer semi join, other cond:eq(2, planner__core__casetest__integration.t.b) ├─TableReader(Build) 10000.00 root data:TableFullScan │ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo -└─Projection(Probe) 10000.00 root 1->Column#27 - └─Apply 10000.00 root CARTESIAN left outer join - ├─TableReader(Build) 10000.00 root data:TableFullScan - │ └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo - └─Projection(Probe) 10000.00 root 1->Column#25 - └─Limit 10000.00 root offset:0, count:1 - └─TableReader 10000.00 root data:Limit - └─Limit 10000.00 cop[tikv] offset:0, count:1 - └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo +└─Projection(Probe) 10000.00 root 1->Column#23 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo select (2) in (select b from t) from (select t.a < (select t.a from t t1 limit 1) from t) t; (2) in (select b from t) 1