From cf0a91c197d61d30730db645d5b8d3b2c7ea7d22 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 9 Nov 2023 12:14:12 +0800 Subject: [PATCH] planner, CTE: Fix default inline CTE which contains agg or window function and refactor inline CTE strategy (#48437) close pingcap/tidb#47711 --- .../testdata/flat_plan_suite_out.json | 75 +---------- .../hint/testdata/integration_suite_out.json | 23 ++-- .../physicalplantest/physical_plan_test.go | 2 + .../testdata/plan_suite_in.json | 9 +- .../testdata/plan_suite_out.json | 123 +++++++++++++++++- .../casetest/planstats/plan_stats_test.go | 8 +- pkg/planner/core/logical_plan_builder.go | 57 ++++++-- pkg/planner/core/planbuilder.go | 7 + pkg/planner/core/preprocess.go | 7 + tests/integrationtest/r/cte.result | 12 -- tests/integrationtest/r/explain_cte.result | 27 ---- tests/integrationtest/t/cte.test | 1 - tests/integrationtest/t/explain_cte.test | 5 - 13 files changed, 208 insertions(+), 148 deletions(-) diff --git a/pkg/planner/core/casetest/flatplan/testdata/flat_plan_suite_out.json b/pkg/planner/core/casetest/flatplan/testdata/flat_plan_suite_out.json index 9a21833258c5f..1839322a235dc 100644 --- a/pkg/planner/core/casetest/flatplan/testdata/flat_plan_suite_out.json +++ b/pkg/planner/core/casetest/flatplan/testdata/flat_plan_suite_out.json @@ -212,8 +212,8 @@ { "Depth": 2, "Label": 0, - "IsRoot": true, - "StoreType": 2, + "IsRoot": false, + "StoreType": 0, "ReqType": 0, "IsPhysicalPlan": true, "TextTreeIndent": "│ │ ", @@ -232,80 +232,15 @@ { "Depth": 2, "Label": 0, - "IsRoot": true, - "StoreType": 2, + "IsRoot": false, + "StoreType": 0, "ReqType": 0, "IsPhysicalPlan": true, "TextTreeIndent": " │ ", "IsLastChild": true } ], - "CTEs": [ - [ - { - "Depth": 0, - "Label": 0, - "IsRoot": true, - "StoreType": 2, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": "", - "IsLastChild": true - }, - { - "Depth": 1, - "Label": 3, - "IsRoot": true, - "StoreType": 2, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": "│ ", - "IsLastChild": true - }, - { - "Depth": 2, - "Label": 0, - "IsRoot": false, - "StoreType": 0, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": " │ ", - "IsLastChild": true - } - ], - [ - { - "Depth": 0, - "Label": 0, - "IsRoot": true, - "StoreType": 2, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": "", - "IsLastChild": true - }, - { - "Depth": 1, - "Label": 3, - "IsRoot": true, - "StoreType": 2, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": "│ ", - "IsLastChild": true - }, - { - "Depth": 2, - "Label": 0, - "IsRoot": false, - "StoreType": 0, - "ReqType": 0, - "IsPhysicalPlan": true, - "TextTreeIndent": " │ ", - "IsLastChild": true - } - ] - ] + "CTEs": null }, { "SQL": "WITH RECURSIVE cte (n) AS( SELECT 1 UNION ALL SELECT n + 1 FROM cte WHERE n < 5)SELECT * FROM cte;", diff --git a/pkg/planner/core/casetest/hint/testdata/integration_suite_out.json b/pkg/planner/core/casetest/hint/testdata/integration_suite_out.json index 8bdc44ff8656a..369189e4531db 100644 --- a/pkg/planner/core/casetest/hint/testdata/integration_suite_out.json +++ b/pkg/planner/core/casetest/hint/testdata/integration_suite_out.json @@ -798,24 +798,21 @@ { "SQL": "explain format = 'brief' select /*+ qb_name(qb_v8, v8), merge(@qb_v8) */ * from v8;", "Plan": [ - "HashAgg 16000.00 root group by:Column#21, funcs:firstrow(Column#21)->Column#21", + "HashAgg 16000.00 root group by:Column#41, funcs:firstrow(Column#41)->Column#41", "└─Union 1000000010000.00 root ", " ├─HashJoin 1000000000000.00 root CARTESIAN inner join", " │ ├─TableReader(Build) 10000.00 root data:TableFullScan", " │ │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - " │ └─CTEFullScan(Probe) 100000000.00 root CTE:cte2 data:CTE_1", + " │ └─Projection(Probe) 100000000.00 root 1->Column#55", + " │ └─HashJoin 100000000.00 root CARTESIAN inner join", + " │ ├─Projection(Build) 10000.00 root 1->Column#54", + " │ │ └─IndexReader 10000.00 root index:IndexFullScan", + " │ │ └─IndexFullScan 10000.00 cop[tikv] table:t3, index:idx_a(a) keep order:false, stats:pseudo", + " │ └─Projection(Probe) 10000.00 root 1->Column#53", + " │ └─IndexReader 10000.00 root index:IndexFullScan", + " │ └─IndexFullScan 10000.00 cop[tikv] table:t2, index:idx_a(a) keep order:false, stats:pseudo", " └─TableReader 10000.00 root data:TableFullScan", - " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", - "CTE_1 100000000.00 root Non-Recursive CTE", - "└─HashJoin(Seed Part) 100000000.00 root CARTESIAN inner join", - " ├─CTEFullScan(Build) 10000.00 root CTE:cte4 data:CTE_3", - " └─CTEFullScan(Probe) 10000.00 root CTE:cte3 data:CTE_2", - "CTE_3 10000.00 root Non-Recursive CTE", - "└─IndexReader(Seed Part) 10000.00 root index:IndexFullScan", - " └─IndexFullScan 10000.00 cop[tikv] table:t3, index:idx_a(a) keep order:false, stats:pseudo", - "CTE_2 10000.00 root Non-Recursive CTE", - "└─IndexReader(Seed Part) 10000.00 root index:IndexFullScan", - " └─IndexFullScan 10000.00 cop[tikv] table:t2, index:idx_a(a) keep order:false, stats:pseudo" + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warn": null }, diff --git a/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go b/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go index 0d481614c6e25..e54f91b3ffc42 100644 --- a/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go +++ b/pkg/planner/core/casetest/physicalplantest/physical_plan_test.go @@ -1206,6 +1206,8 @@ func TestSingleConsumerCTE(t *testing.T) { tk.MustExec("use test") tk.MustExec("drop table if exists t;") tk.MustExec("CREATE TABLE `t` (`a` int(11));") + tk.MustExec("create table t1 (c1 int primary key, c2 int, index c2 (c2));") + tk.MustExec("create table t2 (c1 int unique, c2 int);") tk.MustExec("insert into t values (1), (5), (10), (15), (20), (30), (50);") var ( diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json index ec0cedcc67ec8..4b2cff277ce06 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_in.json @@ -550,6 +550,10 @@ { "name": "TestSingleConsumerCTE", "cases": [ + "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 1) select * from cte1; -- non-recursive limit, inline cte1", + "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 100 offset 100) select * from cte1; -- non-recursive limit, inline cte1", + "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 0 offset 0) select * from cte1; -- non-recursive limit, inline cte1", + "with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 as (select 3) select * from cte2 union all select * from cte2) -- inline cte1, not inline cte2", "with base1 as (WITH RECURSIVE cte(a) AS (with tmp as (select 1 as a) SELECT a from tmp UNION SELECT a+1 FROM cte) SELECT * FROM cte) select * from base1; -- issue #43318", "with cte as (select 1) select * from cte; -- inline cte", "with cte1 as (select 1), cte2 as (select 2) select * from cte1 union select * from cte2; -- inline cte1, cte2", @@ -564,7 +568,10 @@ "set tidb_opt_force_inline_cte=1; -- enable force inline CTE", "with cte as (select 1) select * from cte union select * from cte; -- force inline cte while multi-consumer", "set tidb_opt_force_inline_cte=0; -- disable force inline CTE", - "with cte as (select 1) select /*+ MERGE() */ * from cte union select * from cte; -- firstly inline cte, secondly cannot be inlined" + "with cte as (select 1) select /*+ MERGE() */ * from cte union select * from cte; -- firstly inline cte, secondly cannot be inlined", + "with a as (select 8 as id from dual),maxa as (select max(id) as max_id from a),b as (with recursive temp as (select 1 as lvl from dual union all select lvl+1 from temp, maxa where lvl < max_id)select * from temp) select * from b; -- issue #47711, maxa cannot be inlined because it contains agg and in the recursive part of cte temp", + "with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select 1 as res from t1 union all select res+1 from tmp,b where res+1 < bb) select * from tmp) select * from c; -- inline a, cannot be inline b because b indirectly contains agg and in the recursive part of cte tmp", + "with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select bb as res from b union all select res+1 from tmp where res +1 < 10) select * from tmp) select * from c; -- inline a, b, cannot be inline tmp, c" ] }, { diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json index 819ee1a8a225d..899c6f8922c1d 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json @@ -2774,7 +2774,7 @@ " └─CTETable 1.00 root Scan on CTE_0" ], "Warning": [ - "[planner:1815]Recursive CTE can not be inlined." + "[planner:1815]Recursive CTE cte1 can not be inlined by merge() or tidb_opt_force_inline_cte." ] }, { @@ -3023,7 +3023,9 @@ " └─Selection 0.80 root lt(Column#3, 100)", " └─CTETable 1.00 root Scan on CTE_0" ], - "Warning": null + "Warning": [ + "[planner:1815]Recursive CTE cte1 can not be inlined by merge() or tidb_opt_force_inline_cte." + ] }, { "SQL": "with cte1 as (with cte2 as (select * from t) select * from cte2) select * from cte1; -- non-recursive 'cte2' definition inside another non-recursive 'cte1'", @@ -3044,7 +3046,9 @@ " └─Selection 8000.00 root lt(test.t.a, 100)", " └─CTETable 10000.00 root Scan on CTE_0" ], - "Warning": null + "Warning": [ + "[planner:1815]Recursive CTE cte2 can not be inlined by merge() or tidb_opt_force_inline_cte." + ] }, { "SQL": "with cte1 as (with recursive cte2(c1) as (select 1 union select c1 + 1 c1 from cte2 where c1 < 100) select * from cte2) select * from cte1; -- recursive 'cte2' inside non-recursive 'cte1'", @@ -3057,7 +3061,10 @@ " └─Selection 0.80 root lt(Column#10, 100)", " └─CTETable 1.00 root Scan on CTE_2" ], - "Warning": null + "Warning": [ + "[planner:1815]Recursive CTE cte2 can not be inlined by merge() or tidb_opt_force_inline_cte.", + "[planner:1815]Recursive CTE cte2 can not be inlined by merge() or tidb_opt_force_inline_cte." + ] }, { "SQL": "set tidb_opt_force_inline_cte=0; -- disable force inline CTE", @@ -3092,7 +3099,7 @@ " └─CTETable 1.00 root Scan on CTE_0" ], "Warning": [ - "[planner:1815]Recursive CTE can not be inlined." + "[planner:1815]Recursive CTE cte1 can not be inlined by merge() or tidb_opt_force_inline_cte." ] }, { @@ -3134,6 +3141,55 @@ { "Name": "TestSingleConsumerCTE", "Cases": [ + { + "SQL": "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 1) select * from cte1; -- non-recursive limit, inline cte1", + "Plan": [ + "Limit 1.00 root offset:0, count:1", + "└─HashAgg 1.00 root group by:Column#18, funcs:firstrow(Column#18)->Column#18", + " └─Union 20000.00 root ", + " ├─TableReader 10000.00 root data:TableFullScan", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader 10000.00 root index:IndexFullScan", + " └─IndexFullScan 10000.00 cop[tikv] table:t2, index:c1(c1) keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 100 offset 100) select * from cte1; -- non-recursive limit, inline cte1", + "Plan": [ + "Limit 100.00 root offset:100, count:100", + "└─HashAgg 200.00 root group by:Column#18, funcs:firstrow(Column#18)->Column#18", + " └─Union 20000.00 root ", + " ├─TableReader 10000.00 root data:TableFullScan", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─IndexReader 10000.00 root index:IndexFullScan", + " └─IndexFullScan 10000.00 cop[tikv] table:t2, index:c1(c1) keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 0 offset 0) select * from cte1; -- non-recursive limit, inline cte1", + "Plan": [ + "TableDual 0.00 root rows:0" + ], + "Warning": null + }, + { + "SQL": "with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 as (select 3) select * from cte2 union all select * from cte2) -- inline cte1, not inline cte2", + "Plan": [ + "HashAgg 3.00 root group by:Column#9, funcs:firstrow(Column#9)->Column#9", + "└─Union 3.00 root ", + " ├─Projection 1.00 root 1->Column#9", + " │ └─TableDual 1.00 root rows:1", + " └─Union 2.00 root ", + " ├─CTEFullScan 1.00 root CTE:cte2 data:CTE_2", + " └─CTEFullScan 1.00 root CTE:cte2 data:CTE_2", + "CTE_2 1.00 root Non-Recursive CTE", + "└─Projection(Seed Part) 1.00 root 3->Column#5", + " └─TableDual 1.00 root rows:1" + ], + "Warning": null + }, { "SQL": "with base1 as (WITH RECURSIVE cte(a) AS (with tmp as (select 1 as a) SELECT a from tmp UNION SELECT a+1 FROM cte) SELECT * FROM cte) select * from base1; -- issue #43318", "Plan": [ @@ -3287,6 +3343,63 @@ " └─TableDual 1.00 root rows:1" ], "Warning": null + }, + { + "SQL": "with a as (select 8 as id from dual),maxa as (select max(id) as max_id from a),b as (with recursive temp as (select 1 as lvl from dual union all select lvl+1 from temp, maxa where lvl < max_id)select * from temp) select * from b; -- issue #47711, maxa cannot be inlined because it contains agg and in the recursive part of cte temp", + "Plan": [ + "CTEFullScan 1.64 root CTE:temp data:CTE_4", + "CTE_4 1.64 root Recursive CTE", + "├─Projection(Seed Part) 1.00 root 1->Column#14", + "│ └─TableDual 1.00 root rows:1", + "└─Projection(Recursive Part) 0.64 root cast(plus(Column#15, 1), bigint(1) BINARY)->Column#18", + " └─HashJoin 0.64 root CARTESIAN inner join, other cond:lt(Column#15, Column#16)", + " ├─Selection(Build) 0.80 root not(isnull(Column#16))", + " │ └─CTEFullScan 1.00 root CTE:maxa data:CTE_1", + " └─Selection(Probe) 0.80 root not(isnull(Column#15))", + " └─CTETable 1.00 root Scan on CTE_4", + "CTE_1 1.00 root Non-Recursive CTE", + "└─StreamAgg(Seed Part) 1.00 root funcs:max(Column#3)->Column#4", + " └─Projection 1.00 root 8->Column#3", + " └─TableDual 1.00 root rows:1" + ], + "Warning": null + }, + { + "SQL": "with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select 1 as res from t1 union all select res+1 from tmp,b where res+1 < bb) select * from tmp) select * from c; -- inline a, cannot be inline b because b indirectly contains agg and in the recursive part of cte tmp", + "Plan": [ + "CTEFullScan 20000.00 root CTE:tmp data:CTE_4", + "CTE_4 20000.00 root Recursive CTE", + "├─Projection(Seed Part) 10000.00 root 1->Column#26", + "│ └─TableReader 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Projection(Recursive Part) 10000.00 root cast(plus(Column#27, 1), bigint(1) BINARY)->Column#30", + " └─HashJoin 10000.00 root CARTESIAN inner join, other cond:lt(plus(Column#27, 1), Column#28)", + " ├─CTEFullScan(Build) 1.00 root CTE:b data:CTE_1", + " └─CTETable(Probe) 10000.00 root Scan on CTE_4", + "CTE_1 1.00 root Non-Recursive CTE", + "└─Projection(Seed Part) 1.00 root 2->Column#8", + " └─HashAgg 1.00 root funcs:count(Column#37)->Column#35", + " └─TableReader 1.00 root data:HashAgg", + " └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#37", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "with a as (select count(*) from t1), b as (select 2 as bb from a), c as (with recursive tmp as (select bb as res from b union all select res+1 from tmp where res +1 < 10) select * from tmp) select * from c; -- inline a, b, cannot be inline tmp, c", + "Plan": [ + "CTEFullScan 1.80 root CTE:tmp data:CTE_4", + "CTE_4 1.80 root Recursive CTE", + "├─Projection(Seed Part) 1.00 root 2->Column#37", + "│ └─HashAgg 1.00 root funcs:count(Column#46)->Column#44", + "│ └─TableReader 1.00 root data:HashAgg", + "│ └─HashAgg 1.00 cop[tikv] funcs:count(1)->Column#46", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Projection(Recursive Part) 0.80 root cast(plus(Column#38, 1), bigint(1) BINARY)->Column#40", + " └─Selection 0.80 root lt(plus(Column#38, 1), 10)", + " └─CTETable 1.00 root Scan on CTE_4" + ], + "Warning": null } ] }, diff --git a/pkg/planner/core/casetest/planstats/plan_stats_test.go b/pkg/planner/core/casetest/planstats/plan_stats_test.go index f29b42d2bfbf4..dde452f1ba820 100644 --- a/pkg/planner/core/casetest/planstats/plan_stats_test.go +++ b/pkg/planner/core/casetest/planstats/plan_stats_test.go @@ -166,13 +166,13 @@ func TestPlanStatsLoad(t *testing.T) { { // CTE sql: "with cte(x, y) as (select d + 1, b from t where c > 1) select * from cte where x < 3", check: func(p plannercore.Plan, tableInfo *model.TableInfo) { - ps, ok := p.(*plannercore.PhysicalSelection) + ps, ok := p.(*plannercore.PhysicalProjection) require.True(t, ok) - pc, ok := ps.Children()[0].(*plannercore.PhysicalCTE) + pc, ok := ps.Children()[0].(*plannercore.PhysicalTableReader) require.True(t, ok) - pp, ok := pc.SeedPlan.(*plannercore.PhysicalProjection) + pp, ok := pc.GetTablePlan().(*plannercore.PhysicalSelection) require.True(t, ok) - reader, ok := pp.Children()[0].(*plannercore.PhysicalTableReader) + reader, ok := pp.Children()[0].(*plannercore.PhysicalTableScan) require.True(t, ok) require.Greater(t, countFullStats(reader.StatsInfo().HistColl, tableInfo.Columns[2].ID), 0) }, diff --git a/pkg/planner/core/logical_plan_builder.go b/pkg/planner/core/logical_plan_builder.go index 307ebfd428512..812b27cedb350 100644 --- a/pkg/planner/core/logical_plan_builder.go +++ b/pkg/planner/core/logical_plan_builder.go @@ -345,6 +345,10 @@ func (b *PlanBuilder) buildAggregation(ctx context.Context, p LogicalPlan, aggFu if b.ctx.GetSessionVars().EnableSkewDistinctAgg { b.optFlag |= flagSkewDistinctAgg } + // flag it if cte contain aggregation + if b.buildingCTE { + b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = true + } var rollupExpand *LogicalExpand if expand, ok := p.(*LogicalExpand); ok { rollupExpand = expand @@ -4346,7 +4350,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L // In particular, recursive CTE have separate warnings, so they are no longer called. if b.buildingCTE { if b.isCTE { - b.outerCTEs[len(b.outerCTEs)-1].isInline = true + b.outerCTEs[len(b.outerCTEs)-1].forceInlineByHintOrVar = true } else if !b.buildingRecursivePartForCTE { // If there has subquery which is not CTE and using `MERGE()` hint, we will show this warning; b.ctx.GetSessionVars().StmtCtx.AppendWarning( @@ -4876,10 +4880,13 @@ func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName prevSchema := cte.seedLP.Schema().Clone() lp.SetSchema(getResultCTESchema(cte.seedLP.Schema(), b.ctx.GetSessionVars())) - if cte.recurLP != nil && cte.isInline { - b.ctx.GetSessionVars().StmtCtx.AppendWarning( - ErrInternal.GenWithStack("Recursive CTE can not be inlined.")) + // If current CTE query contain another CTE which 'containAggOrWindow' is true, current CTE 'containAggOrWindow' will be true + if b.buildingCTE { + b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = cte.containAggOrWindow || b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow } + // Compute cte inline + b.computeCTEInlineFlag(cte) + if cte.recurLP == nil && cte.isInline { saveCte := make([]*cteInfo, len(b.outerCTEs[i:])) copy(saveCte, b.outerCTEs[i:]) @@ -4916,6 +4923,36 @@ func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName return nil, nil } +// computeCTEInlineFlag, Combine the declaration of CTE and the use of CTE to jointly determine **whether a CTE can be inlined** +/* + There are some cases that CTE must be not inlined. + 1. CTE is recursive CTE. + 2. CTE contains agg or window and it is referenced by recursive part of CTE. + 3. Consumer count of CTE is more than one. + If 1 or 2 conditions are met, CTE cannot be inlined. + But if query is hint by 'merge()' or session variable "tidb_opt_force_inline_cte", + CTE will still not be inlined but a warning will be recorded "Hint or session variables are invalid" + If 3 condition is met, CTE can be inlined by hint and session variables. +*/ +func (b *PlanBuilder) computeCTEInlineFlag(cte *cteInfo) { + if cte.recurLP != nil { + if cte.forceInlineByHintOrVar { + b.ctx.GetSessionVars().StmtCtx.AppendWarning( + ErrInternal.GenWithStack("Recursive CTE %s can not be inlined by merge() or tidb_opt_force_inline_cte.", cte.def.Name)) + } + } else if cte.containAggOrWindow && b.buildingRecursivePartForCTE { + if cte.forceInlineByHintOrVar { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(ErrCTERecursiveForbidsAggregation.FastGenByArgs(cte.def.Name)) + } + } else if cte.consumerCount > 1 { + if cte.forceInlineByHintOrVar { + cte.isInline = true + } + } else { + cte.isInline = true + } +} + func (b *PlanBuilder) buildDataSourceFromCTEMerge(ctx context.Context, cte *ast.CommonTableExpression) (LogicalPlan, error) { p, err := b.buildResultSetNode(ctx, cte.Query.Query, true) if err != nil { @@ -6941,6 +6978,9 @@ func sortWindowSpecs(groupedFuncs map[*ast.WindowSpec][]*ast.WindowFuncExpr, ord } func (b *PlanBuilder) buildWindowFunctions(ctx context.Context, p LogicalPlan, groupedFuncs map[*ast.WindowSpec][]*ast.WindowFuncExpr, orderedSpec []*ast.WindowSpec, aggMap map[*ast.AggregateFuncExpr]int) (LogicalPlan, map[*ast.WindowFuncExpr]int, error) { + if b.buildingCTE { + b.outerCTEs[len(b.outerCTEs)-1].containAggOrWindow = true + } args := make([]ast.ExprNode, 0, 4) windowMap := make(map[*ast.WindowFuncExpr]int) for _, window := range sortWindowSpecs(groupedFuncs, orderedSpec) { @@ -7832,16 +7872,13 @@ func (b *PlanBuilder) buildWith(ctx context.Context, w *ast.WithClause) ([]*cteI } ctes := make([]*cteInfo, 0, len(w.CTEs)) for _, cte := range w.CTEs { - b.outerCTEs = append(b.outerCTEs, &cteInfo{def: cte, nonRecursive: !w.IsRecursive, isBuilding: true, storageID: b.allocIDForCTEStorage, seedStat: &property.StatsInfo{}}) + b.outerCTEs = append(b.outerCTEs, &cteInfo{def: cte, nonRecursive: !w.IsRecursive, isBuilding: true, storageID: b.allocIDForCTEStorage, seedStat: &property.StatsInfo{}, consumerCount: cte.ConsumerCount}) b.allocIDForCTEStorage++ saveFlag := b.optFlag // Init the flag to flagPrunColumns, otherwise it's missing. b.optFlag = flagPrunColumns - // Case1: If the current CTE has only one consumer, the default is set to inline CTE - // Case2: If the session variable "tidb_opt_force_inline_cte" is true, all of CTEs will be inlined. - // Otherwise, whether CTEs are inlined depends on whether the merge() hint is declared. - if !cte.IsRecursive && (cte.ConsumerCount == 1 || b.ctx.GetSessionVars().EnableForceInlineCTE()) { - b.outerCTEs[len(b.outerCTEs)-1].isInline = true + if b.ctx.GetSessionVars().EnableForceInlineCTE() { + b.outerCTEs[len(b.outerCTEs)-1].forceInlineByHintOrVar = true } _, err := b.buildCte(ctx, cte, w.IsRecursive) if err != nil { diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index d4ec806e2a6a2..c5b0a464d3896 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -490,7 +490,14 @@ type cteInfo struct { // The LogicalCTEs that reference the same table should share the same CteClass. cteClass *CTEClass + // isInline will determine whether it can be inlined when **CTE is used** isInline bool + // forceInlineByHintOrVar will be true when CTE is hint by merge() or session variable "tidb_opt_force_inline_cte=true" + forceInlineByHintOrVar bool + // If CTE contain aggregation or window function in query (Indirect references to other cte containing agg or window in the query are also counted.) + containAggOrWindow bool + // Compute in preprocess phase. Record how many consumers the current CTE has + consumerCount int } type subQueryCtx = uint64 diff --git a/pkg/planner/core/preprocess.go b/pkg/planner/core/preprocess.go index 0ba0cd8f0be50..ea9ae7525d967 100644 --- a/pkg/planner/core/preprocess.go +++ b/pkg/planner/core/preprocess.go @@ -292,6 +292,9 @@ func (p *preprocessor) Enter(in ast.Node) (out ast.Node, skipChildren bool) { p.showTp = node.Tp p.resolveShowStmt(node) case *ast.SetOprSelectList: + if node.With != nil { + p.preprocessWith.cteStack = append(p.preprocessWith.cteStack, node.With.CTEs) + } p.checkSetOprSelectList(node) case *ast.DeleteTableList: p.stmtTp = TypeDelete @@ -649,6 +652,10 @@ func (p *preprocessor) Leave(in ast.Node) (out ast.Node, ok bool) { if x.With != nil { p.preprocessWith.cteStack = p.preprocessWith.cteStack[0 : len(p.preprocessWith.cteStack)-1] } + case *ast.SetOprSelectList: + if x.With != nil { + p.preprocessWith.cteStack = p.preprocessWith.cteStack[0 : len(p.preprocessWith.cteStack)-1] + } } return in, p.err == nil diff --git a/tests/integrationtest/r/cte.result b/tests/integrationtest/r/cte.result index 46c7233cf6b44..1e24a8e08a1aa 100644 --- a/tests/integrationtest/r/cte.result +++ b/tests/integrationtest/r/cte.result @@ -790,15 +790,3 @@ with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 1 1 3 -explain with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 as (select 3) select * from cte2 union all select * from cte2); -id estRows task access object operator info -HashAgg_24 3.00 root group by:Column#9, funcs:firstrow(Column#9)->Column#9 -└─Union_25 3.00 root - ├─Projection_26 1.00 root 1->Column#9 - │ └─TableDual_27 1.00 root rows:1 - └─Union_29 2.00 root - ├─CTEFullScan_31 1.00 root CTE:cte2 data:CTE_2 - └─CTEFullScan_33 1.00 root CTE:cte2 data:CTE_2 -CTE_2 1.00 root Non-Recursive CTE -└─Projection_22(Seed Part) 1.00 root 3->Column#5 - └─TableDual_23 1.00 root rows:1 diff --git a/tests/integrationtest/r/explain_cte.result b/tests/integrationtest/r/explain_cte.result index f0cd69583a59b..d22e165e87baf 100644 --- a/tests/integrationtest/r/explain_cte.result +++ b/tests/integrationtest/r/explain_cte.result @@ -166,33 +166,6 @@ CTE_0 8001.00 root Recursive CTE, limit(offset:0, count:0) │ └─TableFullScan_17 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo └─Projection_21(Recursive Part) 10000.00 root cast(plus(explain_cte.t1.c1, 1), int(11))->explain_cte.t1.c1 └─CTETable_22 10000.00 root Scan on CTE_0 -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 1) select * from cte1; -id estRows task access object operator info -CTEFullScan_34 1.00 root CTE:cte1 data:CTE_0 -CTE_0 1.00 root Non-Recursive CTE -└─Limit_20(Seed Part) 1.00 root offset:0, count:1 - └─HashAgg_21 1.00 root group by:Column#11, funcs:firstrow(Column#11)->Column#11 - └─Union_22 20000.00 root - ├─TableReader_25 10000.00 root data:TableFullScan_24 - │ └─TableFullScan_24 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo - └─IndexReader_32 10000.00 root index:IndexFullScan_31 - └─IndexFullScan_31 10000.00 cop[tikv] table:t2, index:c1(c1) keep order:false, stats:pseudo -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 100 offset 100) select * from cte1; -id estRows task access object operator info -CTEFullScan_34 100.00 root CTE:cte1 data:CTE_0 -CTE_0 100.00 root Non-Recursive CTE -└─Limit_20(Seed Part) 100.00 root offset:100, count:100 - └─HashAgg_21 200.00 root group by:Column#11, funcs:firstrow(Column#11)->Column#11 - └─Union_22 20000.00 root - ├─TableReader_25 10000.00 root data:TableFullScan_24 - │ └─TableFullScan_24 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo - └─IndexReader_32 10000.00 root index:IndexFullScan_31 - └─IndexFullScan_31 10000.00 cop[tikv] table:t2, index:c1(c1) keep order:false, stats:pseudo -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 0 offset 0) select * from cte1; -id estRows task access object operator info -CTEFullScan_19 0.00 root CTE:cte1 data:CTE_0 -CTE_0 0.00 root Non-Recursive CTE -└─TableDual_16(Seed Part) 0.00 root rows:0 CREATE TABLE `customer` ( `c_customer_sk` int(11) NOT NULL, `c_customer_id` char(16) NOT NULL, diff --git a/tests/integrationtest/t/cte.test b/tests/integrationtest/t/cte.test index 1f4e4b3704e45..d5fe902434f10 100644 --- a/tests/integrationtest/t/cte.test +++ b/tests/integrationtest/t/cte.test @@ -336,4 +336,3 @@ INSERT INTO `t_dnmxh` VALUES (104,571000,NULL),(104,572000,44.37),(104,573000,59 WITH cte_0 AS (select distinct ref_0.wkey as c0, ref_0.pkey as c1, ref_0.c_xhsndb as c2 from t_dnmxh as ref_0 where (1 <= ( select ref_1.pkey not in ( select ref_5.wkey as c0 from t_dnmxh as ref_5 where (ref_5.wkey < ( select ref_6.pkey as c0 from t_cqmg3b as ref_6 where 88 between 96 and 76)) ) as c0 from (t_cqmg3b as ref_1 left outer join t_dnmxh as ref_2 on (ref_1.wkey = ref_2.wkey )) where ref_0.c_xhsndb is NULL union select 33 <= 91 as c0 from t_cqmg3b as ref_8 ))), cte_1 AS (select ref_9.wkey as c0, ref_9.pkey as c1, ref_9.c_anpf_c as c2, ref_9.c_b_fp_c as c3, ref_9.c_ndccfb as c4, ref_9.c_8rswc as c5 from t_cqmg3b as ref_9) select count(1) from cte_0 as ref_10 where case when 56 < 50 then case when 100 in ( select distinct ref_11.c4 as c0 from cte_1 as ref_11 where (ref_11.c4 > ( select ref_13.pkey as c0 from t_dnmxh as ref_13 where (ref_13.wkey > ( select distinct ref_11.c1 as c0 from cte_0 as ref_14)) )) or (1 = 1)) then null else null end else '7mxv6' end not like 'ki4%vc'; #case with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 as (select 3) select * from cte2 union all select * from cte2) order by 1; -explain with cte1 as (select 1), cte2 as (select 2) select * from cte1 union (with cte2 as (select 3) select * from cte2 union all select * from cte2); diff --git a/tests/integrationtest/t/explain_cte.test b/tests/integrationtest/t/explain_cte.test index 646fb15eab625..7da1523b47911 100644 --- a/tests/integrationtest/t/explain_cte.test +++ b/tests/integrationtest/t/explain_cte.test @@ -38,11 +38,6 @@ explain with recursive cte1(c1) as (select c1 from t1 union select c1 + 1 c1 fro explain with recursive cte1(c1) as (select c1 from t1 union select c1 + 1 c1 from cte1 limit 1) select * from cte1 dt1 join cte1 dt2 on dt1.c1 = dt2.c1; explain with recursive cte1(c1) as (select c1 from t1 union select c1 + 1 c1 from cte1 limit 0 offset 0) select * from cte1 dt1 join cte1 dt2 on dt1.c1 = dt2.c1; -# non-recursive limit -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 1) select * from cte1; -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 100 offset 100) select * from cte1; -explain with recursive cte1(c1) as (select c1 from t1 union select c1 from t2 limit 0 offset 0) select * from cte1; - # TPC-DS Q11 CREATE TABLE `customer` ( `c_customer_sk` int(11) NOT NULL,