Skip to content

Commit d355f69

Browse files
authored
Add rule to reimplement Eliminate cross join and remove it in planner (#4185)
* reimplement eliminate_cross_join
* add test for subquery alias and projection alias.
* add test
* fix fmt
* review
* fmt
* fix conflict
1 parent bfce076 commit d355f69

File tree

9 files changed

+404
-444
lines changed

9 files changed

+404
-444
lines changed

benchmarks/expected-plans/q2.txt

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
22
Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
3-
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
4-
Inner Join: nation.n_regionkey = region.r_regionkey
5-
Inner Join: supplier.s_nationkey = nation.n_nationkey
6-
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
7-
Inner Join: part.p_partkey = partsupp.ps_partkey
8-
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
9-
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size]
10-
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
11-
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
12-
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
13-
Filter: region.r_name = Utf8("EUROPE")
14-
TableScan: region projection=[r_regionkey, r_name]
15-
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value, alias=__sq_1
16-
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
17-
Inner Join: nation.n_regionkey = region.r_regionkey
18-
Inner Join: supplier.s_nationkey = nation.n_nationkey
19-
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
3+
Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
4+
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
5+
Inner Join: nation.n_regionkey = region.r_regionkey
6+
Inner Join: supplier.s_nationkey = nation.n_nationkey
7+
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
8+
Inner Join: part.p_partkey = partsupp.ps_partkey
9+
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
10+
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size]
2011
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
21-
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
22-
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
23-
Filter: region.r_name = Utf8("EUROPE")
24-
TableScan: region projection=[r_regionkey, r_name]
12+
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
13+
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
14+
Filter: region.r_name = Utf8("EUROPE")
15+
TableScan: region projection=[r_regionkey, r_name]
16+
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value, alias=__sq_1
17+
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
18+
Inner Join: nation.n_regionkey = region.r_regionkey
19+
Inner Join: supplier.s_nationkey = nation.n_nationkey
20+
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
21+
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
22+
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
23+
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
24+
Filter: region.r_name = Utf8("EUROPE")
25+
TableScan: region projection=[r_regionkey, r_name]

benchmarks/expected-plans/q8.txt

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,24 @@ Sort: all_nations.o_year ASC NULLS LAST
33
Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]]
44
Projection: o_year, volume, nation, alias=all_nations
55
Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation
6-
Inner Join: n1.n_regionkey = region.r_regionkey
7-
Inner Join: supplier.s_nationkey = n2.n_nationkey
8-
Inner Join: customer.c_nationkey = n1.n_nationkey
9-
Inner Join: orders.o_custkey = customer.c_custkey
10-
Inner Join: lineitem.l_orderkey = orders.o_orderkey
11-
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
12-
Inner Join: part.p_partkey = lineitem.l_partkey
13-
Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
14-
TableScan: part projection=[p_partkey, p_type]
15-
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
16-
TableScan: supplier projection=[s_suppkey, s_nationkey]
17-
Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861")
18-
TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
19-
TableScan: customer projection=[c_custkey, c_nationkey]
20-
SubqueryAlias: n1
21-
TableScan: nation projection=[n_nationkey, n_regionkey]
22-
SubqueryAlias: n2
23-
TableScan: nation projection=[n_nationkey, n_name]
24-
Filter: region.r_name = Utf8("AMERICA")
25-
TableScan: region projection=[r_regionkey, r_name]
6+
Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n2.n_name
7+
Inner Join: n1.n_regionkey = region.r_regionkey
8+
Inner Join: supplier.s_nationkey = n2.n_nationkey
9+
Inner Join: customer.c_nationkey = n1.n_nationkey
10+
Inner Join: orders.o_custkey = customer.c_custkey
11+
Inner Join: lineitem.l_orderkey = orders.o_orderkey
12+
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
13+
Inner Join: part.p_partkey = lineitem.l_partkey
14+
Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
15+
TableScan: part projection=[p_partkey, p_type]
16+
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
17+
TableScan: supplier projection=[s_suppkey, s_nationkey]
18+
Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861")
19+
TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
20+
TableScan: customer projection=[c_custkey, c_nationkey]
21+
SubqueryAlias: n1
22+
TableScan: nation projection=[n_nationkey, n_regionkey]
23+
SubqueryAlias: n2
24+
TableScan: nation projection=[n_nationkey, n_name]
25+
Filter: region.r_name = Utf8("AMERICA")
26+
TableScan: region projection=[r_regionkey, r_name]

benchmarks/expected-plans/q9.txt

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST
33
Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]]
44
Projection: nation, o_year, amount, alias=profit
55
Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(38, 4)) AS amount
6-
Inner Join: supplier.s_nationkey = nation.n_nationkey
7-
Inner Join: lineitem.l_orderkey = orders.o_orderkey
8-
Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey
9-
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
10-
Inner Join: part.p_partkey = lineitem.l_partkey
11-
Filter: part.p_name LIKE Utf8("%green%")
12-
TableScan: part projection=[p_partkey, p_name]
13-
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount]
14-
TableScan: supplier projection=[s_suppkey, s_nationkey]
15-
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
16-
TableScan: orders projection=[o_orderkey, o_orderdate]
17-
TableScan: nation projection=[n_nationkey, n_name]
6+
Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name
7+
Inner Join: supplier.s_nationkey = nation.n_nationkey
8+
Inner Join: lineitem.l_orderkey = orders.o_orderkey
9+
Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey
10+
Inner Join: lineitem.l_suppkey = supplier.s_suppkey
11+
Inner Join: part.p_partkey = lineitem.l_partkey
12+
Filter: part.p_name LIKE Utf8("%green%")
13+
TableScan: part projection=[p_partkey, p_name]
14+
TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount]
15+
TableScan: supplier projection=[s_suppkey, s_nationkey]
16+
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
17+
TableScan: orders projection=[o_orderkey, o_orderdate]
18+
TableScan: nation projection=[n_nationkey, n_name]

datafusion/core/tests/sql/subqueries.rs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -141,28 +141,29 @@ order by s_acctbal desc, n_name, s_name, p_partkey;"#;
141141
let actual = format!("{}", plan.display_indent());
142142
let expected = r#"Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
143143
Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
144-
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
145-
Inner Join: nation.n_regionkey = region.r_regionkey
146-
Inner Join: supplier.s_nationkey = nation.n_nationkey
147-
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
148-
Inner Join: part.p_partkey = partsupp.ps_partkey
149-
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
150-
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")]
151-
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
152-
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
153-
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
154-
Filter: region.r_name = Utf8("EUROPE")
155-
TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
156-
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value, alias=__sq_1
157-
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
158-
Inner Join: nation.n_regionkey = region.r_regionkey
159-
Inner Join: supplier.s_nationkey = nation.n_nationkey
160-
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
144+
Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name
145+
Inner Join: part.p_partkey = __sq_1.ps_partkey, partsupp.ps_supplycost = __sq_1.__value
146+
Inner Join: nation.n_regionkey = region.r_regionkey
147+
Inner Join: supplier.s_nationkey = nation.n_nationkey
148+
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
149+
Inner Join: part.p_partkey = partsupp.ps_partkey
150+
Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
151+
TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE Utf8("%BRASS")]
161152
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
162-
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
163-
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
164-
Filter: region.r_name = Utf8("EUROPE")
165-
TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]"#
153+
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
154+
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
155+
Filter: region.r_name = Utf8("EUROPE")
156+
TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
157+
Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value, alias=__sq_1
158+
Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
159+
Inner Join: nation.n_regionkey = region.r_regionkey
160+
Inner Join: supplier.s_nationkey = nation.n_nationkey
161+
Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
162+
TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
163+
TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
164+
TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
165+
Filter: region.r_name = Utf8("EUROPE")
166+
TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]"#
166167
.to_string();
167168
assert_eq!(actual, expected);
168169

0 commit comments

Comments
 (0)