Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove logical cross join in planning #12985

Merged
merged 23 commits into from
Oct 18, 2024
Prev Previous commit
Next Next commit
WIP
  • Loading branch information
Dandandan committed Oct 17, 2024
commit 8b1e0e974a9fab8ef8fe1973520e0aab12ef7e8c
12 changes: 10 additions & 2 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ use datafusion_expr::expr::{
use datafusion_expr::expr_rewriter::unnormalize_cols;
use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary;
use datafusion_expr::{
DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, SortExpr,
DescribeTable, DmlStatement, Extension, Filter, JoinType, RecursiveQuery, SortExpr,
StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp,
};
use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr};
Expand Down Expand Up @@ -1045,7 +1045,15 @@ impl DefaultPhysicalPlanner {
session_state.config_options().optimizer.prefer_hash_join;

let join: Arc<dyn ExecutionPlan> = if join_on.is_empty() {
if join_filter.is_none() {
if join_filter.is_none()
&& matches!(
join_type,
JoinType::Inner
| JoinType::Full
| JoinType::Left
| JoinType::Right
)
{
// no on and filter, use cross join
Dandandan marked this conversation as resolved.
Show resolved Hide resolved
Arc::new(CrossJoinExec::new(physical_left, physical_right))
} else {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -949,13 +949,13 @@ impl LogicalPlanBuilder {
/// Apply a cross join
pub fn cross_join(self, right: LogicalPlan) -> Result<Self> {
let join_schema =
build_join_schema(self.plan.schema(), right.schema(), &JoinType::Inner)?;
build_join_schema(self.plan.schema(), right.schema(), &JoinType::Full)?;
Ok(Self::new(LogicalPlan::Join(Join {
left: self.plan,
right: Arc::new(right),
on: vec![],
filter: None,
join_type: JoinType::Inner,
join_type: JoinType::Full,
join_constraint: JoinConstraint::On,
null_equals_null: false,
schema: DFSchemaRef::new(join_schema),
Expand Down
5 changes: 5 additions & 0 deletions datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,11 @@ impl LogicalPlan {
.as_ref()
.map(|expr| format!(" Filter: {expr}"))
.unwrap_or_else(|| "".to_string());
let join_type = if filter.is_none() && keys.is_empty() && matches!(join_type, JoinType::Inner| JoinType::Left| JoinType::Right| JoinType::Full) {
"Cross".to_string()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we extend JoinType enum to support Cross?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it’s better not to, otherwise it will be similar to having LogicalPlan::CrossJoin (I.e. unnecesary).

} else {
join_type.to_string()
};
match join_constraint {
JoinConstraint::On => {
write!(
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4050,7 +4050,7 @@ EXPLAIN SELECT lhs.c, rhs.c, lhs.sum1, rhs.sum1
----
logical_plan
01)Projection: lhs.c, rhs.c, lhs.sum1, rhs.sum1
02)--Inner Join:
02)--Cross Join:
03)----SubqueryAlias: lhs
04)------Projection: multiple_ordered_table_with_pk.c, sum(multiple_ordered_table_with_pk.d) AS sum1
05)--------Aggregate: groupBy=[[multiple_ordered_table_with_pk.c]], aggr=[[sum(CAST(multiple_ordered_table_with_pk.d AS Int64))]]
Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ query TT
explain select * from t1 inner join t2 on true;
----
logical_plan
01)Inner Join:
01)Cross Join:
02)--TableScan: t1 projection=[t1_id, t1_name, t1_int]
03)--TableScan: t2 projection=[t2_id, t2_name, t2_int]
physical_plan
Expand Down Expand Up @@ -905,7 +905,7 @@ JOIN department AS d
ON (e.name = 'Alice' OR e.name = 'Bob');
----
logical_plan
01)Inner Join:
01)Cross Join:
02)--SubqueryAlias: e
03)----Filter: employees.name = Utf8("Alice") OR employees.name = Utf8("Bob")
04)------TableScan: employees projection=[emp_id, name]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4050,7 +4050,7 @@ query TT
explain select t1_id, t1_name, i from join_t1 t1 cross join lateral (select * from unnest(generate_series(1, t1_int))) as series(i);
----
logical_plan
01)Inner Join:
01)Cross Join:
02)--SubqueryAlias: t1
03)----TableScan: join_t1 projection=[t1_id, t1_name]
04)--SubqueryAlias: series
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ EXPLAIN SELECT * FROM ((SELECT column1 FROM foo) "T1" CROSS JOIN (SELECT column2
----
logical_plan
01)SubqueryAlias: F
02)--Inner Join:
02)--Cross Join:
03)----SubqueryAlias: T1
04)------TableScan: foo projection=[column1]
05)----SubqueryAlias: T2
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ explain SELECT t1_id, (SELECT t2_id FROM t2 limit 0) FROM t1
----
logical_plan
01)Projection: t1.t1_id, __scalar_sq_1.t2_id AS t2_id
02)--Left Join:
02)--Cross Join:
03)----TableScan: t1 projection=[t1_id]
04)----EmptyRelation

Expand Down
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/update.slt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ logical_plan
01)Dml: op=[Update] table=[t1]
02)--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d
03)----Filter: t1.a = t2.a AND t1.b > Utf8("foo") AND t2.c > Float64(1)
04)------Inner Join:
04)------Cross Join:
05)--------TableScan: t1
06)--------TableScan: t2

Expand All @@ -86,7 +86,7 @@ logical_plan
01)Dml: op=[Update] table=[t1]
02)--Projection: t.a AS a, t2.b AS b, CAST(t.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d
03)----Filter: t.a = t2.a AND t.b > Utf8("foo") AND t2.c > Float64(1)
04)------Inner Join:
04)------Cross Join:
05)--------SubqueryAlias: t
06)----------TableScan: t1
07)--------TableScan: t2
Loading