Skip to content

Commit b5e034b

Browse files
authored
Remove unnecessary projection in logical plan optimization phase (#747)
* eliminate super-set project
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* keep projection right before table scan
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* tidy
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
1 parent a4f6cdd commit b5e034b

File tree

1 file changed

+66
-3
lines changed

1 file changed

+66
-3
lines changed

datafusion/src/optimizer/projection_push_down.rs

Lines changed: 66 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,17 @@ fn optimize_plan(
173173
true,
174174
execution_props,
175175
)?;
176-
if new_fields.is_empty() {
176+
177+
let new_required_columns_optimized = new_input
178+
.schema()
179+
.fields()
180+
.iter()
181+
.map(|f| f.qualified_column())
182+
.collect::<HashSet<Column>>();
183+
184+
if new_fields.is_empty()
185+
|| (has_projection && &new_required_columns_optimized == required_columns)
186+
{
177187
// no need for an expression at all
178188
Ok(new_input)
179189
} else {
@@ -496,6 +506,60 @@ mod tests {
496506
Ok(())
497507
}
498508

509+
#[test]
510+
fn redundunt_project() -> Result<()> {
511+
let table_scan = test_table_scan()?;
512+
513+
let plan = LogicalPlanBuilder::from(table_scan)
514+
.project(vec![col("a"), col("b"), col("c")])?
515+
.project(vec![col("a"), col("c"), col("b")])?
516+
.build()?;
517+
let expected = "Projection: #test.a, #test.c, #test.b\
518+
\n TableScan: test projection=Some([0, 1, 2])";
519+
520+
assert_optimized_plan_eq(&plan, expected);
521+
522+
Ok(())
523+
}
524+
525+
#[test]
526+
fn reorder_projection() -> Result<()> {
527+
let table_scan = test_table_scan()?;
528+
529+
let plan = LogicalPlanBuilder::from(table_scan)
530+
.project(vec![col("c"), col("b"), col("a")])?
531+
.build()?;
532+
let expected = "Projection: #test.c, #test.b, #test.a\
533+
\n TableScan: test projection=Some([0, 1, 2])";
534+
535+
assert_optimized_plan_eq(&plan, expected);
536+
537+
Ok(())
538+
}
539+
540+
#[test]
541+
fn noncontiguous_redundunt_projection() -> Result<()> {
542+
let table_scan = test_table_scan()?;
543+
544+
let plan = LogicalPlanBuilder::from(table_scan)
545+
.project(vec![col("c"), col("b"), col("a")])?
546+
.filter(col("c").gt(lit(1)))?
547+
.project(vec![col("c"), col("a"), col("b")])?
548+
.filter(col("b").gt(lit(1)))?
549+
.filter(col("a").gt(lit(1)))?
550+
.project(vec![col("a"), col("c"), col("b")])?
551+
.build()?;
552+
let expected = "Projection: #test.a, #test.c, #test.b\
553+
\n Filter: #test.a Gt Int32(1)\
554+
\n Filter: #test.b Gt Int32(1)\
555+
\n Filter: #test.c Gt Int32(1)\
556+
\n TableScan: test projection=Some([0, 1, 2])";
557+
558+
assert_optimized_plan_eq(&plan, expected);
559+
560+
Ok(())
561+
}
562+
499563
#[test]
500564
fn join_schema_trim_full_join_column_projection() -> Result<()> {
501565
let table_scan = test_table_scan()?;
@@ -812,8 +876,7 @@ mod tests {
812876

813877
assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]);
814878

815-
let expected = "\
816-
Projection: #test.c, #test.a, #MAX(test.b)\
879+
let expected = "Projection: #test.c, #test.a, #MAX(test.b)\
817880
\n Filter: #test.c Gt Int32(1)\
818881
\n Aggregate: groupBy=[[#test.a, #test.c]], aggr=[[MAX(#test.b)]]\
819882
\n TableScan: test projection=Some([0, 1, 2])";

0 commit comments

Comments
 (0)