Skip to content

Commit f9dddad

Browse files
authored
If none of the columns in the window exprs are needed, remove the window exprs (#2634)
* If none of the columns in the window exprs are needed, remove the window exprs * Add test case for window expr elimination
1 parent df2094f commit f9dddad

File tree

2 files changed

+147
-0
lines changed

2 files changed

+147
-0
lines changed

datafusion/core/src/optimizer/projection_push_down.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,18 @@ fn optimize_plan(
277277
})?;
278278
}
279279

280+
// if none of the window exprs are needed, drop the window node and optimize its input directly
281+
if new_window_expr.is_empty() {
282+
return LogicalPlanBuilder::from(optimize_plan(
283+
_optimizer,
284+
input,
285+
required_columns,
286+
true,
287+
_execution_props,
288+
)?)
289+
.build();
290+
};
291+
280292
// for all the retained window expr, find their sort expressions if any, and retain these
281293
exprlist_to_columns(
282294
&find_sort_exprs(&new_window_expr),

datafusion/core/tests/sql/window.rs

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,138 @@ async fn window_partition_by_order_by() -> Result<()> {
298298
assert_batches_eq!(expected, &results);
299299
Ok(())
300300
}
301+
302+
/// Checks how the optimizer treats a window expression inside a CTE.
///
/// Two queries are planned, optimized, and executed:
/// 1. the outer query never references the window column `seq`, and the
///    expected optimized plan contains no `WindowAggr` node;
/// 2. the outer query aggregates over `seq`, and the expected optimized plan
///    keeps the `WindowAggr` node.
///
/// In both cases the query is also executed and its output compared against
/// an expected result table.
#[tokio::test]
303+
async fn window_expr_eliminate() -> Result<()> {
304+
let ctx = SessionContext::new();
305+
306+
// window expr is not referenced anywhere, eliminate it.
// (`seq` is computed by row_number() in `_data2`, but the outer SELECT only uses d.b and d.a)
307+
let sql = "WITH _sample_data AS (
308+
SELECT 1 as a, 'aa' AS b
309+
UNION ALL
310+
SELECT 3 as a, 'aa' AS b
311+
UNION ALL
312+
SELECT 5 as a, 'bb' AS b
313+
UNION ALL
314+
SELECT 7 as a, 'bb' AS b
315+
), _data2 AS (
316+
SELECT
317+
row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
318+
s.a,
319+
s.b
320+
FROM _sample_data s
321+
)
322+
SELECT d.b, MAX(d.a) AS max_a
323+
FROM _data2 d
324+
GROUP BY d.b
325+
ORDER BY d.b;";
326+
327+
// panic message used by `.expect` if building the logical plan fails
let msg = format!("Creating logical plan for '{}'", sql);
328+
// plan the query with an "explain " prefix so the optimized plan can be inspected
let plan = ctx
329+
.create_logical_plan(&("explain ".to_owned() + sql))
330+
.expect(&msg);
331+
// clone the session state; this clone is reused to optimize the second query below
let state = ctx.state.read().clone();
332+
let plan = state.optimize(&plan)?;
333+
// expected optimized plan: note that no WindowAggr node appears
let expected = vec![
334+
"Explain [plan_type:Utf8, plan:Utf8]",
335+
" Sort: #d.b ASC NULLS LAST [b:Utf8, max_a:Int64;N]",
336+
" Projection: #d.b, #MAX(d.a) AS max_a [b:Utf8, max_a:Int64;N]",
337+
" Aggregate: groupBy=[[#d.b]], aggr=[[MAX(#d.a)]] [b:Utf8, MAX(d.a):Int64;N]",
338+
" Projection: #_data2.a, #_data2.b, alias=d [a:Int64, b:Utf8]",
339+
" Projection: #s.a, #s.b, alias=_data2 [a:Int64, b:Utf8]",
340+
" Projection: #a, #b, alias=s [a:Int64, b:Utf8]",
341+
" Union [a:Int64, b:Utf8]",
342+
" Projection: Int64(1) AS a, Utf8(\"aa\") AS b [a:Int64, b:Utf8]",
343+
" EmptyRelation []",
344+
" Projection: Int64(3) AS a, Utf8(\"aa\") AS b [a:Int64, b:Utf8]",
345+
" EmptyRelation []",
346+
" Projection: Int64(5) AS a, Utf8(\"bb\") AS b [a:Int64, b:Utf8]",
347+
" EmptyRelation []",
348+
" Projection: Int64(7) AS a, Utf8(\"bb\") AS b [a:Int64, b:Utf8]",
349+
" EmptyRelation []",
350+
];
351+
// render the optimized plan (indented, with schemas) and compare it line by line
let formatted = plan.display_indent_schema().to_string();
352+
let actual: Vec<&str> = formatted.trim().lines().collect();
353+
assert_eq!(
354+
expected, actual,
355+
"\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
356+
expected, actual
357+
);
358+
359+
// run the query itself (without the "explain " prefix) and check the result rows
let results = execute_to_batches(&ctx, sql).await;
360+
let expected = vec![
361+
"+----+-------+",
362+
"| b | max_a |",
363+
"+----+-------+",
364+
"| aa | 3 |",
365+
"| bb | 7 |",
366+
"+----+-------+",
367+
];
368+
369+
assert_batches_eq!(expected, &results);
370+
371+
// window expr is referenced by the output, keep it
// (same CTEs as above, but the outer SELECT additionally computes max(d.seq))
372+
let sql = "WITH _sample_data AS (
373+
SELECT 1 as a, 'aa' AS b
374+
UNION ALL
375+
SELECT 3 as a, 'aa' AS b
376+
UNION ALL
377+
SELECT 5 as a, 'bb' AS b
378+
UNION ALL
379+
SELECT 7 as a, 'bb' AS b
380+
), _data2 AS (
381+
SELECT
382+
row_number() OVER (PARTITION BY s.b ORDER BY s.a) AS seq,
383+
s.a,
384+
s.b
385+
FROM _sample_data s
386+
)
387+
SELECT d.b, MAX(d.a) AS max_a, max(d.seq)
388+
FROM _data2 d
389+
GROUP BY d.b
390+
ORDER BY d.b;";
391+
392+
// NOTE(review): `msg` was formatted from the first query's SQL, so a planning
// failure here would report the first query's text rather than this one.
let plan = ctx
393+
.create_logical_plan(&("explain ".to_owned() + sql))
394+
.expect(&msg);
395+
// reuses the `state` clone taken for the first query
let plan = state.optimize(&plan)?;
396+
// expected optimized plan: the WindowAggr node and the `seq` projection are retained
let expected = vec![
397+
"Explain [plan_type:Utf8, plan:Utf8]",
398+
" Sort: #d.b ASC NULLS LAST [b:Utf8, max_a:Int64;N, MAX(d.seq):UInt64;N]",
399+
" Projection: #d.b, #MAX(d.a) AS max_a, #MAX(d.seq) [b:Utf8, max_a:Int64;N, MAX(d.seq):UInt64;N]",
400+
" Aggregate: groupBy=[[#d.b]], aggr=[[MAX(#d.a), MAX(#d.seq)]] [b:Utf8, MAX(d.a):Int64;N, MAX(d.seq):UInt64;N]",
401+
" Projection: #_data2.seq, #_data2.a, #_data2.b, alias=d [seq:UInt64;N, a:Int64, b:Utf8]",
402+
" Projection: #ROW_NUMBER() PARTITION BY [#s.b] ORDER BY [#s.a ASC NULLS LAST] AS seq, #s.a, #s.b, alias=_data2 [seq:UInt64;N, a:Int64, b:Utf8]",
403+
" WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [#s.b] ORDER BY [#s.a ASC NULLS LAST]]] [ROW_NUMBER() PARTITION BY [#s.b] ORDER BY [#s.a ASC NULLS LAST]:UInt64;N, a:Int64, b:Utf8]",
404+
" Projection: #a, #b, alias=s [a:Int64, b:Utf8]",
405+
" Union [a:Int64, b:Utf8]",
406+
" Projection: Int64(1) AS a, Utf8(\"aa\") AS b [a:Int64, b:Utf8]",
407+
" EmptyRelation []",
408+
" Projection: Int64(3) AS a, Utf8(\"aa\") AS b [a:Int64, b:Utf8]",
409+
" EmptyRelation []",
410+
" Projection: Int64(5) AS a, Utf8(\"bb\") AS b [a:Int64, b:Utf8]",
411+
" EmptyRelation []",
412+
" Projection: Int64(7) AS a, Utf8(\"bb\") AS b [a:Int64, b:Utf8]",
413+
" EmptyRelation []",
414+
];
415+
// same line-by-line comparison of the rendered optimized plan as in the first case
let formatted = plan.display_indent_schema().to_string();
416+
let actual: Vec<&str> = formatted.trim().lines().collect();
417+
assert_eq!(
418+
expected, actual,
419+
"\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
420+
expected, actual
421+
);
422+
423+
// execute the second query and check that MAX(d.seq) is reported per group
let results = execute_to_batches(&ctx, sql).await;
424+
let expected = vec![
425+
"+----+-------+------------+",
426+
"| b | max_a | MAX(d.seq) |",
427+
"+----+-------+------------+",
428+
"| aa | 3 | 2 |",
429+
"| bb | 7 | 2 |",
430+
"+----+-------+------------+",
431+
];
432+
433+
assert_batches_eq!(expected, &results);
434+
Ok(())
435+
}

0 commit comments

Comments
 (0)