//! projections one by one if the operator below is amenable to this. If a
//! projection reaches a source, it can even disappear from the plan entirely.

+use std::sync::Arc;
+
use super::output_requirements::OutputRequirementExec;
use super::PhysicalOptimizerRule;
use crate::datasource::physical_plan::CsvExec;
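The module comment above describes the goal of this rule: projections are pushed down one operator at a time, and a projection that reaches a source can be folded into the source's own column selection and dropped from the plan. A minimal, self-contained sketch of that end state, using toy plan types rather than DataFusion's `ExecutionPlan` API:

// Toy model of projection pushdown reaching a source (illustration only).
#[derive(Debug)]
enum Plan {
    // Output columns, as indices into the child's output schema.
    Projection(Vec<usize>, Box<Plan>),
    // A leaf source that can read an arbitrary subset of its columns.
    Scan { projection: Option<Vec<usize>> },
}

fn push_down(plan: Plan) -> Plan {
    match plan {
        Plan::Projection(cols, child) => match push_down(*child) {
            // The projection reached a source: compose the index lists and
            // let the projection node disappear.
            Plan::Scan { projection } => {
                let composed: Vec<usize> = match projection {
                    Some(existing) => cols.iter().map(|&i| existing[i]).collect(),
                    None => cols,
                };
                Plan::Scan { projection: Some(composed) }
            }
            // Otherwise keep the projection where it is.
            other => Plan::Projection(cols, Box::new(other)),
        },
        other => other,
    }
}

fn main() {
    // Projection [2, 0] over a scan that already reads columns [1, 3, 4]
    // collapses into a scan of columns [4, 1]; no projection node remains.
    let plan = Plan::Projection(
        vec![2, 0],
        Box::new(Plan::Scan { projection: Some(vec![1, 3, 4]) }),
    );
    println!("{:?}", push_down(plan));
}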
@@ -39,18 +41,17 @@ use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
use crate::physical_plan::{Distribution, ExecutionPlan};

use arrow_schema::SchemaRef;
-
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::JoinSide;
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::{
    Partitioning, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement,
};
+use datafusion_physical_plan::streaming::StreamingTableExec;
use datafusion_physical_plan::union::UnionExec;

use itertools::Itertools;
-use std::sync::Arc;

/// This rule inspects [`ProjectionExec`]'s in the given physical plan and tries to
/// remove or swap with its child.
@@ -135,6 +136,8 @@ pub fn remove_unnecessary_projections(
        try_swapping_with_sort_merge_join(projection, sm_join)?
    } else if let Some(sym_join) = input.downcast_ref::<SymmetricHashJoinExec>() {
        try_swapping_with_sym_hash_join(projection, sym_join)?
+    } else if let Some(ste) = input.downcast_ref::<StreamingTableExec>() {
+        try_swapping_with_streaming_table(projection, ste)?
    } else {
        // If the input plan of the projection is not one of the above, we
        // conservatively assume that pushing the projection down may hurt.
@@ -149,8 +152,8 @@ pub fn remove_unnecessary_projections(
    Ok(maybe_modified.map_or(Transformed::No(plan), Transformed::Yes))
}

-/// Tries to swap `projection` with its input (`csv`). If possible, performs
-/// the swap and returns [`CsvExec`] as the top plan. Otherwise, returns `None`.
+/// Tries to embed `projection` into its input (`csv`). If possible, returns
+/// [`CsvExec`] as the top plan. Otherwise, returns `None`.
fn try_swapping_with_csv(
    projection: &ProjectionExec,
    csv: &CsvExec,
@@ -174,8 +177,8 @@ fn try_swapping_with_csv(
    })
}

-/// Tries to swap `projection` with its input (`memory`). If possible, performs
-/// the swap and returns [`MemoryExec`] as the top plan. Otherwise, returns `None`.
+/// Tries to embed `projection` into its input (`memory`). If possible, returns
+/// [`MemoryExec`] as the top plan. Otherwise, returns `None`.
fn try_swapping_with_memory(
    projection: &ProjectionExec,
    memory: &MemoryExec,
@@ -197,10 +200,52 @@ fn try_swapping_with_memory(
        .transpose()
}

+/// Tries to embed `projection` into its input (`streaming table`).
+/// If possible, returns [`StreamingTableExec`] as the top plan. Otherwise,
+/// returns `None`.
+fn try_swapping_with_streaming_table(
+    projection: &ProjectionExec,
+    streaming_table: &StreamingTableExec,
+) -> Result<Option<Arc<dyn ExecutionPlan>>> {
+    if !all_alias_free_columns(projection.expr()) {
+        return Ok(None);
+    }
+
+    let streaming_table_projections = streaming_table
+        .projection()
+        .as_ref()
+        .map(|i| i.as_ref().to_vec());
+    let new_projections =
+        new_projections_for_columns(projection, &streaming_table_projections);
+
+    let mut lex_orderings = vec![];
+    for lex_ordering in streaming_table.projected_output_ordering().into_iter() {
+        let mut orderings = vec![];
+        for order in lex_ordering {
+            let Some(new_ordering) = update_expr(&order.expr, projection.expr(), false)?
+            else {
+                return Ok(None);
+            };
+            orderings.push(PhysicalSortExpr {
+                expr: new_ordering,
+                options: order.options,
+            });
+        }
+        lex_orderings.push(orderings);
+    }
+
+    StreamingTableExec::try_new(
+        streaming_table.partition_schema().clone(),
+        streaming_table.partitions().clone(),
+        Some(&new_projections),
+        lex_orderings,
+        streaming_table.is_infinite(),
+    )
+    .map(|e| Some(Arc::new(e) as _))
+}
+
/// Unifies `projection` with its input (which is also a [`ProjectionExec`]).
-/// Two consecutive projections can always merge into a single projection unless
-/// the [`update_expr`] function does not support one of the expression
-/// types involved in the projection.
+/// Two consecutive projections can always merge into a single projection.
fn try_unifying_projections(
    projection: &ProjectionExec,
    child: &ProjectionExec,
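The doc change above states that two consecutive projections can always be merged into one. The merge amounts to substituting the child projection's expressions for the parent's column references; a small sketch with a toy expression type (hypothetical `Expr`, not DataFusion's `PhysicalExpr`):

// Illustration only: merging Projection(y := x@0 + 1) over Projection(x := a@0 + b@1).
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Column(usize),
    Literal(i64),
    Add(Box<Expr>, Box<Expr>),
}

// Substitute every Column(i) in `parent` with the child's i-th expression.
fn inline(parent: &Expr, child: &[Expr]) -> Expr {
    match parent {
        Expr::Column(i) => child[*i].clone(),
        Expr::Literal(v) => Expr::Literal(*v),
        Expr::Add(l, r) => Expr::Add(
            Box::new(inline(l, child)),
            Box::new(inline(r, child)),
        ),
    }
}

fn main() {
    // Child projection: x@0 := a@0 + b@1
    let child = vec![Expr::Add(
        Box::new(Expr::Column(0)),
        Box::new(Expr::Column(1)),
    )];
    // Parent projection over the child's output: y := x@0 + 1
    let parent = Expr::Add(Box::new(Expr::Column(0)), Box::new(Expr::Literal(1)));
    // Merged single projection: y := (a@0 + b@1) + 1
    let merged = inline(&parent, &child);
    assert_eq!(
        merged,
        Expr::Add(
            Box::new(Expr::Add(
                Box::new(Expr::Column(0)),
                Box::new(Expr::Column(1)),
            )),
            Box::new(Expr::Literal(1)),
        )
    );
}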
@@ -779,10 +824,6 @@ fn new_projections_for_columns(
/// given the expressions `c@0`, `a@1` and `b@2`, and the [`ProjectionExec`] with
/// an output schema of `a, c_new`, then `c@0` becomes `c_new@1`, `a@1` becomes
/// `a@0`, but `b@2` results in `None` since the projection does not include `b`.
-///
-/// If the expression contains a `PhysicalExpr` variant that this function does
-/// not support, it will return `None`. An error can only be introduced if
-/// `CaseExpr::try_new` returns an error.
fn update_expr(
    expr: &Arc<dyn PhysicalExpr>,
    projected_exprs: &[(Arc<dyn PhysicalExpr>, String)],
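The `update_expr` doc above keeps its worked example: with projected expressions `a@1` and `c@0` aliased as `a` and `c_new`, the reference `c@0` becomes `c_new@1`, `a@1` becomes `a@0`, and `b@2` becomes `None`. A minimal sketch of that remapping, matching on the input column index only and using toy types rather than the real `update_expr` signature:

// Illustration only: rewrite a column reference in terms of a projection's output.
#[derive(Clone, Debug)]
struct Col {
    name: String,
    index: usize,
}

// Returns the column as seen in the projection's output schema, or None if the
// projection does not carry that column.
fn remap(col: &Col, projected: &[(Col, String)]) -> Option<Col> {
    projected.iter().enumerate().find_map(|(out_idx, (expr, alias))| {
        (expr.index == col.index).then(|| Col {
            name: alias.clone(),
            index: out_idx,
        })
    })
}

fn main() {
    // Projection output schema `a, c_new`, built from the input columns a@1 and c@0.
    let projection = vec![
        (Col { name: "a".into(), index: 1 }, "a".to_string()),
        (Col { name: "c".into(), index: 0 }, "c_new".to_string()),
    ];
    // c@0 becomes c_new@1, a@1 becomes a@0, and b@2 has no match, so it is None.
    let c = remap(&Col { name: "c".into(), index: 0 }, &projection).unwrap();
    assert_eq!((c.name.as_str(), c.index), ("c_new", 1));
    let a = remap(&Col { name: "a".into(), index: 1 }, &projection).unwrap();
    assert_eq!((a.name.as_str(), a.index), ("a", 0));
    assert!(remap(&Col { name: "b".into(), index: 2 }, &projection).is_none());
}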
@@ -1102,10 +1143,11 @@ mod tests {
    use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
    use crate::physical_plan::ExecutionPlan;

-    use arrow_schema::{DataType, Field, Schema, SortOptions};
+    use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions};
    use datafusion_common::config::ConfigOptions;
    use datafusion_common::{JoinSide, JoinType, Result, ScalarValue, Statistics};
    use datafusion_execution::object_store::ObjectStoreUrl;
+    use datafusion_execution::{SendableRecordBatchStream, TaskContext};
    use datafusion_expr::{ColumnarValue, Operator};
    use datafusion_physical_expr::expressions::{
        BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr,
@@ -1115,8 +1157,11 @@ mod tests {
        PhysicalSortRequirement, ScalarFunctionExpr,
    };
    use datafusion_physical_plan::joins::SymmetricHashJoinExec;
+    use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec};
    use datafusion_physical_plan::union::UnionExec;

+    use itertools::Itertools;
+
    #[test]
    fn test_update_matching_exprs() -> Result<()> {
        let exprs: Vec<Arc<dyn PhysicalExpr>> = vec![
@@ -1575,6 +1620,119 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn test_streaming_table_after_projection() -> Result<()> {
+        struct DummyStreamPartition {
+            schema: SchemaRef,
+        }
+        impl PartitionStream for DummyStreamPartition {
+            fn schema(&self) -> &SchemaRef {
+                &self.schema
+            }
+            fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
+                unreachable!()
+            }
+        }
+
+        let streaming_table = StreamingTableExec::try_new(
+            Arc::new(Schema::new(vec![
+                Field::new("a", DataType::Int32, true),
+                Field::new("b", DataType::Int32, true),
+                Field::new("c", DataType::Int32, true),
+                Field::new("d", DataType::Int32, true),
+                Field::new("e", DataType::Int32, true),
+            ])),
+            vec![Arc::new(DummyStreamPartition {
+                schema: Arc::new(Schema::new(vec![
+                    Field::new("a", DataType::Int32, true),
+                    Field::new("b", DataType::Int32, true),
+                    Field::new("c", DataType::Int32, true),
+                    Field::new("d", DataType::Int32, true),
+                    Field::new("e", DataType::Int32, true),
+                ])),
+            }) as _],
+            Some(&vec![0_usize, 2, 4, 3]),
+            vec![
+                vec![
+                    PhysicalSortExpr {
+                        expr: Arc::new(Column::new("e", 2)),
+                        options: SortOptions::default(),
+                    },
+                    PhysicalSortExpr {
+                        expr: Arc::new(Column::new("a", 0)),
+                        options: SortOptions::default(),
+                    },
+                ],
1665
+ ] ,
1666
+ vec![ PhysicalSortExpr {
1667
+ expr: Arc :: new( Column :: new( "d" , 3 ) ) ,
1668
+ options: SortOptions :: default ( ) ,
1669
+ } ] ,
1670
+ ]
1671
+ . into_iter ( ) ,
1672
+ true ,
1673
+ ) ?;
1674
+ let projection = Arc :: new ( ProjectionExec :: try_new (
1675
+ vec ! [
1676
+ ( Arc :: new( Column :: new( "d" , 3 ) ) , "d" . to_string( ) ) ,
1677
+ ( Arc :: new( Column :: new( "e" , 2 ) ) , "e" . to_string( ) ) ,
1678
+ ( Arc :: new( Column :: new( "a" , 0 ) ) , "a" . to_string( ) ) ,
1679
+ ] ,
1680
+ Arc :: new ( streaming_table) as _ ,
1681
+ ) ?) as _ ;
1682
+
1683
+ let after_optimize =
1684
+ ProjectionPushdown :: new ( ) . optimize ( projection, & ConfigOptions :: new ( ) ) ?;
1685
+
1686
+ let result = after_optimize
1687
+ . as_any ( )
1688
+ . downcast_ref :: < StreamingTableExec > ( )
1689
+ . unwrap ( ) ;
1690
+ assert_eq ! (
1691
+ result. partition_schema( ) ,
1692
+ & Arc :: new( Schema :: new( vec![
1693
+ Field :: new( "a" , DataType :: Int32 , true ) ,
1694
+ Field :: new( "b" , DataType :: Int32 , true ) ,
1695
+ Field :: new( "c" , DataType :: Int32 , true ) ,
1696
+ Field :: new( "d" , DataType :: Int32 , true ) ,
1697
+ Field :: new( "e" , DataType :: Int32 , true ) ,
1698
+ ] ) )
1699
+ ) ;
1700
+ assert_eq ! (
1701
+ result. projection( ) . clone( ) . unwrap( ) . to_vec( ) ,
1702
+ vec![ 3_usize , 4 , 0 ]
1703
+ ) ;
1704
+ assert_eq ! (
1705
+ result. projected_schema( ) ,
1706
+ & Schema :: new( vec![
1707
+ Field :: new( "d" , DataType :: Int32 , true ) ,
1708
+ Field :: new( "e" , DataType :: Int32 , true ) ,
1709
+ Field :: new( "a" , DataType :: Int32 , true ) ,
1710
+ ] )
1711
+ ) ;
1712
+ assert_eq ! (
1713
+ result. projected_output_ordering( ) . into_iter( ) . collect_vec( ) ,
1714
+ vec![
1715
+ vec![
1716
+ PhysicalSortExpr {
1717
+ expr: Arc :: new( Column :: new( "e" , 1 ) ) ,
1718
+ options: SortOptions :: default ( ) ,
1719
+ } ,
1720
+ PhysicalSortExpr {
1721
+ expr: Arc :: new( Column :: new( "a" , 2 ) ) ,
1722
+ options: SortOptions :: default ( ) ,
1723
+ } ,
1724
+ ] ,
1725
+ vec![ PhysicalSortExpr {
1726
+ expr: Arc :: new( Column :: new( "d" , 0 ) ) ,
1727
+ options: SortOptions :: default ( ) ,
1728
+ } ] ,
1729
+ ]
1730
+ ) ;
1731
+ assert ! ( result. is_infinite( ) ) ;
1732
+
1733
+ Ok ( ( ) )
1734
+ }
1735
+
1578
1736
#[ test]
1579
1737
fn test_projection_after_projection ( ) -> Result < ( ) > {
1580
1738
let csv = create_simple_csv_exec ( ) ;
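The expected values asserted in `test_streaming_table_after_projection` above can be reproduced by hand: the streaming table's existing projection `[0, 2, 4, 3]` exposes `(a, c, e, d)`, and the projection on top picks `d@3`, `e@2`, `a@0` from that output, so the pushed-down projection over the base schema is `[3, 4, 0]`. A few lines of plain arithmetic (no DataFusion types) check this:

fn main() {
    let base = ["a", "b", "c", "d", "e"];
    let table_projection = vec![0_usize, 2, 4, 3]; // table output: (a, c, e, d)
    let top_projection = vec![3_usize, 2, 0]; // picks d, e, a from the table output

    // Compose the two index lists: this is the projection pushed into the source.
    let composed: Vec<usize> = top_projection
        .iter()
        .map(|&i| table_projection[i])
        .collect();
    assert_eq!(composed, vec![3, 4, 0]); // matches `result.projection()` in the test

    // The pushed-down output schema is therefore (d, e, a), which is why the
    // orderings are rewritten to e@1, a@2 and d@0 in the assertions above.
    let names: Vec<&str> = composed.iter().map(|&i| base[i]).collect();
    assert_eq!(names, vec!["d", "e", "a"]);
}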