@@ -800,34 +800,36 @@ impl ExecutionPlan for HashJoinExec {
800
800
801
801
let join_metrics = BuildProbeJoinMetrics :: new ( partition, & self . metrics ) ;
802
802
let left_fut = match self . mode {
803
- PartitionMode :: CollectLeft => self . left_fut . once ( || {
804
- let reservation =
805
- MemoryConsumer :: new ( "HashJoinInput" ) . register ( context. memory_pool ( ) ) ;
803
+ PartitionMode :: CollectLeft => {
804
+ let left = coalesce_partitions_if_needed ( Arc :: clone ( & self . left ) ) ;
805
+ let left_stream = left . execute ( 0 , Arc :: clone ( & context) ) ? ;
806
806
807
- let left = coalesce_partitions_if_needed ( self . left . clone ( ) ) ;
808
- collect_left_input (
809
- 0 ,
810
- self . random_state . clone ( ) ,
811
- left,
812
- on_left. clone ( ) ,
813
- Arc :: clone ( & context) ,
814
- join_metrics. clone ( ) ,
815
- reservation,
816
- need_produce_result_in_final ( self . join_type ) ,
817
- self . right ( ) . output_partitioning ( ) . partition_count ( ) ,
818
- )
819
- } ) ,
807
+ self . left_fut . once ( || {
808
+ let reservation = MemoryConsumer :: new ( "HashJoinInput" )
809
+ . register ( context. memory_pool ( ) ) ;
810
+
811
+ collect_left_input (
812
+ self . random_state . clone ( ) ,
813
+ left_stream,
814
+ on_left. clone ( ) ,
815
+ join_metrics. clone ( ) ,
816
+ reservation,
817
+ need_produce_result_in_final ( self . join_type ) ,
818
+ self . right ( ) . output_partitioning ( ) . partition_count ( ) ,
819
+ )
820
+ } )
821
+ }
820
822
PartitionMode :: Partitioned => {
823
+ let left_stream = self . left . execute ( partition, Arc :: clone ( & context) ) ?;
824
+
821
825
let reservation =
822
826
MemoryConsumer :: new ( format ! ( "HashJoinInput[{partition}]" ) )
823
827
. register ( context. memory_pool ( ) ) ;
824
828
825
829
OnceFut :: new ( collect_left_input (
826
- partition,
827
830
self . random_state . clone ( ) ,
828
- Arc :: clone ( & self . left ) ,
831
+ left_stream ,
829
832
on_left. clone ( ) ,
830
- Arc :: clone ( & context) ,
831
833
join_metrics. clone ( ) ,
832
834
reservation,
833
835
need_produce_result_in_final ( self . join_type ) ,
@@ -943,25 +945,21 @@ fn coalesce_partitions_if_needed(plan: Arc<dyn ExecutionPlan>) -> Arc<dyn Execut
943
945
/// hash table (`JoinLeftData`)
944
946
#[ allow( clippy:: too_many_arguments) ]
945
947
async fn collect_left_input (
946
- partition : usize ,
947
948
random_state : RandomState ,
948
- left : Arc < dyn ExecutionPlan > ,
949
+ left_stream : SendableRecordBatchStream ,
949
950
on_left : Vec < PhysicalExprRef > ,
950
- context : Arc < TaskContext > ,
951
951
metrics : BuildProbeJoinMetrics ,
952
952
reservation : MemoryReservation ,
953
953
with_visited_indices_bitmap : bool ,
954
954
probe_threads_count : usize ,
955
955
) -> Result < JoinLeftData > {
956
- let schema = left. schema ( ) ;
957
-
958
- let stream = left. execute ( partition, Arc :: clone ( & context) ) ?;
956
+ let schema = left_stream. schema ( ) ;
959
957
960
958
// This operation performs 2 steps at once:
961
959
// 1. creates a [JoinHashMap] of all batches from the stream
962
960
// 2. stores the batches in a vector.
963
961
let initial = ( Vec :: new ( ) , 0 , metrics, reservation) ;
964
- let ( batches, num_rows, metrics, mut reservation) = stream
962
+ let ( batches, num_rows, metrics, mut reservation) = left_stream
965
963
. try_fold ( initial, |mut acc, batch| async {
966
964
let batch_size = get_record_batch_memory_size ( & batch) ;
967
965
// Reserve memory for incoming batch
0 commit comments