@@ -24,7 +24,6 @@ use std::fmt;
24
24
use std:: fmt:: { Debug , Formatter } ;
25
25
use std:: sync:: Arc ;
26
26
27
- use crate :: common:: spawn_buffered;
28
27
use crate :: execution_plan:: { Boundedness , CardinalityEffect , EmissionType } ;
29
28
use crate :: expressions:: PhysicalSortExpr ;
30
29
use crate :: limit:: LimitStream ;
@@ -687,23 +686,35 @@ impl ExternalSorter {
687
686
let mut current_batches = Vec :: new ( ) ;
688
687
let mut current_size = 0 ;
689
688
690
- for batch in std:: mem:: take ( & mut self . in_mem_batches ) {
689
+ // Drain in_mem_batches using pop() to release memory earlier.
690
+ // This avoids holding onto the entire vector during iteration.
691
+ // Note:
692
+ // For now, `sort_in_place_threshold_bytes` determines when accumulated batches are merged; in the future this threshold could be made more dynamic.
693
+ while let Some ( batch) = self . in_mem_batches . pop ( ) {
691
694
let batch_size = get_reserved_byte_for_record_batch ( & batch) ;
695
+
696
+ // If adding this batch would exceed the memory threshold, merge current_batches.
692
697
if current_size + batch_size > self . sort_in_place_threshold_bytes
693
698
&& !current_batches. is_empty ( )
694
699
{
700
+ // Merge accumulated batches into one.
695
701
let merged = concat_batches ( & self . schema , & current_batches) ?;
696
702
current_batches. clear ( ) ;
703
+
704
+ // Update memory reservation.
697
705
self . reservation . try_shrink ( current_size) ?;
698
706
let merged_size = get_reserved_byte_for_record_batch ( & merged) ;
699
707
self . reservation . try_grow ( merged_size) ?;
708
+
700
709
merged_batches. push ( merged) ;
701
710
current_size = 0 ;
702
711
}
712
+
703
713
current_batches. push ( batch) ;
704
714
current_size += batch_size;
705
715
}
706
716
717
+ // Merge any remaining batches after the loop.
707
718
if !current_batches. is_empty ( ) {
708
719
let merged = concat_batches ( & self . schema , & current_batches) ?;
709
720
self . reservation . try_shrink ( current_size) ?;
@@ -712,15 +723,19 @@ impl ExternalSorter {
712
723
merged_batches. push ( merged) ;
713
724
}
714
725
726
+ // Create sorted streams directly without using spawn_buffered.
727
+ // This allows for sorting to happen inline and enables earlier batch drop.
715
728
let streams = merged_batches
716
729
. into_iter ( )
717
730
. map ( |batch| {
718
731
let metrics = self . metrics . baseline . intermediate ( ) ;
719
732
let reservation = self
720
733
. reservation
721
734
. split ( get_reserved_byte_for_record_batch ( & batch) ) ;
735
+
736
+ // Sort the batch inline.
722
737
let input = self . sort_batch_stream ( batch, metrics, reservation) ?;
723
- Ok ( spawn_buffered ( input, 1 ) )
738
+ Ok ( input)
724
739
} )
725
740
. collect :: < Result < _ > > ( ) ?;
726
741
0 commit comments