@@ -41,6 +41,8 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less
4141
4242 cfg_if ! {
4343 if #[ cfg( any( feature = "optimize_for_size" , target_pointer_width = "16" ) ) ] {
44+ // Unlike driftsort, mergesort only requires len / 2,
45+ // not len - len / 2.
4446 let alloc_len = len / 2 ;
4547
4648 cfg_if! {
@@ -91,16 +93,26 @@ fn driftsort_main<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], i
9193 // By allocating n elements of memory we can ensure the entire input can
9294 // be sorted using stable quicksort, which allows better performance on
9395 // random and low-cardinality distributions. However, we still want to
94- // reduce our memory usage to n / 2 for large inputs. We do this by scaling
95- // our allocation as max(n / 2, min(n, 8MB)), ensuring we scale like n for
96- // small inputs and n / 2 for large inputs, without a sudden drop off. We
96+ // reduce our memory usage to n - n / 2 for large inputs. We do this by scaling
97+ // our allocation as max(n - n / 2, min(n, 8MB)), ensuring we scale like n for
98+ // small inputs and n - n / 2 for large inputs, without a sudden drop off. We
9799 // also need to ensure our alloc >= MIN_SMALL_SORT_SCRATCH_LEN, as the
98100 // small-sort always needs this much memory.
101+ //
102+ // driftsort will produce unsorted runs of up to min_good_run_len, which
103+ // is at most len - len / 2.
104+ // Unsorted runs need to be processed by quicksort, which requires as much
105+ // scratch space as the run length, therefore the scratch space must be at
106+ // least len - len / 2.
107+ // If min_good_run_len is ever modified, this code must be updated to allocate
108+ // the correct scratch size for it.
99109 const MAX_FULL_ALLOC_BYTES : usize = 8_000_000 ; // 8MB
100110 let max_full_alloc = MAX_FULL_ALLOC_BYTES / mem:: size_of :: < T > ( ) ;
101111 let len = v. len ( ) ;
102- let alloc_len =
103- cmp:: max ( cmp:: max ( len / 2 , cmp:: min ( len, max_full_alloc) ) , SMALL_SORT_GENERAL_SCRATCH_LEN ) ;
112+ let alloc_len = cmp:: max (
113+ cmp:: max ( len - len / 2 , cmp:: min ( len, max_full_alloc) ) ,
114+ SMALL_SORT_GENERAL_SCRATCH_LEN ,
115+ ) ;
104116
105117 // For small inputs 4KiB of stack storage suffices, which allows us to avoid
106118 // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
0 commit comments