@@ -19,10 +19,12 @@ use core::cmp::Ordering::{self, Less};
 use core::mem::{self, SizedTypeProperties};
 #[cfg(not(no_global_oom_handling))]
 use core::ptr;
+#[cfg(not(no_global_oom_handling))]
+use core::slice::sort;
 
 use crate::alloc::Allocator;
 #[cfg(not(no_global_oom_handling))]
-use crate::alloc::Global;
+use crate::alloc::{self, Global};
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::ToOwned;
 use crate::boxed::Box;
@@ -206,7 +208,7 @@ impl<T> [T] {
     where
         T: Ord,
     {
-        merge_sort(self, T::lt);
+        stable_sort(self, T::lt);
     }
 
     /// Sorts the slice with a comparator function.
@@ -262,7 +264,7 @@ impl<T> [T] {
     where
         F: FnMut(&T, &T) -> Ordering,
     {
-        merge_sort(self, |a, b| compare(a, b) == Less);
+        stable_sort(self, |a, b| compare(a, b) == Less);
     }
 
     /// Sorts the slice with a key extraction function.
@@ -305,7 +307,7 @@ impl<T> [T] {
         F: FnMut(&T) -> K,
         K: Ord,
     {
-        merge_sort(self, |a, b| f(a).lt(&f(b)));
+        stable_sort(self, |a, b| f(a).lt(&f(b)));
     }
 
     /// Sorts the slice with a key extraction function.
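All three entry points above funnel the caller's ordering into one boolean less-than closure for the shared `stable_sort` driver, and the observable contract is unchanged: the sort is stable. A minimal illustration through the public API (my example, not part of the commit):

```rust
fn main() {
    // Sort by the numeric key only; the letters record each element's
    // original position among equal keys.
    let mut v = [(1, 'b'), (0, 'a'), (1, 'a'), (0, 'b')];
    v.sort_by_key(|&(k, _)| k);

    // A stable sort keeps equal keys in their original relative order:
    // `(0, 'a')` stays before `(0, 'b')`, and `(1, 'b')` before `(1, 'a')`.
    assert_eq!(v, [(0, 'a'), (0, 'b'), (1, 'b'), (1, 'a')]);
}
```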
@@ -812,324 +814,52 @@ impl<T: Clone> ToOwned for [T] {
 // Sorting
 ////////////////////////////////////////////////////////////////////////////////
 
-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
-#[cfg(not(no_global_oom_handling))]
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    if v.len() >= 2 && is_less(&v[1], &v[0]) {
-        unsafe {
-            // There are three ways to implement insertion here:
-            //
-            // 1. Swap adjacent elements until the first one gets to its final destination.
-            //    However, this way we copy data around more than is necessary. If elements are big
-            //    structures (costly to copy), this method will be slow.
-            //
-            // 2. Iterate until the right place for the first element is found. Then shift the
-            //    elements succeeding it to make room for it and finally place it into the
-            //    remaining hole. This is a good method.
-            //
-            // 3. Copy the first element into a temporary variable. Iterate until the right place
-            //    for it is found. As we go along, copy every traversed element into the slot
-            //    preceding it. Finally, copy data from the temporary variable into the remaining
-            //    hole. This method is very good. Benchmarks demonstrated slightly better
-            //    performance than with the 2nd method.
-            //
-            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
-            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
-            // Intermediate state of the insertion process is always tracked by `hole`, which
-            // serves two purposes:
-            // 1. Protects integrity of `v` from panics in `is_less`.
-            // 2. Fills the remaining hole in `v` in the end.
-            //
-            // Panic safety:
-            //
-            // If `is_less` panics at any point during the process, `hole` will get dropped and
-            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
-            // initially held exactly once.
-            let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
-            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
-            for i in 2..v.len() {
-                if !is_less(&v[i], &*tmp) {
-                    break;
-                }
-                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
-                hole.dest = &mut v[i];
-            }
-            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
-        }
-    }
-
-    // When dropped, copies from `src` into `dest`.
-    struct InsertionHole<T> {
-        src: *const T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for InsertionHole<T> {
-        fn drop(&mut self) {
-            unsafe {
-                ptr::copy_nonoverlapping(self.src, self.dest, 1);
-            }
-        }
-    }
-}
-
-/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
-/// stores the result into `v[..]`.
-///
-/// # Safety
-///
-/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
-/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
-#[cfg(not(no_global_oom_handling))]
-unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-    let v = v.as_mut_ptr();
-    let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
-
-    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
-    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
-    // copying the lesser (or greater) one into `v`.
-    //
-    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
-    // consumed first, then we must copy whatever is left of the shorter run into the remaining
-    // hole in `v`.
-    //
-    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
-    // 1. Protects integrity of `v` from panics in `is_less`.
-    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
-    //
-    // Panic safety:
-    //
-    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
-    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
-    // object it initially held exactly once.
-    let mut hole;
-
-    if mid <= len - mid {
-        // The left run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v, buf, mid);
-            hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
-        }
-
-        // Initially, these pointers point to the beginnings of their arrays.
-        let left = &mut hole.start;
-        let mut right = v_mid;
-        let out = &mut hole.dest;
-
-        while *left < hole.end && right < v_end {
-            // Consume the lesser side.
-            // If equal, prefer the left run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right, &**left) {
-                    get_and_increment(&mut right)
-                } else {
-                    get_and_increment(left)
-                };
-                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
-            }
-        }
-    } else {
-        // The right run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v_mid, buf, len - mid);
-            hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
-        }
-
-        // Initially, these pointers point past the ends of their arrays.
-        let left = &mut hole.dest;
-        let right = &mut hole.end;
-        let mut out = v_end;
-
-        while v < *left && buf < *right {
-            // Consume the greater side.
-            // If equal, prefer the right run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
-                    decrement_and_get(left)
-                } else {
-                    decrement_and_get(right)
-                };
-                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
-            }
-        }
-    }
-    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
-    // it will now be copied into the hole in `v`.
-
-    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
-        let old = *ptr;
-        *ptr = unsafe { ptr.add(1) };
-        old
-    }
-
-    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
-        *ptr = unsafe { ptr.sub(1) };
-        *ptr
-    }
-
-    // When dropped, copies the range `start..end` into `dest..`.
-    struct MergeHole<T> {
-        start: *mut T,
-        end: *mut T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for MergeHole<T> {
-        fn drop(&mut self) {
-            // `T` is not a zero-sized type, and these are pointers into a slice's elements.
-            unsafe {
-                let len = self.end.sub_ptr(self.start);
-                ptr::copy_nonoverlapping(self.start, self.dest, len);
-            }
-        }
-    }
-}
-
-/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
-///
-/// The algorithm identifies strictly descending and non-descending subsequences, which are called
-/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
-/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
-/// satisfied:
-///
-/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
-/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
-///
-/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
+#[inline]
 #[cfg(not(no_global_oom_handling))]
-fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
+fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
 where
     F: FnMut(&T, &T) -> bool,
 {
-    // Slices of up to this length get sorted using insertion sort.
-    const MAX_INSERTION: usize = 20;
-    // Very short runs are extended using insertion sort to span at least this many elements.
-    const MIN_RUN: usize = 10;
-
-    // Sorting has no meaningful behavior on zero-sized types.
     if T::IS_ZST {
+        // Sorting has no meaningful behavior on zero-sized types. Do nothing.
         return;
     }
 
-    let len = v.len();
-
-    // Short arrays get sorted in-place via insertion sort to avoid allocations.
-    if len <= MAX_INSERTION {
-        if len >= 2 {
-            for i in (0..len - 1).rev() {
-                insert_head(&mut v[i..], &mut is_less);
-            }
-        }
-        return;
-    }
-
-    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
-    // shallow copies of the contents of `v` without risking the dtors running on copies if
-    // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
-    // which will always have length at most `len / 2`.
-    let mut buf = Vec::with_capacity(len / 2);
+    let elem_alloc_fn = |len: usize| -> *mut T {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
+        // elements.
+        unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
+    };
 
-    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
-    // strange decision, but consider the fact that merges more often go in the opposite direction
-    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
-    // backwards. To conclude, identifying runs by traversing backwards improves performance.
-    let mut runs = vec![];
-    let mut end = len;
-    while end > 0 {
-        // Find the next natural run, and reverse it if it's strictly descending.
-        let mut start = end - 1;
-        if start > 0 {
-            start -= 1;
-            unsafe {
-                if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
-                    while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
-                        start -= 1;
-                    }
-                    v[start..end].reverse();
-                } else {
-                    while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
-                    {
-                        start -= 1;
-                    }
-                }
-            }
-        }
-
-        // Insert some more elements into the run if it's too short. Insertion sort is faster than
-        // merge sort on short sequences, so this significantly improves performance.
-        while start > 0 && end - start < MIN_RUN {
-            start -= 1;
-            insert_head(&mut v[start..end], &mut is_less);
+    let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
         }
+    };
 
-        // Push this run onto the stack.
-        runs.push(Run { start, len: end - start });
-        end = start;
-
-        // Merge some pairs of adjacent runs to satisfy the invariants.
-        while let Some(r) = collapse(&runs) {
-            let left = runs[r + 1];
-            let right = runs[r];
-            unsafe {
-                merge(
-                    &mut v[left.start..right.start + right.len],
-                    left.len,
-                    buf.as_mut_ptr(),
-                    &mut is_less,
-                );
-            }
-            runs[r] = Run { start: left.start, len: left.len + right.len };
-            runs.remove(r + 1);
+    let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
+        // obscene length or 0.
+        unsafe {
+            alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
+                as *mut sort::TimSortRun
         }
-    }
-
-    // Finally, exactly one run must remain in the stack.
-    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+    };
 
-    // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
-    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
-    // algorithm should continue building a new run instead, `None` is returned.
-    //
-    // TimSort is infamous for its buggy implementations, as described here:
-    // http://envisage-project.eu/timsort-specification-and-verification/
-    //
-    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
-    // Enforcing them on just top three is not sufficient to ensure that the invariants will still
-    // hold for *all* runs in the stack.
-    //
-    // This function correctly checks invariants for the top four runs. Additionally, if the top
-    // run starts at index 0, it will always demand a merge operation until the stack is fully
-    // collapsed, in order to complete the sort.
-    #[inline]
-    fn collapse(runs: &[Run]) -> Option<usize> {
-        let n = runs.len();
-        if n >= 2
-            && (runs[n - 1].start == 0
-                || runs[n - 2].len <= runs[n - 1].len
-                || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
-                || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
-        {
-            if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
-        } else {
-            None
+    let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
+        // SAFETY: The caller must ensure that buf_ptr was created by run_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(
+                buf_ptr as *mut u8,
+                alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
+            );
         }
-    }
+    };
 
-    #[derive(Clone, Copy)]
-    struct Run {
-        start: usize,
-        len: usize,
-    }
+    sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
 }
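The shape of the new `stable_sort` is the point of the change: the driver now lives in `core::slice::sort`, and since `core` cannot allocate, this shim in `alloc` hands it memory through four closures (scratch elements plus the TimSort run stack). A standalone sketch of that inversion-of-control pattern, using a toy `merge_halves` driver and closure names of my own invention (not the real `sort::merge_sort` signature):

```rust
use std::alloc::{alloc, dealloc, Layout};

// Toy "core-side" routine: merges the two pre-sorted halves of `v` using
// scratch memory obtained through caller-supplied closures, mirroring how
// the real driver takes `elem_alloc_fn`/`elem_dealloc_fn` instead of
// allocating directly. `T: Copy` keeps the sketch free of `ptr::read`
// bookkeeping; the real code has no such bound.
fn merge_halves<T: Ord + Copy>(
    v: &mut [T],
    mid: usize,
    elem_alloc_fn: impl Fn(usize) -> *mut T,
    elem_dealloc_fn: impl Fn(*mut T, usize),
) {
    let buf = elem_alloc_fn(v.len());
    // A real implementation must handle a null return (allocation failure);
    // omitted here for brevity.
    unsafe {
        let (mut i, mut j, mut out) = (0, mid, 0);
        while i < mid && j < v.len() {
            // `<=` keeps the merge stable: ties take the left element first.
            let take_left = v[i] <= v[j];
            *buf.add(out) = if take_left { v[i] } else { v[j] };
            if take_left { i += 1 } else { j += 1 }
            out += 1;
        }
        while i < mid { *buf.add(out) = v[i]; i += 1; out += 1; }
        while j < v.len() { *buf.add(out) = v[j]; j += 1; out += 1; }
        for k in 0..v.len() {
            v[k] = *buf.add(k);
        }
    }
    elem_dealloc_fn(buf, v.len());
}

fn main() {
    // The "alloc-side" caller supplies the closures, just as `stable_sort`
    // does for `sort::merge_sort`.
    let elem_alloc =
        |len: usize| unsafe { alloc(Layout::array::<i32>(len).unwrap()) as *mut i32 };
    let elem_dealloc = |ptr: *mut i32, len: usize| unsafe {
        dealloc(ptr as *mut u8, Layout::array::<i32>(len).unwrap());
    };

    let mut v = [1, 4, 7, 2, 3, 9];
    merge_halves(&mut v, 3, elem_alloc, elem_dealloc);
    assert_eq!(v, [1, 2, 3, 4, 7, 9]);
}
```

The same pattern covers `run_alloc_fn`/`run_dealloc_fn` for the `sort::TimSortRun` stack: the core-side algorithm stays allocator-agnostic, while the `alloc` crate decides how memory is actually obtained and released.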