@@ -117,6 +117,90 @@ pub unsafe fn swap<T>(x: *mut T, y: *mut T) {
117117 mem:: forget ( tmp) ;
118118}
119119
120+ /// Swaps a sequence of values at two mutable locations of the same type.
121+ ///
122+ /// # Safety
123+ ///
124+ /// The two arguments must each point to the beginning of `count` locations
125+ /// of valid memory, and the two memory ranges must not overlap.
126+ ///
127+ /// # Examples
128+ ///
129+ /// Basic usage:
130+ ///
131+ /// ```
132+ /// #![feature(swap_nonoverlapping)]
133+ ///
134+ /// use std::ptr;
135+ ///
136+ /// let mut x = [1, 2, 3, 4];
137+ /// let mut y = [7, 8, 9];
138+ ///
139+ /// unsafe {
140+ /// ptr::swap_nonoverlapping(x.as_mut_ptr(), y.as_mut_ptr(), 2);
141+ /// }
142+ ///
143+ /// assert_eq!(x, [7, 8, 3, 4]);
144+ /// assert_eq!(y, [1, 2, 9]);
145+ /// ```
146+ #[ inline]
147+ #[ unstable( feature = "swap_nonoverlapping" , issue = "42818" ) ]
148+ pub unsafe fn swap_nonoverlapping < T > ( x : * mut T , y : * mut T , count : usize ) {
149+ let x = x as * mut u8 ;
150+ let y = y as * mut u8 ;
151+ let len = mem:: size_of :: < T > ( ) * count;
152+ swap_nonoverlapping_bytes ( x, y, len)
153+ }
154+
155+ #[ inline]
156+ unsafe fn swap_nonoverlapping_bytes ( x : * mut u8 , y : * mut u8 , len : usize ) {
157+ // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
158+ // that swapping either 32 bytes or 64 bytes at a time is most efficient for intel
159+ // Haswell E processors. LLVM is more able to optimize if we give a struct a
160+ // #[repr(simd)], even if we don't actually use this struct directly.
161+ //
162+ // FIXME repr(simd) broken on emscripten and redox
163+ #[ cfg_attr( not( any( target_os = "emscripten" , target_os = "redox" ) ) , repr( simd) ) ]
164+ struct Block ( u64 , u64 , u64 , u64 ) ;
165+ struct UnalignedBlock ( u64 , u64 , u64 , u64 ) ;
166+
167+ let block_size = mem:: size_of :: < Block > ( ) ;
168+
169+ // Loop through x & y, copying them `Block` at a time
170+ // The optimizer should unroll the loop fully for most types
171+ // N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
172+ let mut i = 0 ;
173+ while i + block_size <= len {
174+ // Create some uninitialized memory as scratch space
175+ // Declaring `t` here avoids aligning the stack when this loop is unused
176+ let mut t: Block = mem:: uninitialized ( ) ;
177+ let t = & mut t as * mut _ as * mut u8 ;
178+ let x = x. offset ( i as isize ) ;
179+ let y = y. offset ( i as isize ) ;
180+
181+ // Swap a block of bytes of x & y, using t as a temporary buffer
182+ // This should be optimized into efficient SIMD operations where available
183+ copy_nonoverlapping ( x, t, block_size) ;
184+ copy_nonoverlapping ( y, x, block_size) ;
185+ copy_nonoverlapping ( t, y, block_size) ;
186+ i += block_size;
187+ }
188+
189+ if i < len {
190+ // Swap any remaining bytes
191+ let mut t: UnalignedBlock = mem:: uninitialized ( ) ;
192+ let rem = len - i;
193+
194+ let t = & mut t as * mut _ as * mut u8 ;
195+ let x = x. offset ( i as isize ) ;
196+ let y = y. offset ( i as isize ) ;
197+
198+ copy_nonoverlapping ( x, t, rem) ;
199+ copy_nonoverlapping ( y, x, rem) ;
200+ copy_nonoverlapping ( t, y, rem) ;
201+ }
202+ }
203+
120204/// Replaces the value at `dest` with `src`, returning the old
121205/// value, without dropping either.
122206///
0 commit comments