|
7 | 7 | #![stable(feature = "rust1", since = "1.0.0")] |
8 | 8 |
|
9 | 9 | use crate::cmp::Ordering::{self, Equal, Greater, Less}; |
10 | | -use crate::intrinsics::{exact_div, unchecked_sub}; |
| 10 | +use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub}; |
11 | 11 | use crate::mem::{self, SizedTypeProperties}; |
12 | 12 | use crate::num::NonZero; |
13 | 13 | use crate::ops::{Bound, OneSidedRange, Range, RangeBounds}; |
@@ -2770,41 +2770,54 @@ impl<T> [T] { |
2770 | 2770 | where |
2771 | 2771 | F: FnMut(&'a T) -> Ordering, |
2772 | 2772 | { |
2773 | | - // INVARIANTS: |
2774 | | - // - 0 <= left <= left + size = right <= self.len() |
2775 | | - // - f returns Less for everything in self[..left] |
2776 | | - // - f returns Greater for everything in self[right..] |
2777 | 2773 | let mut size = self.len(); |
2778 | | - let mut left = 0; |
2779 | | - let mut right = size; |
2780 | | - while left < right { |
2781 | | - let mid = left + size / 2; |
2782 | | - |
2783 | | - // SAFETY: the while condition means `size` is strictly positive, so |
2784 | | - // `size/2 < size`. Thus `left + size/2 < left + size`, which |
2785 | | - // coupled with the `left + size <= self.len()` invariant means |
2786 | | - // we have `left + size/2 < self.len()`, and this is in-bounds. |
| 2774 | + if size == 0 { |
| 2775 | + return Err(0); |
| 2776 | + } |
| 2777 | + let mut base = 0usize; |
| 2778 | + |
| 2779 | + // This loop intentionally doesn't have an early exit if the comparison |
| 2780 | + // returns Equal. We want the number of loop iterations to depend *only* |
| 2781 | + // on the size of the input slice so that the CPU can reliably predict |
| 2782 | + // the loop count. |
| 2783 | + while size > 1 { |
| 2784 | + let half = size / 2; |
| 2785 | + let mid = base + half; |
| 2786 | + |
| 2787 | + // SAFETY: the call is made safe by the following inconstants: |
| 2788 | + // - `mid >= 0`: by definition |
| 2789 | + // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...` |
2787 | 2790 | let cmp = f(unsafe { self.get_unchecked(mid) }); |
2788 | 2791 |
|
2789 | | - // This control flow produces conditional moves, which results in |
2790 | | - // fewer branches and instructions than if/else or matching on |
2791 | | - // cmp::Ordering. |
2792 | | - // This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx. |
2793 | | - left = if cmp == Less { mid + 1 } else { left }; |
2794 | | - right = if cmp == Greater { mid } else { right }; |
2795 | | - if cmp == Equal { |
2796 | | - // SAFETY: same as the `get_unchecked` above |
2797 | | - unsafe { hint::assert_unchecked(mid < self.len()) }; |
2798 | | - return Ok(mid); |
2799 | | - } |
2800 | | - |
2801 | | - size = right - left; |
| 2792 | + // Binary search interacts poorly with branch prediction, so force |
| 2793 | + // the compiler to use conditional moves if supported by the target |
| 2794 | + // architecture. |
| 2795 | + base = select_unpredictable(cmp == Greater, base, mid); |
| 2796 | + |
| 2797 | + // This is imprecise in the case where `size` is odd and the |
| 2798 | + // comparison returns Greater: the mid element still gets included |
| 2799 | + // by `size` even though it's known to be larger than the element |
| 2800 | + // being searched for. |
| 2801 | + // |
| 2802 | + // This is fine though: we gain more performance by keeping the |
| 2803 | + // loop iteration count invariant (and thus predictable) than we |
| 2804 | + // lose from considering one additional element. |
| 2805 | + size -= half; |
2802 | 2806 | } |
2803 | 2807 |
|
2804 | | - // SAFETY: directly true from the overall invariant. |
2805 | | - // Note that this is `<=`, unlike the assume in the `Ok` path. |
2806 | | - unsafe { hint::assert_unchecked(left <= self.len()) }; |
2807 | | - Err(left) |
| 2808 | + // SAFETY: base is always in [0, size) because base <= mid. |
| 2809 | + let cmp = f(unsafe { self.get_unchecked(base) }); |
| 2810 | + if cmp == Equal { |
| 2811 | + // SAFETY: same as the `get_unchecked` above. |
| 2812 | + unsafe { hint::assert_unchecked(base < self.len()) }; |
| 2813 | + Ok(base) |
| 2814 | + } else { |
| 2815 | + let result = base + (cmp == Less) as usize; |
| 2816 | + // SAFETY: same as the `get_unchecked` above. |
| 2817 | + // Note that this is `<=`, unlike the assume in the `Ok` path. |
| 2818 | + unsafe { hint::assert_unchecked(result <= self.len()) }; |
| 2819 | + Err(result) |
| 2820 | + } |
2808 | 2821 | } |
2809 | 2822 |
|
2810 | 2823 | /// Binary searches this slice with a key extraction function. |
|
0 commit comments