Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions diskann-wide/src/arch/emulated/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ impl Architecture for Scalar {

// SAFETY: It's always safe to construct the `Scalar` architecture. Additionally,
// since `Scalar` is a `Copy` zero-sized type, it is safe to wink into existence
// and is ABI compattible with `Hidden`.
// and is ABI compatible with `Hidden`.
unsafe { arch::hide1(f) }
}

Expand All @@ -147,7 +147,7 @@ impl Architecture for Scalar {

// SAFETY: It's always safe to construct the `Scalar` architecture. Additionally,
// since `Scalar` is a `Copy` zero-sized type, it is safe to wink into existence
// and is ABI compattible with `Hidden`.
// and is ABI compatible with `Hidden`.
unsafe { arch::hide2(f) }
}

Expand All @@ -163,7 +163,7 @@ impl Architecture for Scalar {

// SAFETY: It's always safe to construct the `Scalar` architecture. Additionally,
// since `Scalar` is a `Copy` zero-sized type, it is safe to wink into existence
// and is ABI compattible with `Hidden`.
// and is ABI compatible with `Hidden`.
unsafe { arch::hide3(f) }
}
}
Expand Down
40 changes: 20 additions & 20 deletions diskann-wide/src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@
//! The consequence of this is that we need to take an **unsafe** function pointer so we
//! can dispatch call directly to the implementation.
//!
//! Reason 2: Even if the above approach worked, the [`Architecture`] is sill present in the
//! Reason 2: Even if the above approach worked, the [`Architecture`] is still present in the
//! signature of the `fn`, meaning we haven't really hidden the micro-architecture
//! information.
//!
Expand Down Expand Up @@ -690,17 +690,17 @@ pub trait Architecture: sealed::Sealed {
/// Run the provided closure targeting this architecture.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
fn run<F, R>(self, f: F) -> R
where
F: Target<Self, R>;

/// Run the provided closure targeting this architecture with an inlining hint.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
///
/// Note that although an inline hint is applied, it is not a guaranteed that this call
/// Note that although an inline hint is applied, it is not guaranteed that this call
/// will be inlined due to the interaction of `target_features`. If you really need `F`
/// to be inlined, you can call its `Target` method directly, but care must be taken
/// because this will not reapply `target_features`.
Expand All @@ -711,7 +711,7 @@ pub trait Architecture: sealed::Sealed {
/// Run the provided closure targeting this architecture with an additional argument.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
fn run1<F, T0, R>(self, f: F, x0: T0) -> R
where
F: Target1<Self, R, T0>;
Expand All @@ -720,9 +720,9 @@ pub trait Architecture: sealed::Sealed {
/// an inlining hint.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
///
/// Note that although an inline hint is applied, it is not a guaranteed that this call
/// Note that although an inline hint is applied, it is not guaranteed that this call
/// will be inlined due to the interaction of `target_features`. If you really need `F`
/// to be inlined, you can call its `Target1` method directly, but care must be taken
/// because this will not reapply `target_features`.
Expand All @@ -733,7 +733,7 @@ pub trait Architecture: sealed::Sealed {
/// Run the provided closure targeting this architecture with two additional arguments.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
fn run2<F, T0, T1, R>(self, f: F, x0: T0, x1: T1) -> R
where
F: Target2<Self, R, T0, T1>;
Expand All @@ -742,9 +742,9 @@ pub trait Architecture: sealed::Sealed {
/// and an inlining hint.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
///
/// Note that although an inline hint is applied, it is not a guaranteed that this call
/// Note that although an inline hint is applied, it is not guaranteed that this call
/// will be inlined due to the interaction of `target_features`. If you really need `F`
/// to be inlined, you can call its `Target2` method directly, but care must be taken
/// because this will not reapply `target_features`.
Expand All @@ -755,7 +755,7 @@ pub trait Architecture: sealed::Sealed {
/// Run the provided closure targeting this architecture with three additional arguments.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
fn run3<F, T0, T1, T2, R>(self, f: F, x0: T0, x1: T1, x2: T2) -> R
where
F: Target3<Self, R, T0, T1, T2>;
Expand All @@ -764,9 +764,9 @@ pub trait Architecture: sealed::Sealed {
/// and an inlining hint.
///
/// This function is always safe to call, but the function `f` likely needs to be
/// inlined into `run` in for the correct target features to be applied.
/// inlined into `run` for the correct target features to be applied.
///
/// Note that although an inline hint is applied, it is not a guaranteed that this call
/// Note that although an inline hint is applied, it is not guaranteed that this call
/// will be inlined due to the interaction of `target_features`. If you really need `F`
/// to be inlined, you can call its `Target3` method directly, but care must be taken
/// because this will not reapply `target_features`.
Expand Down Expand Up @@ -893,7 +893,7 @@ where

/// A variation of [`Target1`] that uses an associated function instead of a method.
///
/// This is useful used in the function pointer API.
/// This is used in the function pointer API.
pub trait FTarget1<A, R, T0>
where
A: Architecture,
Expand All @@ -903,7 +903,7 @@ where

/// A variation of [`Target2`] that uses an associated function instead of a method.
///
/// This is useful used in the function pointer API.
/// This is used in the function pointer API.
pub trait FTarget2<A, R, T0, T1>
where
A: Architecture,
Expand All @@ -913,7 +913,7 @@ where

/// A variation of [`Target3`] that uses an associated function instead of a method.
///
/// This is useful used in the function pointer API.
/// This is used in the function pointer API.
pub trait FTarget3<A, R, T0, T1, T2>
where
A: Architecture,
Expand Down Expand Up @@ -994,7 +994,7 @@ const _ASSERT_ALIGNED: () = assert!(
macro_rules! dispatched {
($name:ident, { $($Ts:ident )* }, { $($xs:ident )* }, { $($lt:lifetime )* }) => {
/// A function pointer that calls directly into a micro-architecture optimized
/// function, returning a value of type `R` and accepting the speficied number of
/// function, returning a value of type `R` and accepting the specified number of
/// arguments.
///
/// Arguments are mapped using the [`AddLifetime`] trait to enable passing structs
Expand Down Expand Up @@ -1065,7 +1065,7 @@ dispatched!(Dispatched1, { T0 }, { x0 }, { 'a0 });
dispatched!(Dispatched2, { T0 T1 }, { x0 x1 }, { 'a0 'a1 });
dispatched!(Dispatched3, { T0 T1 T2 }, { x0 x1 x2 }, { 'a0 'a1 'a2 });

/// This macro stamps out the function-pointer tranmute trick we use to type-erase
/// This macro stamps out the function-pointer transmute trick we use to type-erase
/// architecture in the function-pointer API.
macro_rules! hide {
($name:ident, $dispatched:ident, { $($Ts:ident )* }) => {
Expand All @@ -1082,9 +1082,9 @@ macro_rules! hide {
/// We can do this because Rust guarantees that zero sized types are ABI
/// compatible.
///
/// The caller must ensure that winking into existance and instance of `A` is
/// The caller must ensure that winking into existence an instance of `A` is
/// a safe operation. For [`Architectures`], this means that the requirements
/// of `A::new()` are uphelf.
/// of `A::new()` are upheld.
///
/// Put plainly:
///
Expand Down
2 changes: 1 addition & 1 deletion diskann-wide/src/arch/x86_64/algorithms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub(crate) unsafe fn __load_first_of_16_bytes(arch: V3, ptr: *const u8, first: u
// SAFETY:
// * Pointer Cast: The instruction `_mm_loadu_si128` does not have any alignment
// restrictions, so if `[ptr, ptr + first)` is valid, the cast will be valid.
// * `_mm_loadu_si128`: Use of the intrinsic is gated by the `cfg` macro.
// * `_mm_loadu_si128`: The intrinsic requires SSE2, implied by V3.
// The load is valid since the caller passed a value greater than 16.
// *`__m128i` and `u128` are both the same size, do not own any resources, and are
// valid for all bit patterns.
Expand Down
6 changes: 3 additions & 3 deletions diskann-wide/src/arch/x86_64/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ pub(crate) trait AllOnes {

impl AllOnes for __m128i {
fn all_ones() -> Self {
// SAFETY: Gated by CFG
// SAFETY: `_mm_set1_epi32` requires SSE2, which is baseline for x86_64.
unsafe { _mm_set1_epi32(-1) }
}
}

impl AllOnes for __m256i {
fn all_ones() -> Self {
// SAFETY: Gated by CFG
// SAFETY: `_mm256_set1_epi32` requires AVX, implied by the caller's architecture.
unsafe { _mm256_set1_epi32(-1) }
}
}

impl AllOnes for __m512i {
fn all_ones() -> Self {
// SAFETY: Gated by CFG
// SAFETY: `_mm512_set1_epi32` requires AVX-512F, implied by the caller's architecture.
unsafe { _mm512_set1_epi32(-1) }
}
}
12 changes: 6 additions & 6 deletions diskann-wide/src/arch/x86_64/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ macro_rules! x86_define_register {
#[inline(always)]
fn to_array(self) -> [$scalar; $lanes] {
// SAFETY: Provided the scalar type is an integer or floating point,
// then all bit pattens are valid between source and destination types.
// then all bit patterns are valid between source and destination types.
// (provided an x86 intrinsic is one of the transmuted types).
//
// The source argument is taken by value (no reference conversion) and
Expand All @@ -79,7 +79,7 @@ macro_rules! x86_define_register {
#[inline(always)]
fn from_array(_: $arch, x: [$scalar; $lanes]) -> Self {
// SAFETY: Provided the scalar type is an integer or floating point,
// then all bit pattens are valid between source and destination types.
// then all bit patterns are valid between source and destination types.
// (provided an x86 intrinsic is one of the transmuted types).
//
// The source argument is taken by value (no reference conversion) and
Expand Down Expand Up @@ -208,7 +208,7 @@ macro_rules! x86_retarget {
/// Utility macro for defining `X86Splat`.
///
/// SAFETY: It is the invoker's responsibility to ensure that the intrinsic is safe to call.
/// That is - any intrinsics invoked must be compatbiel with `$type`'s associated architecture.
/// That is - any intrinsics invoked must be compatible with `$type`'s associated architecture.
macro_rules! x86_define_splat {
($type:ty, $intrinsic:expr, $requires:literal) => {
impl X86Splat for $type {
Expand Down Expand Up @@ -241,7 +241,7 @@ macro_rules! x86_define_splat {
/// Utility macro for defining `X86Default`.
///
/// SAFETY: It is the invoker's responsibility to ensure that the intrinsic is safe to call.
/// That is - any intrinsics invoked must be compatbiel with `$type`'s associated architecture.
/// That is - any intrinsics invoked must be compatible with `$type`'s associated architecture.
macro_rules! x86_define_default {
($type:ty, $intrinsic:expr, $requires:literal) => {
impl X86Default for $type {
Expand All @@ -258,9 +258,9 @@ macro_rules! x86_define_default {
}

/// SAFETY: It is the invoker's responsibility to ensure that the provided intrinsics are
/// safe to call. T
/// safe to call.
///
/// hat is - any intrinsics invoked must be compatbiel with `$type`'s associated architecture.
/// That is - any intrinsics invoked must be compatible with `$type`'s associated architecture.
macro_rules! x86_splitjoin {
(__m512i, $type:path, $half:path) => {
impl $crate::SplitJoin for $type {
Expand Down
7 changes: 4 additions & 3 deletions diskann-wide/src/arch/x86_64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ cfg_if::cfg_if! {
// We cache a single enum and use it to indicate the version with the following meaning:
//
// 0: Uninitialized
// 1: V3
// 2 and above: Scalar
// 1: Scalar
// 2: V3
// 3: V4
static ARCH_NUMBER: AtomicU64 = AtomicU64::new(ARCH_UNINITIALIZED);

// NOTE: Architecture must be properly nested in ascending order so compatibility checks
Expand Down Expand Up @@ -445,7 +446,7 @@ mod tests {
// These tests reach directly into the dispatch mechanism.
//
// There should only be a single test (this one) that does this, and all other tests
// involving dispatch should either be configured to work properly regarless of the
// involving dispatch should either be configured to work properly regardless of the
// backend architecture, or be run in their own process.
#[test]
fn test_dispatch() {
Expand Down
2 changes: 1 addition & 1 deletion diskann-wide/src/arch/x86_64/v3/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ helpers::unsafe_map_conversion!(i8x16, i16x16, _mm256_cvtepi8_epi16, "avx2");
helpers::unsafe_map_conversion!(u8x16, i16x16, _mm256_cvtepu8_epi16, "avx2");

// i32 to f32
helpers::unsafe_map_cast!(i32x8 => (f32, f32x8), _mm256_cvtepi32_ps, "avx2");
helpers::unsafe_map_cast!(i32x8 => (f32, f32x8), _mm256_cvtepi32_ps, "avx");

//////////////////
// Reinterprets //
Expand Down
6 changes: 3 additions & 3 deletions diskann-wide/src/arch/x86_64/v3/f16x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl X86Splat for f16x16 {
// (1) .to_bits() -> Returns the underlying `u16` from the `f16`.
// (2) as i16 -> Bit-cast to `i16` to give to the intrinsic.
//
// SAFETY: `_mm256_set1_epi` requires AVX - implied by V3.
// SAFETY: `_mm256_set1_epi16` requires AVX - implied by V3.
Self(unsafe { _mm256_set1_epi16(value.to_bits() as i16) })
}
}
Expand All @@ -56,7 +56,7 @@ impl X86LoadStore for f16x16 {
unsafe fn load_simd(_: V3, ptr: *const f16) -> Self {
// SAFETY: Pointer access guaranteed by caller.
//
// `_mm256_loadu_si256` requires AVX - implied by V4.
// `_mm256_loadu_si256` requires AVX - implied by V3.
Self(unsafe { _mm256_loadu_si256(ptr as *const Self::Underlying) })
}

Expand All @@ -80,7 +80,7 @@ impl X86LoadStore for f16x16 {
unsafe fn store_simd(self, ptr: *mut f16) {
// SAFETY: Pointer access guaranteed by caller.
//
// `_mm256_storeu_si256` requires AVX - implied by V4.
// `_mm256_storeu_si256` requires AVX - implied by V3.
unsafe { _mm256_storeu_si256(ptr as *mut Self::Underlying, self.to_underlying()) }
}

Expand Down
3 changes: 1 addition & 2 deletions diskann-wide/src/arch/x86_64/v3/f16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ impl X86Splat for f16x8 {
// (1) .to_bits() -> Returns the underlying `u16` from the `f16`.
// (2) as i16 -> Bit-cast to `i16` to give to the intrinsic.
//
// SAFETY: Safe invocation of this function is gated by the CFG macro conditionally
// compiling this implementation.
// SAFETY: `_mm_set1_epi16` requires SSE2, implied by V3.
Self(unsafe { _mm_set1_epi16(value.to_bits() as i16) })
}
}
Expand Down
17 changes: 9 additions & 8 deletions diskann-wide/src/arch/x86_64/v3/f32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ helpers::unsafe_map_binary_op!(f32x4, std::ops::Mul, mul, _mm_mul_ps, "sse");
impl f32x4 {
#[inline(always)]
fn is_nan(self) -> mask32x4 {
// NOTE: `_CMP_UNORD_Q` returns `true` only if both arguments are NAN.
// NOTE: `_CMP_UNORD_Q` returns `true` if either argument is NaN. Since we compare
// `self` with `self`, this returns `true` exactly when `self` is NaN.
mask32x4::from_underlying(
self.arch(),
// SAFETY: `_mm_castps_si128` requires SSE2 and `_mm_cmp_ps` requires AVX,
Expand Down Expand Up @@ -133,14 +134,14 @@ impl X86LoadStore for f32x4 {
impl SIMDPartialEq for f32x4 {
#[inline(always)]
fn eq_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_EQ_OQ)) };
Self::Mask::from_underlying(self.arch(), m)
}

#[inline(always)]
fn ne_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_NEQ_UQ)) };
Self::Mask::from_underlying(self.arch(), m)
}
Expand All @@ -149,28 +150,28 @@ impl SIMDPartialEq for f32x4 {
impl SIMDPartialOrd for f32x4 {
#[inline(always)]
fn lt_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG.
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_LT_OQ)) };
Self::Mask::from_underlying(self.arch(), m)
}

#[inline(always)]
fn le_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG.
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_LE_OQ)) };
Self::Mask::from_underlying(self.arch(), m)
}

#[inline(always)]
fn gt_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG.
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_GT_OQ)) };
Self::Mask::from_underlying(self.arch(), m)
}

#[inline(always)]
fn ge_simd(self, other: Self) -> Self::Mask {
// SAFETY: Gated by CFG.
// SAFETY: `_mm_castps_si128` and `_mm_cmp_ps` require AVX, implied by V3.
let m = unsafe { _mm_castps_si128(_mm_cmp_ps(self.0, other.0, _CMP_GE_OQ)) };
Self::Mask::from_underlying(self.arch(), m)
}
Expand All @@ -180,7 +181,7 @@ impl SIMDSumTree for f32x4 {
#[inline(always)]
fn sum_tree(self) -> f32 {
let x = self.to_underlying();
// SAFETY: Gated by CFG.
// SAFETY: These intrinsics require SSE, implied by V3.
unsafe {
// loDual = ( -, -, x1, x0 )
let lo_dual = x;
Expand Down
Loading
Loading