
Adding the x86 part of behavioural testing for std::arch intrinsics #1814


Draft pull request: wants to merge 38 commits into base: master

Commits (38)
a17a1b4  fix: updated function definition of "from_c" in IntrinsicTypeDefinition (madhav-madhusoodanan, May 30, 2025)
c009f9f  Feat: started the skeleton for x86 module, added XML intrinsic parsin… (madhav-madhusoodanan, May 30, 2025)
c80661a  feat: added functionality to convert XML description of intrinsics to (madhav-madhusoodanan, Jun 3, 2025)
6729521  Check cfg on features that stage0 compiler support (a4lg, May 14, 2025)
7f4e962  cmpxchg16b: use atomic_compare_exchange from libcore (RalfJung, May 29, 2025)
1a95f0e  mark gfni, vaes, vpclmulqdq intrinsics as safe (usamoi, May 29, 2025)
3dde64e  Fix `ldpte` and `lddir` signature (sayantn, May 27, 2025)
d07ac79  Fix PPC shift and rotate intrinsics (sayantn, May 27, 2025)
570a52a  Fix s390x intrinsics (sayantn, May 27, 2025)
eac6f19  Use the new definition of `rdtscp` intrinsic (sayantn, May 27, 2025)
19c6da3  Upgrade more intrinsics to the new version (sayantn, May 28, 2025)
f331ba4  Add back `std_detect_env_override` (sayantn, May 22, 2025)
e6cb1dc  RISC-V: Linux: Imply Zicntr from the IMA base behavior (a4lg, May 26, 2025)
3713f07  stdarch-gen-arm: Modernization of the coding style (a4lg, May 31, 2025)
e04906c  stdarch-gen-loongarch: Modernization of the coding style (a4lg, May 31, 2025)
bfd4f86  stdarch-test: Modernization of the coding style (a4lg, May 31, 2025)
4ebbace  stdarch-verify: Modernization of the coding style (a4lg, May 31, 2025)
a9bd211  stdarch_examples: Modernization of the coding style (a4lg, May 31, 2025)
254a1d5  intrinsic-test: Modernization of the coding style (a4lg, May 31, 2025)
461fc9c  Stabilize keylocker intrinsics and runtime detection (sayantn, May 7, 2025)
7184391  Stabilize `sha512`, `sm3` and `sm4` intrinsics and runtime detection (sayantn, May 7, 2025)
9705c00  intrinsic-test: Use `c_prefix` to generate type names (a4lg, May 31, 2025)
3c8250a  intrinsic-test: Reverse `has_constraints()` condition (a4lg, May 31, 2025)
7ae987a  use `simd_bitreverse` on `aarch64` (folkertdev, May 30, 2025)
76dec19  RISC-V: Linux 6.15 `riscv_hwprobe` support (a4lg, May 31, 2025)
ce83907  Revert vbsl[q]_f16 to unstable (adamgemmell, May 30, 2025)
1039498  Mark Neon f16 vectors as unstable (adamgemmell, May 30, 2025)
8b6dc9b  Use rust intrinsics for more ARM intrinsics (sayantn, Jun 2, 2025)
24c7154  Use correct LLVM intrinsic for `vmull` and `vaddv` (sayantn, Jun 2, 2025)
57c3ea1  Remove uses of deprecated type-specific pointers from ARM (sayantn, Jun 2, 2025)
63083ba  Fix incorrect intrinsic name in X86 (sayantn, Jun 2, 2025)
3ca10bd  Fix incorrect intrinsic name in WASM (sayantn, Jun 2, 2025)
ae72698  Stabilize AVX512 intrinsics (Amanieu, Jun 2, 2025)
9595fde  feat: added the simple set of argument types for X86 intrinsics (madhav-madhusoodanan, Jun 8, 2025)
6b0d166  feat: added X86IntrinsicType parsing from string. (madhav-madhusoodanan, Jun 8, 2025)
7047369  fix: removing Box<> types from IntrinsicType in "from_c" definition for (madhav-madhusoodanan, Jun 8, 2025)
2934899  feat: implemented c_type for X86IntrinsicType (madhav-madhusoodanan, Jun 8, 2025)
d92ce71  Sharpening the parsing logic: (madhav-madhusoodanan, Jun 13, 2025)
1 change: 1 addition & 0 deletions ci/run-docker.sh
@@ -37,6 +37,7 @@ run() {
 --env NORUN \
 --env RUSTFLAGS \
 --env CARGO_UNSTABLE_BUILD_STD \
+--env RUST_STD_DETECT_UNSTABLE \
 --volume "${HOME}/.cargo":/cargo \
 --volume "$(rustc --print sysroot)":/rust:ro \
 --volume "$(pwd)":/checkout:ro \
2 changes: 1 addition & 1 deletion ci/run.sh
@@ -10,7 +10,7 @@ set -ex
 #export RUST_TEST_NOCAPTURE=1
 #export RUST_TEST_THREADS=1
 
-export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled "
+export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
 export HOST_RUSTFLAGS="${RUSTFLAGS}"
 export PROFILE="${PROFILE:="--profile=release"}"
509 changes: 118 additions & 391 deletions crates/core_arch/src/aarch64/neon/generated.rs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion crates/core_arch/src/aarch64/neon/mod.rs
@@ -13,7 +13,7 @@ pub use self::generated::*;
 use crate::{
 core_arch::{arm_shared::*, simd::*},
 hint::unreachable_unchecked,
-intrinsics::simd::*,
+intrinsics::{simd::*, *},
 mem::transmute,
 };
 #[cfg(test)]
604 changes: 267 additions & 337 deletions crates/core_arch/src/arm_shared/neon/generated.rs

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions crates/core_arch/src/arm_shared/neon/mod.rs
@@ -62,8 +62,6 @@ types! {
 pub struct int16x4_t(4 x pub(crate) i16);
 /// Arm-specific 64-bit wide vector of four packed `u16`.
 pub struct uint16x4_t(4 x pub(crate) u16);
-// Arm-specific 64-bit wide vector of four packed `f16`.
-pub struct float16x4_t(4 x pub(crate) f16);
 /// Arm-specific 64-bit wide vector of four packed `p16`.
 pub struct poly16x4_t(4 x pub(crate) p16);
 /// Arm-specific 64-bit wide vector of two packed `i32`.
@@ -89,8 +87,6 @@
 pub struct int16x8_t(8 x pub(crate) i16);
 /// Arm-specific 128-bit wide vector of eight packed `u16`.
 pub struct uint16x8_t(8 x pub(crate) u16);
-// Arm-specific 128-bit wide vector of eight packed `f16`.
-pub struct float16x8_t(8 x pub(crate) f16);
 /// Arm-specific 128-bit wide vector of eight packed `p16`.
 pub struct poly16x8_t(8 x pub(crate) p16);
 /// Arm-specific 128-bit wide vector of four packed `i32`.
@@ -107,6 +103,15 @@
 pub struct poly64x2_t(2 x pub(crate) p64);
 }
 
+types! {
+#![unstable(feature = "stdarch_neon_f16", issue = "136306")]
+
+/// Arm-specific 64-bit wide vector of four packed `f16`.
+pub struct float16x4_t(4 x pub(crate) f16);
+/// Arm-specific 128-bit wide vector of eight packed `f16`.
+pub struct float16x8_t(8 x pub(crate) f16);
+}
+
 /// Arm-specific type containing two `int8x8_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
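A usage note on the hunk above: moving the f16 vectors into their own types! block with #![unstable(...)] gates them behind the stdarch_neon_f16 library feature, so downstream code now has to opt in explicitly. A minimal sketch, assuming an aarch64 nightly toolchain:

    #![feature(stdarch_neon_f16)] // opt in to the unstable f16 vector types (tracking issue 136306)

    use core::arch::aarch64::float16x4_t; // previously exported without a feature gate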
10 changes: 6 additions & 4 deletions crates/core_arch/src/loongarch64/mod.rs
@@ -329,16 +329,18 @@ pub unsafe fn asrtgt(a: i64, b: i64) {
 
 /// Loads the page table directory entry
 #[inline]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn lddir(a: i64, b: i64) -> i64 {
-__lddir(a, b)
+pub unsafe fn lddir<const B: i64>(a: i64) -> i64 {
+__lddir(a, B)
 }
 
 /// Loads the page table entry
 #[inline]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub unsafe fn ldpte(a: i64, b: i64) {
-__ldpte(a, b)
+pub unsafe fn ldpte<const B: i64>(a: i64) {
+__ldpte(a, B)
 }
 
 /// Calculate the approximate single-precision result of 1.0 divided
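For context on the signature change above: #[rustc_legacy_const_generics(1)] lifts argument 1 into the const parameter B while letting existing call sites keep passing it positionally, provided the value is a compile-time constant. An illustrative call site (the base address and level value are hypothetical):

    // Old-style call, still accepted: the attribute rewrites the constant
    // positional argument into the const generic `B`.
    let entry = unsafe { lddir(base, 1) };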
18 changes: 9 additions & 9 deletions crates/core_arch/src/powerpc/altivec.rs
@@ -338,26 +338,26 @@ unsafe extern "C" {
 #[link_name = "llvm.ppc.altivec.vlogefp"]
 fn vlogefp(a: vector_float) -> vector_float;
 
-#[link_name = "llvm.ppc.altivec.sll"]
+#[link_name = "llvm.ppc.altivec.vsl"]
 fn vsl(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
-#[link_name = "llvm.ppc.altivec.slo"]
+#[link_name = "llvm.ppc.altivec.vslo"]
 fn vslo(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
 
-#[link_name = "llvm.ppc.altivec.srab"]
+#[link_name = "llvm.ppc.altivec.vsrab"]
 fn vsrab(a: vector_signed_char, b: vector_unsigned_char) -> vector_signed_char;
-#[link_name = "llvm.ppc.altivec.srah"]
+#[link_name = "llvm.ppc.altivec.vsrah"]
 fn vsrah(a: vector_signed_short, b: vector_unsigned_short) -> vector_signed_short;
-#[link_name = "llvm.ppc.altivec.sraw"]
+#[link_name = "llvm.ppc.altivec.vsraw"]
 fn vsraw(a: vector_signed_int, b: vector_unsigned_int) -> vector_signed_int;
 
-#[link_name = "llvm.ppc.altivec.srl"]
+#[link_name = "llvm.ppc.altivec.vsr"]
 fn vsr(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
-#[link_name = "llvm.ppc.altivec.sro"]
+#[link_name = "llvm.ppc.altivec.vsro"]
 fn vsro(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
 
-#[link_name = "llvm.ppc.altivec.slv"]
+#[link_name = "llvm.ppc.altivec.vslv"]
 fn vslv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
-#[link_name = "llvm.ppc.altivec.srv"]
+#[link_name = "llvm.ppc.altivec.vsrv"]
 fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char;
 
 #[link_name = "llvm.fshl.v16i8"]
18 changes: 9 additions & 9 deletions crates/core_arch/src/s390x/vector.rs
@@ -174,9 +174,9 @@ unsafe extern "unadjusted" {
 #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
 #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
 
-#[link_name = "llvm.s390.vuplbw"] fn vuplbw (a: vector_signed_char) -> vector_signed_short;
+#[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short;
 #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int;
-#[link_name = "llvm.s390.vuplfw"] fn vuplfw (a: vector_signed_int) -> vector_signed_long_long;
+#[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long;
 #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
 #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
 #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
@@ -2581,9 +2581,9 @@ mod sealed {
 // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like
 // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576.
 
-impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short}
+impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short}
 impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int}
-impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long}
+impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplf (vector_signed_int) -> vector_signed_long_long}
 
 impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short}
 impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int}
@@ -3011,9 +3011,9 @@ mod sealed {
 #[inline]
 #[target_feature(enable = "vector")]
 unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self {
-let b = simd_and(b, transmute(c));
-let a = simd_and(self, simd_xor(transmute(c), transmute(vector_signed_char([!0; 16]))));
-simd_or(a, b)
+let b = simd_and(transmute(b), c);
+let a = simd_and(transmute(self), simd_xor(c, transmute(vector_signed_char([!0; 16]))));
+transmute(simd_or(a, b))
 }
 }

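The vec_sel rewrite above keeps the usual bitwise-select semantics but performs the logic in the unsigned mask type, transmuting once on the way out. A scalar model of the identity being computed, with u32 standing in for one lane (illustrative only, not the real implementation):

    fn sel(a: u32, b: u32, c: u32) -> u32 {
        // Bits where the mask `c` is 1 are taken from `b`; bits where it is 0 come from `a`.
        (b & c) | (a & !c)
    }

    fn main() {
        // The mask picks bytes 1 and 3 from `b` and bytes 0 and 2 from `a`.
        assert_eq!(sel(0x0000_FFFF, 0xAAAA_AAAA, 0x00FF_00FF), 0x00AA_FFAA);
    }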
@@ -3198,14 +3198,14 @@ mod sealed {
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
 impl VectorSearchString for $ty {
 #[inline]
-#[target_feature(enable = "vector")]
+#[target_feature(enable = "vector-enhancements-2")]
 unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
 let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c);
 (x, y)
 }
 
 #[inline]
-#[target_feature(enable = "vector")]
+#[target_feature(enable = "vector-enhancements-2")]
 unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) {
 let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c);
 (x, y)
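The target-feature bump above reflects that the vector-string-search instructions belong to the vector-enhancements-2 facility rather than the base vector facility. Callers dispatching at runtime would guard accordingly; a hedged sketch (the detection macro is nightly-only):

    if std::arch::is_s390x_feature_detected!("vector-enhancements-2") {
        // safe to reach the vec_search_string_* intrinsics here
    }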
4 changes: 2 additions & 2 deletions crates/core_arch/src/wasm32/simd128.rs
@@ -110,9 +110,9 @@ unsafe extern "unadjusted" {
 #[link_name = "llvm.wasm.avgr.unsigned.v8i16"]
 fn llvm_avgr_u_i16x8(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
 
-#[link_name = "llvm.wasm.extadd.pairwise.signed.v16i8"]
+#[link_name = "llvm.wasm.extadd.pairwise.signed.v4i32"]
 fn llvm_i32x4_extadd_pairwise_i16x8_s(x: simd::i16x8) -> simd::i32x4;
-#[link_name = "llvm.wasm.extadd.pairwise.unsigned.v16i8"]
+#[link_name = "llvm.wasm.extadd.pairwise.unsigned.v4i32"]
 fn llvm_i32x4_extadd_pairwise_i16x8_u(x: simd::i16x8) -> simd::i32x4;
 #[link_name = "llvm.wasm.alltrue.v4i32"]
 fn llvm_i32x4_all_true(x: simd::i32x4) -> i32;
4 changes: 2 additions & 2 deletions crates/core_arch/src/x86/avx.rs
@@ -573,7 +573,7 @@ pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 static_assert_uimm_bits!(IMM8, 8);
-unsafe { vdpps(a, b, IMM8) }
+unsafe { vdpps(a, b, IMM8 as i8) }
 }
 
 /// Horizontal addition of adjacent pairs in the two packed vectors
@@ -3043,7 +3043,7 @@ unsafe extern "C" {
 #[link_name = "llvm.x86.avx.round.ps.256"]
 fn roundps256(a: __m256, b: i32) -> __m256;
 #[link_name = "llvm.x86.avx.dp.ps.256"]
-fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
+fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
 #[link_name = "llvm.x86.avx.hadd.pd.256"]
 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
 #[link_name = "llvm.x86.avx.hadd.ps.256"]
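The `as i8` casts in this file (and the matching mpsadbw change in avx2.rs below) align the Rust declarations with LLVM's signatures, which take the immediate as i8; the public wrappers keep const IMM8: i32, validate the range, and cast only at the FFI boundary. A schematic of the pattern, with with_imm8 and llvm_binding as illustrative stand-ins rather than real bindings:

    pub fn with_imm8<const IMM8: i32>(a: __m256) -> __m256 {
        // Reject immediates wider than 8 bits at compile time...
        static_assert_uimm_bits!(IMM8, 8);
        // ...then cast so the argument type matches the LLVM-side declaration.
        unsafe { llvm_binding(a, IMM8 as i8) }
    }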
4 changes: 2 additions & 2 deletions crates/core_arch/src/x86/avx2.rs
@@ -2146,7 +2146,7 @@ pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 static_assert_uimm_bits!(IMM8, 8);
-unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
+unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
 }
 
 /// Multiplies the low 32-bit integers from each packed 64-bit element in
@@ -3800,7 +3800,7 @@ unsafe extern "C" {
 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
 #[link_name = "llvm.x86.avx2.mpsadbw"]
-fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
+fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
 #[link_name = "llvm.x86.avx2.packsswb"]