Skip to content

Fix errors in decoupling avx512vl and avx512dq from avx512fp16 #1788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1425,7 +1425,10 @@ pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
Expand All @@ -1440,7 +1443,10 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
Expand All @@ -1455,7 +1461,10 @@ pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
Expand All @@ -1470,7 +1479,10 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
Expand Down Expand Up @@ -1548,7 +1560,10 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] // FIXME vmovdqa expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
*mem_addr
Expand All @@ -1561,7 +1576,10 @@ pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] // FIXME vmovdqa expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
*mem_addr = a;
Expand Down
90 changes: 72 additions & 18 deletions crates/core_arch/src/x86/avx512f.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34468,7 +34468,10 @@ pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
ptr::read(mem_addr)
}
Expand All @@ -34479,7 +34482,10 @@ pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
ptr::write(mem_addr, a);
}
Expand All @@ -34490,7 +34496,10 @@ pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
Expand All @@ -34501,7 +34510,10 @@ pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
ptr::read(mem_addr as *const __m256i)
}
Expand All @@ -34512,7 +34524,10 @@ pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
ptr::read(mem_addr as *const __m128i)
}
Expand All @@ -34523,7 +34538,10 @@ pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
Expand All @@ -34534,7 +34552,10 @@ pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
ptr::write(mem_addr as *mut __m256i, a);
}
Expand All @@ -34545,7 +34566,10 @@ pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
ptr::write(mem_addr as *mut __m128i, a);
}
Expand All @@ -34556,7 +34580,10 @@ pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
Expand All @@ -34567,7 +34594,10 @@ pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
ptr::read(mem_addr as *const __m256i)
}
Expand All @@ -34578,7 +34608,10 @@ pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
ptr::read(mem_addr as *const __m128i)
}
Expand All @@ -34589,7 +34622,10 @@ pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
Expand All @@ -34600,7 +34636,10 @@ pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
ptr::write(mem_addr as *mut __m256i, a);
}
Expand All @@ -34611,7 +34650,10 @@ pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
ptr::write(mem_addr as *mut __m128i, a);
}
Expand All @@ -34622,7 +34664,10 @@ pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
ptr::read(mem_addr as *const __m512)
}
Expand All @@ -34633,7 +34678,10 @@ pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
ptr::write(mem_addr as *mut __m512, a);
}
Expand All @@ -34644,7 +34692,10 @@ pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
ptr::read(mem_addr as *const __m512d)
}
Expand All @@ -34655,7 +34706,10 @@ pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
#[cfg_attr(
all(test, not(all(target_arch = "x86", target_env = "msvc"))),
assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write(mem_addr as *mut __m512d, a);
}
Expand Down
Loading