Skip to content

Commit 7387776

Browse files
committed
Fixed many intrinsics
fixed avx512-fma, mask-load/store stream, reduce-add and reduce-mul. and load/store of mask32 and mask64. added preserves-flags to load and store asm. fixed the missing list
1 parent 258f489 commit 7387776

File tree

8 files changed

+475
-710
lines changed

8 files changed

+475
-710
lines changed

crates/core_arch/missing-x86.md

Lines changed: 6 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353

5454
<details><summary>["AVX2"]</summary><p>
5555

56+
* [ ] [`_mm256_stream_load_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_load_si256)
5657
* [ ] [`_mm_broadcastsi128_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsi128_si256)
5758
</p></details>
5859

@@ -174,6 +175,7 @@
174175
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
175176
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
176177
* [ ] [`_mm512_mask_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
178+
* [ ] [`_mm512_stream_load_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_load_si512)
177179
* [ ] [`_mm_mask_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
178180
* [ ] [`_mm_mask_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
179181
* [ ] [`_mm_mask_store_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
@@ -1539,95 +1541,25 @@
15391541

15401542
<details><summary>["SSE"]</summary><p>
15411543

1542-
* [ ] [`_m_maskmovq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_maskmovq)
1543-
* [ ] [`_m_pavgb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgb)
1544-
* [ ] [`_m_pavgw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgw)
1545-
* [ ] [`_m_pextrw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pextrw)
1546-
* [ ] [`_m_pinsrw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pinsrw)
1547-
* [ ] [`_m_pmaxsw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxsw)
1548-
* [ ] [`_m_pmaxub`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxub)
1549-
* [ ] [`_m_pminsw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminsw)
1550-
* [ ] [`_m_pminub`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminub)
1551-
* [ ] [`_m_pmovmskb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmovmskb)
1552-
* [ ] [`_m_pmulhuw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw)
1553-
* [ ] [`_m_psadbw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_psadbw)
1554-
* [ ] [`_m_pshufw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pshufw)
1555-
* [ ] [`_mm_avg_pu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu16)
1556-
* [ ] [`_mm_avg_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu8)
1557-
* [ ] [`_mm_cvt_pi2ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_pi2ps)
1558-
* [ ] [`_mm_cvt_ps2pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ps2pi)
1559-
* [ ] [`_mm_cvtpi16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi16_ps)
1560-
* [ ] [`_mm_cvtpi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_ps)
1561-
* [ ] [`_mm_cvtpi32x2_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32x2_ps)
1562-
* [ ] [`_mm_cvtpi8_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi8_ps)
1563-
* [ ] [`_mm_cvtps_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16)
1564-
* [ ] [`_mm_cvtps_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi32)
1565-
* [ ] [`_mm_cvtps_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi8)
1566-
* [ ] [`_mm_cvtpu16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu16_ps)
1567-
* [ ] [`_mm_cvtpu8_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu8_ps)
1568-
* [ ] [`_mm_cvtt_ps2pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ps2pi)
1569-
* [ ] [`_mm_cvttps_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_pi32)
1570-
* [ ] [`_mm_extract_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_pi16)
15711544
* [ ] [`_mm_free`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free)
1572-
* [ ] [`_mm_insert_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_pi16)
1573-
* [ ] [`_mm_loadh_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pi)
1574-
* [ ] [`_mm_loadl_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pi)
15751545
* [ ] [`_mm_malloc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_malloc)
1576-
* [ ] [`_mm_maskmove_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmove_si64)
1577-
* [ ] [`_mm_max_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16)
1578-
* [ ] [`_mm_max_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8)
1579-
* [ ] [`_mm_min_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16)
1580-
* [ ] [`_mm_min_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8)
1581-
* [ ] [`_mm_movemask_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pi8)
1582-
* [ ] [`_mm_mulhi_pu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_pu16)
1583-
* [ ] [`_mm_sad_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_pu8)
1584-
* [ ] [`_mm_shuffle_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi16)
1585-
* [ ] [`_mm_shuffle_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_ps)
1586-
* [ ] [`_mm_storeh_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pi)
1587-
* [ ] [`_mm_storel_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pi)
1588-
* [ ] [`_mm_stream_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pi)
15891546
</p></details>
15901547

15911548

15921549
<details><summary>["SSE2"]</summary><p>
15931550

1594-
* [ ] [`_mm_add_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_si64)
1595-
* [ ] [`_mm_cvtpd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_pi32)
1596-
* [ ] [`_mm_cvtpi32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_pd)
1597-
* [ ] [`_mm_cvttpd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_pi32)
15981551
* [ ] [`_mm_loadu_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si16)
15991552
* [ ] [`_mm_loadu_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si32)
1600-
* [ ] [`_mm_movepi64_pi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi64_pi64)
1601-
* [ ] [`_mm_movpi64_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movpi64_epi64)
1602-
* [ ] [`_mm_mul_su32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_su32)
1603-
* [ ] [`_mm_set1_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64)
1604-
* [ ] [`_mm_set_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64)
1605-
* [ ] [`_mm_setr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi64)
1553+
* [ ] [`_mm_loadu_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64)
16061554
* [ ] [`_mm_storeu_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si16)
16071555
* [ ] [`_mm_storeu_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si32)
16081556
* [ ] [`_mm_storeu_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si64)
1609-
* [ ] [`_mm_sub_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_si64)
16101557
</p></details>
16111558

16121559

1613-
<details><summary>["SSSE3"]</summary><p>
1614-
1615-
* [ ] [`_mm_abs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16)
1616-
* [ ] [`_mm_abs_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32)
1617-
* [ ] [`_mm_abs_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8)
1618-
* [ ] [`_mm_alignr_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8)
1619-
* [ ] [`_mm_hadd_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16)
1620-
* [ ] [`_mm_hadd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32)
1621-
* [ ] [`_mm_hadds_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_pi16)
1622-
* [ ] [`_mm_hsub_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi16)
1623-
* [ ] [`_mm_hsub_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi32)
1624-
* [ ] [`_mm_hsubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_pi16)
1625-
* [ ] [`_mm_maddubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_pi16)
1626-
* [ ] [`_mm_mulhrs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_pi16)
1627-
* [ ] [`_mm_shuffle_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi8)
1628-
* [ ] [`_mm_sign_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16)
1629-
* [ ] [`_mm_sign_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32)
1630-
* [ ] [`_mm_sign_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8)
1560+
<details><summary>["SSE4.1"]</summary><p>
1561+
1562+
* [ ] [`_mm_stream_load_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128)
16311563
</p></details>
16321564

16331565

crates/core_arch/src/x86/avx.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,11 +1715,11 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
17151715
/// See [`_mm_sfence`] for details.
17161716
#[inline]
17171717
#[target_feature(enable = "avx")]
1718-
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
1718+
#[cfg_attr(test, assert_instr(vmovntdq))]
17191719
#[stable(feature = "simd_x86", since = "1.27.0")]
17201720
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
17211721
crate::arch::asm!(
1722-
"vmovntps [{mem_addr}], {a}",
1722+
"vmovntdq [{mem_addr}], {a}",
17231723
mem_addr = in(reg) mem_addr,
17241724
a = in(ymm_reg) a,
17251725
options(nostack, preserves_flags),
@@ -1742,12 +1742,12 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
17421742
/// See [`_mm_sfence`] for details.
17431743
#[inline]
17441744
#[target_feature(enable = "avx")]
1745-
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
1745+
#[cfg_attr(test, assert_instr(vmovntpd))]
17461746
#[stable(feature = "simd_x86", since = "1.27.0")]
17471747
#[allow(clippy::cast_ptr_alignment)]
17481748
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
17491749
crate::arch::asm!(
1750-
"vmovntps [{mem_addr}], {a}",
1750+
"vmovntpd [{mem_addr}], {a}",
17511751
mem_addr = in(reg) mem_addr,
17521752
a = in(ymm_reg) a,
17531753
options(nostack, preserves_flags),

crates/core_arch/src/x86/avx2.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3124,8 +3124,6 @@ pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
31243124
transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
31253125
}
31263126

3127-
// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3128-
31293127
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
31303128
///
31313129
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4583,7 +4583,7 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
45834583
p = in(reg) mem_addr,
45844584
k = in(kreg) k,
45854585
dst = inout(zmm_reg) dst,
4586-
options(pure, readonly, nostack)
4586+
options(pure, readonly, nostack, preserves_flags)
45874587
);
45884588
dst
45894589
}
@@ -4603,7 +4603,7 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
46034603
p = in(reg) mem_addr,
46044604
k = in(kreg) k,
46054605
dst = out(zmm_reg) dst,
4606-
options(pure, readonly, nostack)
4606+
options(pure, readonly, nostack, preserves_flags)
46074607
);
46084608
dst
46094609
}
@@ -4623,7 +4623,7 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
46234623
p = in(reg) mem_addr,
46244624
k = in(kreg) k,
46254625
dst = inout(zmm_reg) dst,
4626-
options(pure, readonly, nostack)
4626+
options(pure, readonly, nostack, preserves_flags)
46274627
);
46284628
dst
46294629
}
@@ -4643,7 +4643,7 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
46434643
p = in(reg) mem_addr,
46444644
k = in(kreg) k,
46454645
dst = out(zmm_reg) dst,
4646-
options(pure, readonly, nostack)
4646+
options(pure, readonly, nostack, preserves_flags)
46474647
);
46484648
dst
46494649
}
@@ -4663,7 +4663,7 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
46634663
p = in(reg) mem_addr,
46644664
k = in(kreg) k,
46654665
dst = inout(ymm_reg) dst,
4666-
options(pure, readonly, nostack)
4666+
options(pure, readonly, nostack, preserves_flags)
46674667
);
46684668
dst
46694669
}
@@ -4683,7 +4683,7 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
46834683
p = in(reg) mem_addr,
46844684
k = in(kreg) k,
46854685
dst = out(ymm_reg) dst,
4686-
options(pure, readonly, nostack)
4686+
options(pure, readonly, nostack, preserves_flags)
46874687
);
46884688
dst
46894689
}
@@ -4703,7 +4703,7 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
47034703
p = in(reg) mem_addr,
47044704
k = in(kreg) k,
47054705
dst = inout(ymm_reg) dst,
4706-
options(pure, readonly, nostack)
4706+
options(pure, readonly, nostack, preserves_flags)
47074707
);
47084708
dst
47094709
}
@@ -4723,7 +4723,7 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
47234723
p = in(reg) mem_addr,
47244724
k = in(kreg) k,
47254725
dst = out(ymm_reg) dst,
4726-
options(pure, readonly, nostack)
4726+
options(pure, readonly, nostack, preserves_flags)
47274727
);
47284728
dst
47294729
}
@@ -4743,7 +4743,7 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
47434743
p = in(reg) mem_addr,
47444744
k = in(kreg) k,
47454745
dst = inout(xmm_reg) dst,
4746-
options(pure, readonly, nostack)
4746+
options(pure, readonly, nostack, preserves_flags)
47474747
);
47484748
dst
47494749
}
@@ -4763,7 +4763,7 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
47634763
p = in(reg) mem_addr,
47644764
k = in(kreg) k,
47654765
dst = out(xmm_reg) dst,
4766-
options(pure, readonly, nostack)
4766+
options(pure, readonly, nostack, preserves_flags)
47674767
);
47684768
dst
47694769
}
@@ -4783,7 +4783,7 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
47834783
p = in(reg) mem_addr,
47844784
k = in(kreg) k,
47854785
dst = inout(xmm_reg) dst,
4786-
options(pure, readonly, nostack)
4786+
options(pure, readonly, nostack, preserves_flags)
47874787
);
47884788
dst
47894789
}
@@ -4803,7 +4803,7 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
48034803
p = in(reg) mem_addr,
48044804
k = in(kreg) k,
48054805
dst = out(xmm_reg) dst,
4806-
options(pure, readonly, nostack)
4806+
options(pure, readonly, nostack, preserves_flags)
48074807
);
48084808
dst
48094809
}
@@ -4821,7 +4821,7 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
48214821
p = in(reg) mem_addr,
48224822
mask = in(kreg) mask,
48234823
a = in(zmm_reg) a,
4824-
options(nostack)
4824+
options(nostack, preserves_flags)
48254825
);
48264826
}
48274827

@@ -4838,7 +4838,7 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
48384838
p = in(reg) mem_addr,
48394839
mask = in(kreg) mask,
48404840
a = in(zmm_reg) a,
4841-
options(nostack)
4841+
options(nostack, preserves_flags)
48424842
);
48434843
}
48444844

@@ -4855,7 +4855,7 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
48554855
p = in(reg) mem_addr,
48564856
mask = in(kreg) mask,
48574857
a = in(ymm_reg) a,
4858-
options(nostack)
4858+
options(nostack, preserves_flags)
48594859
);
48604860
}
48614861

@@ -4872,7 +4872,7 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
48724872
p = in(reg) mem_addr,
48734873
mask = in(kreg) mask,
48744874
a = in(ymm_reg) a,
4875-
options(nostack)
4875+
options(nostack, preserves_flags)
48764876
);
48774877
}
48784878

@@ -4889,7 +4889,7 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
48894889
p = in(reg) mem_addr,
48904890
mask = in(kreg) mask,
48914891
a = in(xmm_reg) a,
4892-
options(nostack)
4892+
options(nostack, preserves_flags)
48934893
);
48944894
}
48954895

@@ -4906,7 +4906,7 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
49064906
p = in(reg) mem_addr,
49074907
mask = in(kreg) mask,
49084908
a = in(xmm_reg) a,
4909-
options(nostack)
4909+
options(nostack, preserves_flags)
49104910
);
49114911
}
49124912

@@ -8761,7 +8761,7 @@ pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) ->
87618761
#[target_feature(enable = "avx512bw")]
87628762
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87638763
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
8764-
pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
8764+
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
87658765
ptr::write(mem_addr as *mut __mmask64, a);
87668766
}
87678767

@@ -8772,7 +8772,7 @@ pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
87728772
#[target_feature(enable = "avx512bw")]
87738773
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87748774
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
8775-
pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
8775+
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
87768776
ptr::write(mem_addr as *mut __mmask32, a);
87778777
}
87788778

@@ -8783,7 +8783,7 @@ pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
87838783
#[target_feature(enable = "avx512bw")]
87848784
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87858785
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
8786-
pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
8786+
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
87878787
ptr::read(mem_addr as *const __mmask64)
87888788
}
87898789

@@ -8794,7 +8794,7 @@ pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
87948794
#[target_feature(enable = "avx512bw")]
87958795
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
87968796
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
8797-
pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 {
8797+
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
87988798
ptr::read(mem_addr as *const __mmask32)
87998799
}
88008800

0 commit comments

Comments
 (0)