Skip to content

Commit 738a603

Browse files
authored
Merge pull request #1955 from sayantn/vector-shifts
Use SIMD intrinsics for vector shifts
2 parents 6c97650 + 102f03d commit 738a603

File tree

3 files changed

+162
-87
lines changed

3 files changed

+162
-87
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2786,7 +2786,12 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
27862786
#[cfg_attr(test, assert_instr(vpsllvd))]
27872787
#[stable(feature = "simd_x86", since = "1.27.0")]
27882788
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2789-
unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
2789+
unsafe {
2790+
let count = count.as_u32x4();
2791+
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
2792+
let count = simd_select(no_overflow, count, u32x4::ZERO);
2793+
simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
2794+
}
27902795
}
27912796

27922797
/// Shifts packed 32-bit integers in `a` left by the amount
@@ -2799,7 +2804,12 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
27992804
#[cfg_attr(test, assert_instr(vpsllvd))]
28002805
#[stable(feature = "simd_x86", since = "1.27.0")]
28012806
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
2802-
unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
2807+
unsafe {
2808+
let count = count.as_u32x8();
2809+
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
2810+
let count = simd_select(no_overflow, count, u32x8::ZERO);
2811+
simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
2812+
}
28032813
}
28042814

28052815
/// Shifts packed 64-bit integers in `a` left by the amount
@@ -2812,7 +2822,12 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
28122822
#[cfg_attr(test, assert_instr(vpsllvq))]
28132823
#[stable(feature = "simd_x86", since = "1.27.0")]
28142824
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
2815-
unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
2825+
unsafe {
2826+
let count = count.as_u64x2();
2827+
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
2828+
let count = simd_select(no_overflow, count, u64x2::ZERO);
2829+
simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
2830+
}
28162831
}
28172832

28182833
/// Shifts packed 64-bit integers in `a` left by the amount
@@ -2825,7 +2840,12 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
28252840
#[cfg_attr(test, assert_instr(vpsllvq))]
28262841
#[stable(feature = "simd_x86", since = "1.27.0")]
28272842
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
2828-
unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
2843+
unsafe {
2844+
let count = count.as_u64x4();
2845+
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
2846+
let count = simd_select(no_overflow, count, u64x4::ZERO);
2847+
simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
2848+
}
28292849
}
28302850

28312851
/// Shifts packed 16-bit integers in `a` right by `count` while
@@ -2889,7 +2909,12 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
28892909
#[cfg_attr(test, assert_instr(vpsravd))]
28902910
#[stable(feature = "simd_x86", since = "1.27.0")]
28912911
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
2892-
unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
2912+
unsafe {
2913+
let count = count.as_u32x4();
2914+
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
2915+
let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
2916+
simd_shr(a.as_i32x4(), count).as_m128i()
2917+
}
28932918
}
28942919

28952920
/// Shifts packed 32-bit integers in `a` right by the amount specified by the
@@ -2901,7 +2926,12 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
29012926
#[cfg_attr(test, assert_instr(vpsravd))]
29022927
#[stable(feature = "simd_x86", since = "1.27.0")]
29032928
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2904-
unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
2929+
unsafe {
2930+
let count = count.as_u32x8();
2931+
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
2932+
let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
2933+
simd_shr(a.as_i32x8(), count).as_m256i()
2934+
}
29052935
}
29062936

29072937
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
@@ -3084,7 +3114,12 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
30843114
#[cfg_attr(test, assert_instr(vpsrlvd))]
30853115
#[stable(feature = "simd_x86", since = "1.27.0")]
30863116
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
3087-
unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
3117+
unsafe {
3118+
let count = count.as_u32x4();
3119+
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
3120+
let count = simd_select(no_overflow, count, u32x4::ZERO);
3121+
simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
3122+
}
30883123
}
30893124

30903125
/// Shifts packed 32-bit integers in `a` right by the amount specified by
@@ -3096,7 +3131,12 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
30963131
#[cfg_attr(test, assert_instr(vpsrlvd))]
30973132
#[stable(feature = "simd_x86", since = "1.27.0")]
30983133
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
3099-
unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
3134+
unsafe {
3135+
let count = count.as_u32x8();
3136+
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
3137+
let count = simd_select(no_overflow, count, u32x8::ZERO);
3138+
simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
3139+
}
31003140
}
31013141

31023142
/// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3108,7 +3148,12 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
31083148
#[cfg_attr(test, assert_instr(vpsrlvq))]
31093149
#[stable(feature = "simd_x86", since = "1.27.0")]
31103150
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
3111-
unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
3151+
unsafe {
3152+
let count = count.as_u64x2();
3153+
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
3154+
let count = simd_select(no_overflow, count, u64x2::ZERO);
3155+
simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
3156+
}
31123157
}
31133158

31143159
/// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3120,7 +3165,12 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
31203165
#[cfg_attr(test, assert_instr(vpsrlvq))]
31213166
#[stable(feature = "simd_x86", since = "1.27.0")]
31223167
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
3123-
unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
3168+
unsafe {
3169+
let count = count.as_u64x4();
3170+
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
3171+
let count = simd_select(no_overflow, count, u64x4::ZERO);
3172+
simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
3173+
}
31243174
}
31253175

31263176
/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
@@ -3679,36 +3729,16 @@ unsafe extern "C" {
36793729
fn pslld(a: i32x8, count: i32x4) -> i32x8;
36803730
#[link_name = "llvm.x86.avx2.psll.q"]
36813731
fn psllq(a: i64x4, count: i64x2) -> i64x4;
3682-
#[link_name = "llvm.x86.avx2.psllv.d"]
3683-
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3684-
#[link_name = "llvm.x86.avx2.psllv.d.256"]
3685-
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3686-
#[link_name = "llvm.x86.avx2.psllv.q"]
3687-
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3688-
#[link_name = "llvm.x86.avx2.psllv.q.256"]
3689-
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
36903732
#[link_name = "llvm.x86.avx2.psra.w"]
36913733
fn psraw(a: i16x16, count: i16x8) -> i16x16;
36923734
#[link_name = "llvm.x86.avx2.psra.d"]
36933735
fn psrad(a: i32x8, count: i32x4) -> i32x8;
3694-
#[link_name = "llvm.x86.avx2.psrav.d"]
3695-
fn psravd(a: i32x4, count: i32x4) -> i32x4;
3696-
#[link_name = "llvm.x86.avx2.psrav.d.256"]
3697-
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
36983736
#[link_name = "llvm.x86.avx2.psrl.w"]
36993737
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
37003738
#[link_name = "llvm.x86.avx2.psrl.d"]
37013739
fn psrld(a: i32x8, count: i32x4) -> i32x8;
37023740
#[link_name = "llvm.x86.avx2.psrl.q"]
37033741
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3704-
#[link_name = "llvm.x86.avx2.psrlv.d"]
3705-
fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3706-
#[link_name = "llvm.x86.avx2.psrlv.d.256"]
3707-
fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3708-
#[link_name = "llvm.x86.avx2.psrlv.q"]
3709-
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3710-
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
3711-
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
37123742
#[link_name = "llvm.x86.avx2.pshuf.b"]
37133743
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
37143744
#[link_name = "llvm.x86.avx2.permd"]

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6864,7 +6864,12 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
68646864
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
68656865
#[cfg_attr(test, assert_instr(vpsllvw))]
68666866
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
6867-
unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
6867+
unsafe {
6868+
let count = count.as_u16x32();
6869+
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
6870+
let count = simd_select(no_overflow, count, u16x32::ZERO);
6871+
simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
6872+
}
68686873
}
68696874

68706875
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6903,7 +6908,12 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
69036908
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69046909
#[cfg_attr(test, assert_instr(vpsllvw))]
69056910
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
6906-
unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
6911+
unsafe {
6912+
let count = count.as_u16x16();
6913+
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
6914+
let count = simd_select(no_overflow, count, u16x16::ZERO);
6915+
simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
6916+
}
69076917
}
69086918

69096919
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6942,7 +6952,12 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
69426952
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69436953
#[cfg_attr(test, assert_instr(vpsllvw))]
69446954
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
6945-
unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
6955+
unsafe {
6956+
let count = count.as_u16x8();
6957+
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
6958+
let count = simd_select(no_overflow, count, u16x8::ZERO);
6959+
simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
6960+
}
69466961
}
69476962

69486963
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7200,7 +7215,12 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
72007215
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
72017216
#[cfg_attr(test, assert_instr(vpsrlvw))]
72027217
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
7203-
unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
7218+
unsafe {
7219+
let count = count.as_u16x32();
7220+
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
7221+
let count = simd_select(no_overflow, count, u16x32::ZERO);
7222+
simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
7223+
}
72047224
}
72057225

72067226
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7239,7 +7259,12 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
72397259
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
72407260
#[cfg_attr(test, assert_instr(vpsrlvw))]
72417261
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
7242-
unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
7262+
unsafe {
7263+
let count = count.as_u16x16();
7264+
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
7265+
let count = simd_select(no_overflow, count, u16x16::ZERO);
7266+
simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
7267+
}
72437268
}
72447269

72457270
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7278,7 +7303,12 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
72787303
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
72797304
#[cfg_attr(test, assert_instr(vpsrlvw))]
72807305
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
7281-
unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
7306+
unsafe {
7307+
let count = count.as_u16x8();
7308+
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7309+
let count = simd_select(no_overflow, count, u16x8::ZERO);
7310+
simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
7311+
}
72827312
}
72837313

72847314
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7523,7 +7553,12 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
75237553
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
75247554
#[cfg_attr(test, assert_instr(vpsravw))]
75257555
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
7526-
unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
7556+
unsafe {
7557+
let count = count.as_u16x32();
7558+
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
7559+
let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
7560+
simd_shr(a.as_i16x32(), count).as_m512i()
7561+
}
75277562
}
75287563

75297564
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7562,7 +7597,12 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
75627597
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
75637598
#[cfg_attr(test, assert_instr(vpsravw))]
75647599
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
7565-
unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
7600+
unsafe {
7601+
let count = count.as_u16x16();
7602+
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
7603+
let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
7604+
simd_shr(a.as_i16x16(), count).as_m256i()
7605+
}
75667606
}
75677607

75687608
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7601,7 +7641,12 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
76017641
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
76027642
#[cfg_attr(test, assert_instr(vpsravw))]
76037643
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
7604-
unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
7644+
unsafe {
7645+
let count = count.as_u16x8();
7646+
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7647+
let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
7648+
simd_shr(a.as_i16x8(), count).as_m128i()
7649+
}
76057650
}
76067651

76077652
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11657,33 +11702,12 @@ unsafe extern "C" {
1165711702
#[link_name = "llvm.x86.avx512.psll.w.512"]
1165811703
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
1165911704

11660-
#[link_name = "llvm.x86.avx512.psllv.w.512"]
11661-
fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
11662-
#[link_name = "llvm.x86.avx512.psllv.w.256"]
11663-
fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
11664-
#[link_name = "llvm.x86.avx512.psllv.w.128"]
11665-
fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
11666-
1166711705
#[link_name = "llvm.x86.avx512.psrl.w.512"]
1166811706
fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
1166911707

11670-
#[link_name = "llvm.x86.avx512.psrlv.w.512"]
11671-
fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
11672-
#[link_name = "llvm.x86.avx512.psrlv.w.256"]
11673-
fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
11674-
#[link_name = "llvm.x86.avx512.psrlv.w.128"]
11675-
fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
11676-
1167711708
#[link_name = "llvm.x86.avx512.psra.w.512"]
1167811709
fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
1167911710

11680-
#[link_name = "llvm.x86.avx512.psrav.w.512"]
11681-
fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
11682-
#[link_name = "llvm.x86.avx512.psrav.w.256"]
11683-
fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
11684-
#[link_name = "llvm.x86.avx512.psrav.w.128"]
11685-
fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
11686-
1168711711
#[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
1168811712
fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
1168911713
#[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]

0 commit comments

Comments (0)