Skip to content

Commit 837dcaa

Browse files
committed
Start adding some avx512 intrinsics
First one is the quite simple `_mm512_abs_epi32` intrinsic!
1 parent e3cdea8 commit 837dcaa

File tree

8 files changed

+229
-0
lines changed

8 files changed

+229
-0
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ exclude = [
1010
[profile.release]
1111
debug = true
1212
opt-level = 3
13+
incremental = true
1314

1415
[profile.bench]
1516
debug = 1
1617
opt-level = 3
18+
incremental = true

coresimd/simd.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
181181
i32, i32, i32, i32, i32, i32, i32, i32
182182
| x0, x1, x2, x3, x4, x5, x6, x7);
183183
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
184+
185+
// 512-bit wide types:
186+
187+
simd_ty!(i32x16[i32]:
188+
i32, i32, i32, i32, i32, i32, i32, i32,
189+
i32, i32, i32, i32, i32, i32, i32, i32
190+
| x0, x1, x2, x3, x4, x5, x6, x7,
191+
x8, x9, x10, x11, x12, x13, x14, x15);

coresimd/x86/avx512f.rs

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
2+
3+
use coresimd::simd::*;
4+
use coresimd::x86::*;
5+
use mem;
6+
7+
#[cfg(test)]
8+
use stdsimd_test::assert_instr;
9+
10+
/// Computes the absolute values of packed 32-bit integers in `a`.
11+
///
12+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
13+
#[inline]
14+
#[target_feature(enable = "avx512f")]
15+
#[cfg_attr(test, assert_instr(vpabsd))]
16+
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
17+
mem::transmute(pabsd(
18+
a.as_i32x16(),
19+
_mm512_setzero_si512().as_i32x16(),
20+
-1,
21+
))
22+
}
23+
24+
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
25+
/// unsigned results in `dst` using writemask `k` (elements are copied from
26+
/// `src` when the corresponding mask bit is not set).
27+
///
28+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
29+
#[inline]
30+
#[target_feature(enable = "avx512f")]
31+
#[cfg_attr(test, assert_instr(vpabsd))]
32+
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
33+
mem::transmute(pabsd(
34+
a.as_i32x16(),
35+
src.as_i32x16(),
36+
k,
37+
))
38+
}
39+
40+
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
41+
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
42+
/// the corresponding mask bit is not set).
43+
///
44+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
45+
#[inline]
46+
#[target_feature(enable = "avx512f")]
47+
#[cfg_attr(test, assert_instr(vpabsd))]
48+
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
49+
mem::transmute(pabsd(
50+
a.as_i32x16(),
51+
_mm512_setzero_si512().as_i32x16(),
52+
k,
53+
))
54+
}
55+
56+
/// Return vector of type `__m512i` with all elements set to zero.
57+
///
58+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
59+
#[inline]
60+
#[target_feature(enable = "avx512f")]
61+
#[cfg_attr(test, assert_instr(vxorps))]
62+
pub unsafe fn _mm512_setzero_si512() -> __m512i {
63+
mem::zeroed()
64+
}
65+
66+
/// Set packed 32-bit integers in `dst` with the supplied values in reverse
67+
/// order.
68+
#[inline]
69+
#[target_feature(enable = "avx512f")]
70+
pub unsafe fn _mm512_setr_epi32(
71+
e15: i32,
72+
e14: i32,
73+
e13: i32,
74+
e12: i32,
75+
e11: i32,
76+
e10: i32,
77+
e9: i32,
78+
e8: i32,
79+
e7: i32,
80+
e6: i32,
81+
e5: i32,
82+
e4: i32,
83+
e3: i32,
84+
e2: i32,
85+
e1: i32,
86+
e0: i32,
87+
) -> __m512i {
88+
let r = i32x16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
89+
mem::transmute(r)
90+
}
91+
92+
#[allow(improper_ctypes)]
93+
extern "C" {
94+
#[link_name = "llvm.x86.avx2.pabs.b"]
95+
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
96+
}
97+
98+
#[cfg(test)]
99+
mod tests {
100+
use std;
101+
use stdsimd_test::simd_test;
102+
103+
use coresimd::x86::*;
104+
105+
#[simd_test(enable = "avx512f")]
106+
unsafe fn test_mm512_abs_epi32() {
107+
#[rustfmt::skip]
108+
let a = _mm512_setr_epi32(
109+
0, 1, -1, std::i32::MAX,
110+
std::i32::MIN, 100, -100, -32,
111+
0, 1, -1, std::i32::MAX,
112+
std::i32::MIN, 100, -100, -32,
113+
);
114+
let r = _mm512_abs_epi32(a);
115+
let e = _mm512_setr_epi32(
116+
0, 1, 1, std::i32::MAX,
117+
std::i32::MAX.wrapping_add(1), 100, 100, 32,
118+
0, 1, 1, std::i32::MAX,
119+
std::i32::MAX.wrapping_add(1), 100, 100, 32,
120+
);
121+
assert_eq_m512i(r, e);
122+
}
123+
124+
#[simd_test(enable = "avx512f")]
125+
unsafe fn test_mm512_mask_abs_epi32() {
126+
#[rustfmt::skip]
127+
let a = _mm512_setr_epi32(
128+
0, 1, -1, std::i32::MAX,
129+
std::i32::MIN, 100, -100, -32,
130+
0, 1, -1, std::i32::MAX,
131+
std::i32::MIN, 100, -100, -32,
132+
);
133+
let r = _mm512_mask_abs_epi32(a, 0, a);
134+
assert_eq_m512i(r, a);
135+
let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
136+
let e = _mm512_setr_epi32(
137+
0, 1, 1, std::i32::MAX,
138+
std::i32::MAX.wrapping_add(1), 100, 100, 32,
139+
0, 1, -1, std::i32::MAX,
140+
std::i32::MIN, 100, -100, -32,
141+
);
142+
assert_eq_m512i(r, e);
143+
}
144+
145+
#[simd_test(enable = "avx512f")]
146+
unsafe fn test_mm512_maskz_abs_epi32() {
147+
#[rustfmt::skip]
148+
let a = _mm512_setr_epi32(
149+
0, 1, -1, std::i32::MAX,
150+
std::i32::MIN, 100, -100, -32,
151+
0, 1, -1, std::i32::MAX,
152+
std::i32::MIN, 100, -100, -32,
153+
);
154+
let r = _mm512_maskz_abs_epi32(0, a);
155+
assert_eq_m512i(r, _mm512_setzero_si512());
156+
let r = _mm512_maskz_abs_epi32(0b11111111, a);
157+
let e = _mm512_setr_epi32(
158+
0, 1, 1, std::i32::MAX,
159+
std::i32::MAX.wrapping_add(1), 100, 100, 32,
160+
0, 0, 0, 0, 0, 0, 0, 0,
161+
);
162+
assert_eq_m512i(r, e);
163+
}
164+
}

coresimd/x86/mod.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,10 @@ types! {
391391
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
392392
}
393393

394+
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
395+
#[allow(non_camel_case_types)]
396+
pub type __mmask16 = i16;
397+
394398
#[cfg(test)]
395399
mod test;
396400
#[cfg(test)]
@@ -502,6 +506,24 @@ impl m256iExt for __m256i {
502506
}
503507
}
504508

509+
#[allow(non_camel_case_types)]
510+
#[unstable(feature = "stdimd_internal", issue = "0")]
511+
pub(crate) trait m512iExt: Sized {
512+
fn as_m512i(self) -> __m512i;
513+
514+
#[inline]
515+
fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
516+
unsafe { mem::transmute(self.as_m512i()) }
517+
}
518+
}
519+
520+
impl m512iExt for __m512i {
521+
#[inline]
522+
fn as_m512i(self) -> Self {
523+
self
524+
}
525+
}
526+
505527
mod eflags;
506528
pub use self::eflags::*;
507529

@@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
580602
pub unsafe fn ud2() -> ! {
581603
::intrinsics::abort()
582604
}
605+
606+
mod avx512f;
607+
pub use self::avx512f::*;

coresimd/x86/test.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,11 @@ mod x86_polyfill {
137137
pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
138138
}
139139
pub use self::x86_polyfill::*;
140+
141+
pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
142+
union A {
143+
a: __m512i,
144+
b: [i32; 16],
145+
}
146+
assert_eq!(A { a }.b, A { a: b }.b)
147+
}

crates/coresimd/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
sse4a_target_feature,
3434
arm_target_feature,
3535
aarch64_target_feature,
36+
avx512_target_feature,
3637
mips_target_feature,
3738
powerpc_target_feature
3839
)]

crates/stdsimd-verify/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
100100
"__m256" => quote! { &M256 },
101101
"__m256d" => quote! { &M256D },
102102
"__m256i" => quote! { &M256I },
103+
"__m512" => quote! { &M512 },
104+
"__m512d" => quote! { &M512D },
105+
"__m512i" => quote! { &M512I },
106+
"__mmask16" => quote! { &MMASK16 },
103107
"__m64" => quote! { &M64 },
104108
"bool" => quote! { &BOOL },
105109
"f32" => quote! { &F32 },

crates/stdsimd-verify/tests/x86-intel.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
5454
static M256: Type = Type::M256;
5555
static M256I: Type = Type::M256I;
5656
static M256D: Type = Type::M256D;
57+
static M512: Type = Type::M512;
58+
static M512I: Type = Type::M512I;
59+
static M512D: Type = Type::M512D;
60+
static MMASK16: Type = Type::MMASK16;
5761

5862
static TUPLE: Type = Type::Tuple;
5963
static CPUID: Type = Type::CpuidResult;
@@ -72,6 +76,10 @@ enum Type {
7276
M256,
7377
M256D,
7478
M256I,
79+
M512,
80+
M512D,
81+
M512I,
82+
MMASK16,
7583
Tuple,
7684
CpuidResult,
7785
Never,
@@ -430,6 +438,15 @@ fn equate(
430438
| (&Type::M256, "__m256")
431439
| (&Type::Ptr(&Type::M256), "__m256*") => {}
432440

441+
(&Type::M512I, "__m512i")
442+
| (&Type::Ptr(&Type::M512I), "__m512i*")
443+
| (&Type::M512D, "__m512d")
444+
| (&Type::Ptr(&Type::M512D), "__m512d*")
445+
| (&Type::M512, "__m512")
446+
| (&Type::Ptr(&Type::M512), "__m512*") => {}
447+
448+
(&Type::MMASK16, "__mmask16") => {}
449+
433450
// This is a macro (?) in C which seems to mutate its arguments, but
434451
// that means that we're taking pointers to arguments in rust
435452
// as we're not exposing it as a macro.

0 commit comments

Comments
 (0)