Skip to content

Commit ce7f5e0

Browse files
TDecking authored and Amanieu committed
Use generic simd for avx512 leading zeros
1 parent 5ccd76c commit ce7f5e0

File tree

1 file changed

+6
-20
lines changed

1 file changed

+6
-20
lines changed

crates/core_arch/src/x86/avx512cd.rs

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,7 @@ pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
294294
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
295295
#[cfg_attr(test, assert_instr(vplzcntd))]
296296
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
297-
transmute(vplzcntd(a.as_i32x16(), false))
297+
transmute(simd_ctlz(a.as_i32x16()))
298298
}
299299

300300
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -330,7 +330,7 @@ pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
330330
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
331331
#[cfg_attr(test, assert_instr(vplzcntd))]
332332
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
333-
transmute(vplzcntd256(a.as_i32x8(), false))
333+
transmute(simd_ctlz(a.as_i32x8()))
334334
}
335335

336336
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -366,7 +366,7 @@ pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
366366
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
367367
#[cfg_attr(test, assert_instr(vplzcntd))]
368368
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
369-
transmute(vplzcntd128(a.as_i32x4(), false))
369+
transmute(simd_ctlz(a.as_i32x4()))
370370
}
371371

372372
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -402,7 +402,7 @@ pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
402402
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
403403
#[cfg_attr(test, assert_instr(vplzcntq))]
404404
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
405-
transmute(vplzcntq(a.as_i64x8(), false))
405+
transmute(simd_ctlz(a.as_i64x8()))
406406
}
407407

408408
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -438,7 +438,7 @@ pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
438438
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
439439
#[cfg_attr(test, assert_instr(vplzcntq))]
440440
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
441-
transmute(vplzcntq256(a.as_i64x4(), false))
441+
transmute(simd_ctlz(a.as_i64x4()))
442442
}
443443

444444
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -474,7 +474,7 @@ pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
474474
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
475475
#[cfg_attr(test, assert_instr(vplzcntq))]
476476
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
477-
transmute(vplzcntq128(a.as_i64x2(), false))
477+
transmute(simd_ctlz(a.as_i64x2()))
478478
}
479479

480480
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -517,20 +517,6 @@ extern "C" {
517517
fn vpconflictq256(a: i64x4) -> i64x4;
518518
#[link_name = "llvm.x86.avx512.conflict.q.128"]
519519
fn vpconflictq128(a: i64x2) -> i64x2;
520-
521-
#[link_name = "llvm.ctlz.v16i32"]
522-
fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
523-
#[link_name = "llvm.ctlz.v8i32"]
524-
fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
525-
#[link_name = "llvm.ctlz.v4i32"]
526-
fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;
527-
528-
#[link_name = "llvm.ctlz.v8i64"]
529-
fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
530-
#[link_name = "llvm.ctlz.v4i64"]
531-
fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
532-
#[link_name = "llvm.ctlz.v2i64"]
533-
fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
534520
}
535521

536522
#[cfg(test)]

0 commit comments

Comments
 (0)