Skip to content

Commit 62fcc88

Browse files
committed
add bmi1
1 parent 2781219 commit 62fcc88

File tree

6 files changed

+8
-3
lines changed

6 files changed

+8
-3
lines changed

.github/workflows/checks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ jobs:
479479
- name: Test public C api with NULL arguments
480480
run: "cargo +nightly miri nextest run -j4 -p test-libz-rs-sys --target ${{ matrix.target }} null::"
481481
env:
482-
RUSTFLAGS: "-Ctarget-feature=+avx2,+bmi2"
482+
RUSTFLAGS: "-Ctarget-feature=+avx2,+bmi2,+bmi1"
483483
- name: Test allocator with miri
484484
run: "cargo +nightly miri nextest run -j4 -p zlib-rs --target ${{ matrix.target }} allocate::"
485485
- name: Test gz logic with miri

zlib-rs/src/adler32/avx2.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ pub fn adler32_avx2(adler: u32, src: &[u8]) -> u32 {
7070

7171
#[target_feature(enable = "avx2")]
7272
#[target_feature(enable = "bmi2")]
73+
#[target_feature(enable = "bmi1")]
7374
unsafe fn adler32_avx2_help(adler: u32, src: &[u8]) -> u32 {
7475
if src.is_empty() {
7576
return adler;

zlib-rs/src/cpu_features.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ pub fn is_enabled_avx2_and_bmi2() -> bool {
3939
0 => false,
4040
1 => true,
4141
_ => {
42-
let detected =
43-
std::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("bmi2");
42+
let detected = std::is_x86_feature_detected!("avx2")
43+
&& std::is_x86_feature_detected!("bmi1")
44+
&& std::is_x86_feature_detected!("bmi2");
4445
CACHE.store(u32::from(detected), Ordering::Relaxed);
4546
detected
4647
}

zlib-rs/src/deflate/compare256.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ mod avx2 {
181181
/// Behavior is undefined if the `avx2` target feature is not enabled
182182
#[target_feature(enable = "avx2")]
183183
#[target_feature(enable = "bmi2")]
184+
#[target_feature(enable = "bmi1")]
184185
pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
185186
let src0 = src0.chunks_exact(32);
186187
let src1 = src1.chunks_exact(32);

zlib-rs/src/deflate/slide_hash.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ mod avx2 {
5555
/// Behavior is undefined if the `avx2` target feature is not enabled
5656
#[target_feature(enable = "avx2")]
5757
#[target_feature(enable = "bmi2")]
58+
#[target_feature(enable = "bmi1")]
5859
pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
5960
// 64 means that 4 256-bit values can be processed per iteration.
6061
// That appears to be the optimal amount for avx2.

zlib-rs/src/inflate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,6 +1827,7 @@ fn inflate_fast_help(state: &mut State, start: usize) {
18271827
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
18281828
#[target_feature(enable = "avx2")]
18291829
#[target_feature(enable = "bmi2")]
1830+
#[target_feature(enable = "bmi1")]
18301831
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
18311832
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
18321833
}

0 commit comments

Comments
 (0)