Skip to content

Commit dd67290

Browse files
committed
check for and enable the bmi2 target feature
1 parent 56b56b4 commit dd67290

File tree

7 files changed

+32
-16
lines changed

7 files changed

+32
-16
lines changed

zlib-rs/src/adler32.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ mod wasm;
1010

1111
pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
1212
#[cfg(target_arch = "x86_64")]
13-
if crate::cpu_features::is_enabled_avx2() {
13+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
1414
return avx2::adler32_avx2(start_checksum, data);
1515
}
1616

zlib-rs/src/adler32/avx2.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,13 @@ unsafe fn partial_hsum256(x: __m256i) -> u32 {
6363
}
6464

6565
pub fn adler32_avx2(adler: u32, src: &[u8]) -> u32 {
66-
assert!(crate::cpu_features::is_enabled_avx2());
66+
assert!(crate::cpu_features::is_enabled_avx2_and_bmi2());
6767
// SAFETY: the assertion above ensures this code is not executed unless the avx2 and bmi2 feature check passed.
6868
unsafe { adler32_avx2_help(adler, src) }
6969
}
7070

7171
#[target_feature(enable = "avx2")]
72+
#[target_feature(enable = "bmi2")]
7273
unsafe fn adler32_avx2_help(adler: u32, src: &[u8]) -> u32 {
7374
if src.is_empty() {
7475
return adler;

zlib-rs/src/cpu_features.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,24 @@ pub fn is_enabled_sse42() -> bool {
2727
}
2828

2929
#[inline(always)]
30-
pub fn is_enabled_avx2() -> bool {
30+
pub fn is_enabled_avx2_and_bmi2() -> bool {
3131
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
3232
#[cfg(feature = "std")]
33-
return std::is_x86_feature_detected!("avx2");
33+
{
34+
use std::sync::atomic::{AtomicU32, Ordering};
35+
36+
static CACHE: AtomicU32 = AtomicU32::new(2);
37+
38+
return match CACHE.load(Ordering::Relaxed) {
39+
0 => false,
40+
1 => true,
41+
_ => {
42+
let detected = std::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("bmi2");
43+
CACHE.store(u32::from(detected), Ordering::Relaxed);
44+
detected
45+
}
46+
};
47+
}
3448

3549
false
3650
}
@@ -48,9 +62,7 @@ pub fn is_enabled_avx512() -> bool {
4862
pub fn is_enabled_pclmulqdq() -> bool {
4963
#[cfg(target_arch = "x86_64")]
5064
#[cfg(feature = "std")]
51-
return std::is_x86_feature_detected!("pclmulqdq")
52-
&& std::is_x86_feature_detected!("sse2")
53-
&& std::is_x86_feature_detected!("sse4.1");
65+
return std::is_x86_feature_detected!("pclmulqdq") && std::is_x86_feature_detected!("sse4.1");
5466

5567
false
5668
}

zlib-rs/src/deflate/compare256.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {
1010

1111
fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
1212
#[cfg(target_arch = "x86_64")]
13-
if crate::cpu_features::is_enabled_avx2() {
13+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
1414
return unsafe { avx2::compare256(src0, src1) };
1515
}
1616

@@ -180,6 +180,7 @@ mod avx2 {
180180
///
181181
/// Behavior is undefined if the `avx2` and `bmi2` target features are not enabled
182182
#[target_feature(enable = "avx2")]
183+
#[target_feature(enable = "bmi2")]
183184
pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
184185
let src0 = src0.chunks_exact(32);
185186
let src1 = src1.chunks_exact(32);
@@ -212,7 +213,7 @@ mod avx2 {
212213

213214
#[test]
214215
fn test_compare256() {
215-
if crate::cpu_features::is_enabled_avx2() {
216+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
216217
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
217218
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
218219

zlib-rs/src/deflate/slide_hash.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ pub fn slide_hash(state: &mut crate::deflate::State) {
1010

1111
fn slide_hash_chain(table: &mut [u16], wsize: u16) {
1212
#[cfg(target_arch = "x86_64")]
13-
if crate::cpu_features::is_enabled_avx2() {
14-
// SAFETY: the avx2 target feature is enabled.
13+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
14+
// SAFETY: the avx2 and bmi2 target features are enabled.
1515
return unsafe { avx2::slide_hash_chain(table, wsize) };
1616
}
1717

@@ -54,6 +54,7 @@ mod avx2 {
5454
///
5555
/// Behavior is undefined if the `avx2` and `bmi2` target features are not enabled
5656
#[target_feature(enable = "avx2")]
57+
#[target_feature(enable = "bmi2")]
5758
pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
5859
// 64 means that 4 256-bit values can be processed per iteration.
5960
// That appears to be the optimal amount for avx2.
@@ -155,7 +156,7 @@ mod tests {
155156
#[test]
156157
#[cfg(target_arch = "x86_64")]
157158
fn test_slide_hash_avx2() {
158-
if crate::cpu_features::is_enabled_avx2() {
159+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
159160
let mut input = INPUT;
160161

161162
unsafe { avx2::slide_hash_chain(&mut input, WSIZE) };

zlib-rs/src/inflate.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1816,7 +1816,7 @@ impl State<'_> {
18161816

18171817
fn inflate_fast_help(state: &mut State, start: usize) {
18181818
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1819-
if crate::cpu_features::is_enabled_avx2() {
1819+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
18201820
// SAFETY: we've verified the target features
18211821
return unsafe { inflate_fast_help_avx2(state, start) };
18221822
}
@@ -1826,6 +1826,7 @@ fn inflate_fast_help(state: &mut State, start: usize) {
18261826

18271827
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
18281828
#[target_feature(enable = "avx2")]
1829+
#[target_feature(enable = "bmi2")]
18291830
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
18301831
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
18311832
}

zlib-rs/src/inflate/writer.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ impl<'a> Writer<'a> {
106106
// }
107107

108108
#[cfg(target_arch = "x86_64")]
109-
if crate::cpu_features::is_enabled_avx2() {
109+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
110110
return self.extend_from_window_help::<32>(window, range);
111111
}
112112

@@ -186,7 +186,7 @@ impl<'a> Writer<'a> {
186186
// }
187187

188188
#[cfg(target_arch = "x86_64")]
189-
if crate::cpu_features::is_enabled_avx2() {
189+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
190190
return self.copy_match_help::<32>(offset_from_end, length);
191191
}
192192

@@ -379,7 +379,7 @@ mod test {
379379
}
380380

381381
#[cfg(target_arch = "x86_64")]
382-
if crate::cpu_features::is_enabled_avx2() {
382+
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
383383
helper!(Writer::copy_match_help::<32>);
384384
}
385385

0 commit comments

Comments
 (0)