Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ jobs:
- name: Test public C api with NULL arguments
run: "cargo +nightly miri nextest run -j4 -p test-libz-rs-sys --target ${{ matrix.target }} null::"
env:
RUSTFLAGS: "-Ctarget-feature=+avx2"
RUSTFLAGS: "-Ctarget-feature=+avx2,+bmi2,+bmi1"
- name: Test allocator with miri
run: "cargo +nightly miri nextest run -j4 -p zlib-rs --target ${{ matrix.target }} allocate::"
- name: Test gz logic with miri
Expand Down
2 changes: 1 addition & 1 deletion zlib-rs/src/adler32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod wasm;

pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
return avx2::adler32_avx2(start_checksum, data);
}

Expand Down
4 changes: 3 additions & 1 deletion zlib-rs/src/adler32/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ unsafe fn partial_hsum256(x: __m256i) -> u32 {
}

pub fn adler32_avx2(adler: u32, src: &[u8]) -> u32 {
assert!(crate::cpu_features::is_enabled_avx2());
assert!(crate::cpu_features::is_enabled_avx2_and_bmi2());
// SAFETY: the assertion above ensures this code is not executed unless the CPU has AVX2.
unsafe { adler32_avx2_help(adler, src) }
}

#[target_feature(enable = "avx2")]
#[target_feature(enable = "bmi2")]
#[target_feature(enable = "bmi1")]
unsafe fn adler32_avx2_help(adler: u32, src: &[u8]) -> u32 {
if src.is_empty() {
return adler;
Expand Down
35 changes: 29 additions & 6 deletions zlib-rs/src/cpu_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,35 @@ pub fn is_enabled_sse42() -> bool {
}

#[inline(always)]
pub fn is_enabled_avx2() -> bool {
pub fn is_enabled_avx2_and_bmi2() -> bool {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(feature = "std")]
return std::is_x86_feature_detected!("avx2");
{
#[cfg(all(
target_feature = "avx2",
target_feature = "bmi1",
target_feature = "bmi2"
))]
return true;

#[cfg(feature = "std")]
{
use std::sync::atomic::{AtomicU32, Ordering};

static CACHE: AtomicU32 = AtomicU32::new(2);

return match CACHE.load(Ordering::Relaxed) {
0 => false,
1 => true,
_ => {
let detected = std::is_x86_feature_detected!("avx2")
&& std::is_x86_feature_detected!("bmi1")
&& std::is_x86_feature_detected!("bmi2");
CACHE.store(u32::from(detected), Ordering::Relaxed);
detected
}
};
}
}

false
}
Expand All @@ -48,9 +73,7 @@ pub fn is_enabled_avx512() -> bool {
pub fn is_enabled_pclmulqdq() -> bool {
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "std")]
return std::is_x86_feature_detected!("pclmulqdq")
&& std::is_x86_feature_detected!("sse2")
&& std::is_x86_feature_detected!("sse4.1");
return std::is_x86_feature_detected!("pclmulqdq") && std::is_x86_feature_detected!("sse4.1");
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


false
}
Expand Down
6 changes: 4 additions & 2 deletions zlib-rs/src/deflate/compare256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {

fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
return unsafe { avx2::compare256(src0, src1) };
}

Expand Down Expand Up @@ -180,6 +180,8 @@ mod avx2 {
///
/// Behavior is undefined if the `avx` target feature is not enabled
#[target_feature(enable = "avx2")]
#[target_feature(enable = "bmi2")]
#[target_feature(enable = "bmi1")]
pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
let src0 = src0.chunks_exact(32);
let src1 = src1.chunks_exact(32);
Expand Down Expand Up @@ -212,7 +214,7 @@ mod avx2 {

#[test]
fn test_compare256() {
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];

Expand Down
8 changes: 5 additions & 3 deletions zlib-rs/src/deflate/slide_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ pub fn slide_hash(state: &mut crate::deflate::State) {

fn slide_hash_chain(table: &mut [u16], wsize: u16) {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
// SAFETY: the avx2 target feature is enabled.
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
// SAFETY: the avx2 and bmi2 target feature are enabled.
return unsafe { avx2::slide_hash_chain(table, wsize) };
}

Expand Down Expand Up @@ -54,6 +54,8 @@ mod avx2 {
///
/// Behavior is undefined if the `avx2` target feature is not enabled
#[target_feature(enable = "avx2")]
#[target_feature(enable = "bmi2")]
#[target_feature(enable = "bmi1")]
pub unsafe fn slide_hash_chain(table: &mut [u16], wsize: u16) {
// 64 means that 4 256-bit values can be processed per iteration.
// That appear to be the optimal amount for avx2.
Expand Down Expand Up @@ -155,7 +157,7 @@ mod tests {
#[test]
#[cfg(target_arch = "x86_64")]
fn test_slide_hash_avx2() {
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
let mut input = INPUT;

unsafe { avx2::slide_hash_chain(&mut input, WSIZE) };
Expand Down
4 changes: 3 additions & 1 deletion zlib-rs/src/inflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1816,7 +1816,7 @@ impl State<'_> {

fn inflate_fast_help(state: &mut State, start: usize) {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
// SAFETY: we've verified the target features
return unsafe { inflate_fast_help_avx2(state, start) };
}
Expand All @@ -1826,6 +1826,8 @@ fn inflate_fast_help(state: &mut State, start: usize) {

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
#[target_feature(enable = "bmi2")]
#[target_feature(enable = "bmi1")]
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
}
Expand Down
6 changes: 3 additions & 3 deletions zlib-rs/src/inflate/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ impl<'a> Writer<'a> {
// }

#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
return self.extend_from_window_help::<32>(window, range);
}

Expand Down Expand Up @@ -186,7 +186,7 @@ impl<'a> Writer<'a> {
// }

#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
return self.copy_match_help::<32>(offset_from_end, length);
}

Expand Down Expand Up @@ -379,7 +379,7 @@ mod test {
}

#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
if crate::cpu_features::is_enabled_avx2_and_bmi2() {
helper!(Writer::copy_match_help::<32>);
}

Expand Down
Loading