Skip to content

Commit c6d28e2

Browse files
committed
Centralize CPU feature detection
In the future, we can also add feature flags for these that override runtime detection.
1 parent e83fcef commit c6d28e2

File tree

11 files changed

+113
-52
lines changed

11 files changed

+113
-52
lines changed

zlib-rs/src/adler32.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ mod generic;
77
mod neon;
88

99
pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
10-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
11-
if std::is_x86_feature_detected!("avx2") {
10+
#[cfg(target_arch = "x86_64")]
11+
if crate::cpu_features::is_enabled_avx2() {
1212
return avx2::adler32_avx2(start_checksum, data);
1313
}
1414

15-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
16-
if std::arch::is_aarch64_feature_detected!("neon") {
15+
#[cfg(target_arch = "aarch64")]
16+
if crate::cpu_features::is_enabled_neon() {
1717
return self::neon::adler32_neon(start_checksum, data);
1818
}
1919

@@ -23,8 +23,8 @@ pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
2323
pub fn adler32_fold_copy(start_checksum: u32, dst: &mut [MaybeUninit<u8>], src: &[u8]) -> u32 {
2424
debug_assert!(dst.len() >= src.len(), "{} < {}", dst.len(), src.len());
2525

26-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
27-
if std::is_x86_feature_detected!("avx2") {
26+
#[cfg(target_arch = "x86_64")]
27+
if crate::cpu_features::is_enabled_avx2() {
2828
return avx2::adler32_fold_copy_avx2(start_checksum, dst, src);
2929
}
3030

zlib-rs/src/adler32/avx2.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ unsafe fn partial_hsum256(x: __m256i) -> u32 {
6262
}
6363

6464
pub fn adler32_avx2(adler: u32, src: &[u8]) -> u32 {
65-
assert!(std::is_x86_feature_detected!("avx2"));
65+
assert!(crate::cpu_features::is_enabled_avx2());
6666
unsafe { adler32_avx2_help::<false>(adler, &mut [], src) }
6767
}
6868

6969
pub fn adler32_fold_copy_avx2(adler: u32, dst: &mut [MaybeUninit<u8>], src: &[u8]) -> u32 {
70-
assert!(std::is_x86_feature_detected!("avx2"));
70+
assert!(crate::cpu_features::is_enabled_avx2());
7171
unsafe { adler32_avx2_help::<true>(adler, dst, src) }
7272
}
7373

zlib-rs/src/adler32/neon.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ const TAPS: [uint16x8x4_t; 2] = unsafe {
1919
};
2020

2121
pub fn adler32_neon(adler: u32, buf: &[u8]) -> u32 {
22-
assert!(std::arch::is_aarch64_feature_detected!("neon"));
22+
assert!(crate::cpu_features::is_enabled_neon());
2323
unsafe { adler32_neon_internal(adler, buf) }
2424
}
2525

zlib-rs/src/cpu_features.rs

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#![allow(dead_code)]
2+
#![allow(unreachable_code)]
3+
4+
/// Reports whether the `sse` CPU feature is detected.
///
/// The detection is only compiled in on `x86`/`x86_64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
5+
pub fn is_enabled_sse() -> bool {
6+
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
7+
#[cfg(feature = "std")]
8+
return std::is_x86_feature_detected!("sse");
9+
10+
// Fallback used when the cfg-gated `return` above is compiled out.
false
11+
}
12+
13+
/// Reports whether the `sse4.2` CPU feature is detected.
///
/// The detection is only compiled in on `x86`/`x86_64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
14+
pub fn is_enabled_sse42() -> bool {
15+
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
16+
#[cfg(feature = "std")]
17+
return std::is_x86_feature_detected!("sse4.2");
18+
19+
// Fallback used when the cfg-gated `return` above is compiled out.
false
20+
}
21+
22+
/// Reports whether the `avx2` CPU feature is detected.
///
/// The detection is only compiled in on `x86`/`x86_64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
23+
pub fn is_enabled_avx2() -> bool {
24+
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
25+
#[cfg(feature = "std")]
26+
return std::is_x86_feature_detected!("avx2");
27+
28+
// Fallback used when the cfg-gated `return` above is compiled out.
false
29+
}
30+
31+
/// Reports whether AVX-512 is detected.
///
/// Only the `avx512f` feature is probed; no other AVX-512 extension is checked.
/// The detection is only compiled in on `x86`/`x86_64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
32+
pub fn is_enabled_avx512() -> bool {
33+
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
34+
#[cfg(feature = "std")]
35+
return std::is_x86_feature_detected!("avx512f");
36+
37+
// Fallback used when the cfg-gated `return` above is compiled out.
false
38+
}
39+
40+
/// Reports whether the `pclmulqdq`, `sse2` and `sse4.1` CPU features are all
/// detected.
///
/// Unlike the other x86 checks in this file, the detection is gated on
/// `x86_64` only (not 32-bit `x86`) and on the `std` feature; in every other
/// configuration this returns `false`.
#[inline(always)]
41+
pub fn is_enabled_pclmulqdq() -> bool {
42+
#[cfg(target_arch = "x86_64")]
43+
#[cfg(feature = "std")]
44+
// All three features must be present for this to report `true`.
return std::is_x86_feature_detected!("pclmulqdq")
45+
&& std::is_x86_feature_detected!("sse2")
46+
&& std::is_x86_feature_detected!("sse4.1");
47+
48+
// Fallback used when the cfg-gated `return` above is compiled out.
false
49+
}
50+
51+
/// Reports whether the `neon` CPU feature is detected.
///
/// The detection is only compiled in on `aarch64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
52+
pub fn is_enabled_neon() -> bool {
53+
#[cfg(target_arch = "aarch64")]
54+
#[cfg(feature = "std")]
55+
return std::arch::is_aarch64_feature_detected!("neon");
56+
57+
// Fallback used when the cfg-gated `return` above is compiled out.
false
58+
}
59+
60+
/// Reports whether the `crc` CPU feature is detected.
///
/// The detection is only compiled in on `aarch64` targets when the `std`
/// feature is enabled; in every other configuration this returns `false`.
#[inline(always)]
61+
pub fn is_enabled_crc() -> bool {
62+
#[cfg(target_arch = "aarch64")]
63+
#[cfg(feature = "std")]
64+
return std::arch::is_aarch64_feature_detected!("crc");
65+
66+
// Fallback used when the cfg-gated `return` above is compiled out.
false
67+
}

zlib-rs/src/crc32.rs

+8-10
Original file line numberDiff line numberDiff line change
@@ -71,25 +71,23 @@ impl Crc32Fold {
7171
}
7272
}
7373

74-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
74+
#[cfg_attr(not(target_arch = "x86_64"), allow(unused))]
7575
pub(crate) fn is_pclmulqdq_enabled() -> bool {
76-
std::is_x86_feature_detected!("pclmulqdq")
77-
&& std::is_x86_feature_detected!("sse2")
78-
&& std::is_x86_feature_detected!("sse4.1")
76+
crate::cpu_features::is_enabled_pclmulqdq()
7977
}
8078

81-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
79+
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
8280
pub(crate) fn is_crc_enabled() -> bool {
83-
std::arch::is_aarch64_feature_detected!("crc")
81+
crate::cpu_features::is_enabled_crc()
8482
}
8583

8684
pub fn fold(&mut self, src: &[u8], _start: u32) {
87-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
85+
#[cfg(target_arch = "x86_64")]
8886
if Self::is_pclmulqdq_enabled() {
8987
return self.fold.fold(src, _start);
9088
}
9189

92-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
90+
#[cfg(target_arch = "aarch64")]
9391
if Self::is_crc_enabled() {
9492
self.value = self::acle::crc32_acle_aarch64(self.value, src);
9593
return;
@@ -100,7 +98,7 @@ impl Crc32Fold {
10098
}
10199

102100
pub fn fold_copy(&mut self, dst: &mut [MaybeUninit<u8>], src: &[u8]) {
103-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
101+
#[cfg(target_arch = "x86_64")]
104102
if Self::is_pclmulqdq_enabled() {
105103
return self.fold.fold_copy(dst, src);
106104
}
@@ -110,7 +108,7 @@ impl Crc32Fold {
110108
}
111109

112110
pub fn finish(self) -> u32 {
113-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
111+
#[cfg(target_arch = "x86_64")]
114112
if Self::is_pclmulqdq_enabled() {
115113
return unsafe { self.fold.finish() };
116114
}

zlib-rs/src/deflate/compare256.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {
1010
}
1111

1212
fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
13-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
14-
if std::is_x86_feature_detected!("avx2") {
13+
#[cfg(target_arch = "x86_64")]
14+
if crate::cpu_features::is_enabled_avx2() {
1515
return unsafe { avx2::compare256(src0, src1) };
1616
}
1717

18-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
19-
if std::arch::is_aarch64_feature_detected!("neon") {
18+
#[cfg(target_arch = "aarch64")]
19+
if crate::cpu_features::is_enabled_neon() {
2020
return unsafe { neon::compare256(src0, src1) };
2121
}
2222

@@ -150,7 +150,7 @@ mod neon {
150150

151151
#[test]
152152
fn test_compare256() {
153-
if std::arch::is_aarch64_feature_detected!("neon") {
153+
if crate::cpu_features::is_enabled_neon() {
154154
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
155155
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
156156

@@ -204,7 +204,7 @@ mod avx2 {
204204

205205
#[test]
206206
fn test_compare256() {
207-
if std::arch::is_x86_feature_detected!("avx2") {
207+
if crate::cpu_features::is_enabled_avx2() {
208208
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
209209
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
210210

zlib-rs/src/deflate/hash_calc.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,12 @@ pub struct Crc32HashCalc;
135135

136136
impl Crc32HashCalc {
137137
fn is_supported() -> bool {
138-
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "std"))]
139-
return std::arch::is_x86_feature_detected!("sse4.2");
138+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
139+
return crate::cpu_features::is_enabled_sse42();
140140

141141
// NOTE: more recent versions of zlib-ng no longer use the crc instructions on aarch64
142-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
143-
return std::arch::is_aarch64_feature_detected!("crc");
142+
#[cfg(target_arch = "aarch64")]
143+
return crate::cpu_features::is_enabled_crc();
144144

145145
#[allow(unreachable_code)]
146146
false

zlib-rs/src/deflate/slide_hash.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ pub fn slide_hash(state: &mut crate::deflate::State) {
66
}
77

88
fn slide_hash_chain(table: &mut [u16], wsize: u16) {
9-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
10-
if std::is_x86_feature_detected!("avx2") {
9+
#[cfg(target_arch = "x86_64")]
10+
if crate::cpu_features::is_enabled_avx2() {
1111
return avx2::slide_hash_chain(table, wsize);
1212
}
1313

14-
#[cfg(all(target_arch = "aarch64", feature = "std"))]
15-
if std::arch::is_aarch64_feature_detected!("neon") {
14+
#[cfg(target_arch = "aarch64")]
15+
if crate::cpu_features::is_enabled_neon() {
1616
return neon::slide_hash_chain(table, wsize);
1717
}
1818

@@ -34,7 +34,7 @@ mod neon {
3434
};
3535

3636
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
37-
assert!(std::arch::is_aarch64_feature_detected!("neon"));
37+
assert!(crate::cpu_features::is_enabled_neon());
3838
unsafe { slide_hash_chain_internal(table, wsize) }
3939
}
4040

@@ -71,7 +71,7 @@ mod avx2 {
7171
};
7272

7373
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
74-
assert!(std::is_x86_feature_detected!("avx2"));
74+
assert!(crate::cpu_features::is_enabled_avx2());
7575
unsafe { slide_hash_chain_internal(table, wsize) }
7676
}
7777

@@ -123,7 +123,7 @@ mod tests {
123123
#[test]
124124
#[cfg(target_arch = "x86_64")]
125125
fn test_slide_hash_avx2() {
126-
if std::arch::is_x86_feature_detected!("avx2") {
126+
if crate::cpu_features::is_enabled_avx2() {
127127
let mut input = INPUT;
128128

129129
avx2::slide_hash_chain(&mut input, WSIZE);
@@ -135,7 +135,7 @@ mod tests {
135135
#[test]
136136
#[cfg(target_arch = "aarch64")]
137137
fn test_slide_hash_neon() {
138-
if std::arch::is_aarch64_feature_detected!("neon") {
138+
if crate::cpu_features::is_enabled_neon() {
139139
let mut input = INPUT;
140140

141141
neon::slide_hash_chain(&mut input, WSIZE);

zlib-rs/src/deflate/window.rs

+4-9
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,10 @@ impl<'a> Window<'a> {
132132

133133
// padding required so that SIMD operations going out-of-bounds are not a problem
134134
pub fn padding() -> usize {
135-
#[cfg(feature = "std")]
136-
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
137-
if std::is_x86_feature_detected!("pclmulqdq")
138-
&& std::is_x86_feature_detected!("sse2")
139-
&& std::is_x86_feature_detected!("sse4.1")
140-
{
141-
return 8;
135+
if crate::cpu_features::is_enabled_pclmulqdq() {
136+
8
137+
} else {
138+
0
142139
}
143-
144-
0
145140
}
146141
}

zlib-rs/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ extern crate alloc;
77
mod adler32;
88
pub mod allocate;
99
pub mod c_api;
10+
mod cpu_features;
1011
pub mod crc32;
1112
pub mod deflate;
1213
pub mod inflate;

zlib-rs/src/read_buf.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -199,18 +199,18 @@ impl<'a> ReadBuf<'a> {
199199

200200
#[inline(always)]
201201
pub fn copy_match(&mut self, offset_from_end: usize, length: usize) {
202-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
203-
if std::is_x86_feature_detected!("avx512f") {
202+
#[cfg(target_arch = "x86_64")]
203+
if crate::cpu_features::is_enabled_avx512() {
204204
return self.copy_match_help::<core::arch::x86_64::__m512i>(offset_from_end, length);
205205
}
206206

207-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
208-
if std::is_x86_feature_detected!("avx2") {
207+
#[cfg(target_arch = "x86_64")]
208+
if crate::cpu_features::is_enabled_avx2() {
209209
return self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length);
210210
}
211211

212-
#[cfg(all(target_arch = "x86_64", feature = "std"))]
213-
if std::is_x86_feature_detected!("sse") {
212+
#[cfg(target_arch = "x86_64")]
213+
if crate::cpu_features::is_enabled_sse() {
214214
return self.copy_match_help::<core::arch::x86_64::__m128i>(offset_from_end, length);
215215
}
216216

0 commit comments

Comments
 (0)