Skip to content

Commit 9aa36da

Browse files
committed
Optimize `must_be_2_3_continuation` as in simdjson to avoid the comparison and
enable the ternary-logic optimization. Kudos to @Validark (see simdjson/simdjson#2113).
1 parent 69e70ed commit 9aa36da

File tree

1 file changed

+7
-21
lines changed

1 file changed

+7
-21
lines changed

src/implementation/x86/avx512.rs

Lines changed: 7 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
//! Contains the x86-64 AVX512 UTF-8 validation implementation.
22
33
use core::arch::x86_64::{
4-
__m512i, _mm512_alignr_epi8, _mm512_and_si512, _mm512_cmpgt_epi8_mask, _mm512_loadu_si512,
5-
_mm512_maskz_abs_epi8, _mm512_maskz_loadu_epi8, _mm512_or_si512, _mm512_permutex2var_epi64,
6-
_mm512_set1_epi8, _mm512_set_epi64, _mm512_setzero_si512, _mm512_shuffle_epi8,
7-
_mm512_srli_epi16, _mm512_subs_epu8, _mm512_test_epi8_mask, _mm512_xor_si512, _mm_prefetch,
8-
_MM_HINT_T0,
4+
__m512i, _mm512_alignr_epi8, _mm512_and_si512, _mm512_loadu_si512, _mm512_maskz_loadu_epi8,
5+
_mm512_or_si512, _mm512_permutex2var_epi64, _mm512_set1_epi8, _mm512_set_epi64,
6+
_mm512_setzero_si512, _mm512_shuffle_epi8, _mm512_srli_epi16, _mm512_subs_epu8,
7+
_mm512_test_epi8_mask, _mm512_xor_si512, _mm_prefetch, _MM_HINT_T0,
98
};
109
use core::arch::x86_64::{_mm512_movepi8_mask, _mm512_set_epi8};
1110

@@ -224,16 +223,6 @@ impl SimdU8Value {
224223
));
225224
}
226225

227-
#[flexpect::e(clippy::cast_possible_wrap)]
228-
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
229-
#[inline]
230-
unsafe fn signed_gt(self, other: Self) -> Self {
231-
Self::from(_mm512_maskz_abs_epi8(
232-
_mm512_cmpgt_epi8_mask(self.0, other.0),
233-
_mm512_set1_epi8(0x80u8 as i8),
234-
))
235-
}
236-
237226
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
238227
#[inline]
239228
unsafe fn any_bit_set(self) -> bool {
@@ -258,12 +247,9 @@ impl Utf8CheckAlgorithm<SimdU8Value> {
258247
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi")]
259248
#[inline]
260249
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
261-
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1));
262-
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1));
263-
264-
is_third_byte
265-
.or(is_fourth_byte)
266-
.signed_gt(SimdU8Value::splat0())
250+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
251+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
252+
is_third_byte.or(is_fourth_byte)
267253
}
268254
}
269255

0 commit comments

Comments
 (0)