Skip to content

Commit 27c8f9d

Browse files
authored
Unrolled build for #146699
Rollup merge of #146699 - heiher:is-ascii-lsx, r=Mark-Simulacrum

Add `is_ascii` function optimized for LoongArch64 for [u8]

Similar to x86_64, on LoongArch64 we use the `vmskltz.b` instruction to test the high bit in a lane.

For longer input cases, the performance improvement is significant. For unaligned cases close to 32 bytes in length, there's some regression, but it seems acceptable.

| core benches (MB/s)                                    | Before | After  | %       |
|--------------------------------------------------------|--------|--------|---------|
| ascii::is_ascii::short::case00_libcore                 | 1000   | 1000   | 0.00    |
| ascii::is_ascii::medium::case00_libcore                | 8000   | 8000   | 0.00    |
| ascii::is_ascii::long::case00_libcore                  | 183947 | 436875 | +137.50 |
| ascii::is_ascii::unaligned_head_medium::case00_libcore | 7750   | 2818   | -63.64  |
| ascii::is_ascii::unaligned_head_long::case00_libcore   | 317681 | 436812 | +37.50  |
| ascii::is_ascii::unaligned_tail_medium::case00_libcore | 7750   | 3444   | -55.56  |
| ascii::is_ascii::unaligned_tail_long::case00_libcore   | 155311 | 436812 | +181.25 |
| ascii::is_ascii::unaligned_both_medium::case00_libcore | 7500   | 3333   | -55.56  |
| ascii::is_ascii::unaligned_both_long::case00_libcore   | 174700 | 436750 | +150.00 |
2 parents b15a874 + 73c3905 commit 27c8f9d

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

library/core/src/slice/ascii.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
use core::ascii::EscapeDefault;
44

55
use crate::fmt::{self, Write};
6-
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
6+
#[cfg(not(any(
7+
all(target_arch = "x86_64", target_feature = "sse2"),
8+
all(target_arch = "loongarch64", target_feature = "lsx")
9+
)))]
710
use crate::intrinsics::const_eval_select;
811
use crate::{ascii, iter, ops};
912

@@ -359,7 +362,10 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
359362
///
360363
/// If any of these loads produces something for which `contains_nonascii`
361364
/// (above) returns true, then we know the answer is false.
362-
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
365+
#[cfg(not(any(
366+
all(target_arch = "x86_64", target_feature = "sse2"),
367+
all(target_arch = "loongarch64", target_feature = "lsx")
368+
)))]
363369
#[inline]
364370
#[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior
365371
const fn is_ascii(s: &[u8]) -> bool {
@@ -457,12 +463,15 @@ const fn is_ascii(s: &[u8]) -> bool {
457463
)
458464
}
459465

460-
/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
461-
/// platforms.
466+
/// ASCII test optimized to use the `pmovmskb` instruction on `x86-64` and the
467+
/// `vmskltz.b` instruction on `loongarch64`.
462468
///
463469
/// Other platforms are not likely to benefit from this code structure, so they
464470
/// use SWAR techniques to test for ASCII in `usize`-sized chunks.
465-
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
471+
#[cfg(any(
472+
all(target_arch = "x86_64", target_feature = "sse2"),
473+
all(target_arch = "loongarch64", target_feature = "lsx")
474+
))]
466475
#[inline]
467476
const fn is_ascii(bytes: &[u8]) -> bool {
468477
// Process chunks of 32 bytes at a time in the fast path to enable

0 commit comments

Comments
 (0)