Skip to content

Commit

Permalink
fix node issue 49960 (#519)
Browse files: browse the repository at this point in the history
* fix node issue 49960

* add: slow path for short strings.

* fixing alignment/sse

* format

---------

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
  • Loading branch information
lemire and Daniel Lemire authored Sep 30, 2023
1 parent 60b5c51 commit a944dd2
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
2 changes: 1 addition & 1 deletion benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ endif(ADA_BOOST_URL)
endif(Boost_FOUND)

# Zuri
find_package(ZURI)
find_package(ZURI QUIET)
if(ZURI_FOUND)
message(STATUS "Zuri found")
target_link_libraries(bench PRIVATE zuri)
Expand Down
32 changes: 26 additions & 6 deletions src/unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
#if ADA_NEON
ada_really_inline bool has_tabs_or_newline(
std::string_view user_input) noexcept {
// first check for short strings in which case we do it naively.
if (user_input.size() < 16) { // slow path
for (size_t i = 0; i < user_input.size(); i++) {
if (user_input[i] == '\r' || user_input[i] == '\n' ||
user_input[i] == '\t') {
return true;
}
}
return false;
}
// fast path for long strings (expected to be common)
size_t i = 0;
const uint8x16_t mask1 = vmovq_n_u8('\r');
const uint8x16_t mask2 = vmovq_n_u8('\n');
Expand All @@ -60,9 +71,8 @@ ada_really_inline bool has_tabs_or_newline(
vceqq_u8(word, mask3));
}
if (i < user_input.size()) {
uint8_t buffer[16]{};
memcpy(buffer, user_input.data() + i, user_input.size() - i);
uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i);
uint8x16_t word =
vld1q_u8((const uint8_t*)user_input.data() + user_input.length() - 16);
running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
vceqq_u8(word, mask2))),
vceqq_u8(word, mask3));
Expand All @@ -72,6 +82,17 @@ ada_really_inline bool has_tabs_or_newline(
#elif ADA_SSE2
ada_really_inline bool has_tabs_or_newline(
std::string_view user_input) noexcept {
// first check for short strings in which case we do it naively.
if (user_input.size() < 16) { // slow path
for (size_t i = 0; i < user_input.size(); i++) {
if (user_input[i] == '\r' || user_input[i] == '\n' ||
user_input[i] == '\t') {
return true;
}
}
return false;
}
// fast path for long strings (expected to be common)
size_t i = 0;
const __m128i mask1 = _mm_set1_epi8('\r');
const __m128i mask2 = _mm_set1_epi8('\n');
Expand All @@ -85,9 +106,8 @@ ada_really_inline bool has_tabs_or_newline(
_mm_cmpeq_epi8(word, mask3));
}
if (i < user_input.size()) {
alignas(16) uint8_t buffer[16]{};
memcpy(buffer, user_input.data() + i, user_input.size() - i);
__m128i word = _mm_load_si128((const __m128i*)buffer);
__m128i word = _mm_loadu_si128(
(const __m128i*)(user_input.data() + user_input.length() - 16));
running = _mm_or_si128(
_mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
_mm_cmpeq_epi8(word, mask2))),
Expand Down

0 comments on commit a944dd2

Please sign in to comment.