Skip to content

Commit

Permalink
[Optimization](String) Optimize q20 q21 q22 q23 LIKE_SUBSTRING (like …
Browse files Browse the repository at this point in the history
…'%xxx%') (apache#18309)

Optimize q20, q21, q22, q23 LIKE_SUBSTRING (like '%xxxx%'). Idea is from clickhouse stringsearcher:

Stringsearcher is about 10%~20% faster than volnitsky algorithm when needle size is less than 10 using two chars at beginning search in SIMD .
Stringsearcher is faster than volnitsky algorithm, when needle size is less than 21.
The changes are as follows:

Using first two chars of needle at beginning search. We can compare two chars of needle and [n:n+17) chars in haystack in SIMD in one loop. Filter efficiency will be higher.
When env support SIMD, we use stringsearcher.
Test result in clickbench:

q20 is about 15% up.
q20: SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
q21, q22 is about 1%~5% up.
q21: SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
q22: SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
q23 is about 30%~40% up and not stable.
q23: SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
  • Loading branch information
ZhangYu0123 authored Apr 3, 2023
1 parent eb6dbc0 commit b627088
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
68 changes: 57 additions & 11 deletions be/src/vec/common/string_searcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ class StringSearcher<true, ASCII> : public StringSearcherBase {
uint8_t first {};

#ifdef __SSE4_1__
/// vector filled `first` for determining leftmost position of the first symbol
__m128i pattern;
uint8_t second {};
/// vector filled `first` or `second` for determining leftmost position of the first and second symbols
__m128i first_pattern;
__m128i second_pattern;
/// vector of first 16 characters of `needle`
__m128i cache = _mm_setzero_si128();
int cachemask {};
Expand All @@ -95,8 +97,11 @@ class StringSearcher<true, ASCII> : public StringSearcherBase {
first = *needle;

#ifdef __SSE4_1__
pattern = _mm_set1_epi8(first);

first_pattern = _mm_set1_epi8(first);
if (needle + 1 < needle_end) {
second = *(needle + 1);
second_pattern = _mm_set1_epi8(second);
}
const auto* needle_pos = needle;

//for (const auto i : collections::range(0, n))
Expand Down Expand Up @@ -155,16 +160,57 @@ class StringSearcher<true, ASCII> : public StringSearcherBase {
const CharT* search(const CharT* haystack, const CharT* const haystack_end) const {
if (needle == needle_end) return haystack;

while (haystack < haystack_end) {
const auto needle_size = needle_end - needle;
#ifdef __SSE4_1__
if (haystack + n <= haystack_end && page_safe(haystack)) {
/// find first character
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack));
const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern);
/// Here is the quick path when needle_size is 1.
if (needle_size == 1) {
while (haystack < haystack_end) {
if (haystack + n <= haystack_end && page_safe(haystack)) {
const auto v_haystack =
_mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack));
const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, first_pattern);
const auto mask = _mm_movemask_epi8(v_against_pattern);
if (mask == 0) {
haystack += n;
continue;
}

const auto offset = __builtin_ctz(mask);
haystack += offset;

return haystack;
}

const auto mask = _mm_movemask_epi8(v_against_pattern);
if (haystack == haystack_end) {
return haystack_end;
}

/// first character not present in 16 octets starting at `haystack`
if (*haystack == first) {
return haystack;
}
++haystack;
}
return haystack_end;
}
#endif

while (haystack < haystack_end && haystack_end - haystack >= needle_size) {
#ifdef __SSE4_1__
if ((haystack + 1 + n) <= haystack_end && page_safe(haystack)) {
/// find first and second characters
const auto v_haystack_block_first =
_mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack));
const auto v_haystack_block_second =
_mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack + 1));

const auto v_against_pattern_first =
_mm_cmpeq_epi8(v_haystack_block_first, first_pattern);
const auto v_against_pattern_second =
_mm_cmpeq_epi8(v_haystack_block_second, second_pattern);

const auto mask = _mm_movemask_epi8(
_mm_and_si128(v_against_pattern_first, v_against_pattern_second));
/// first and second characters not present in 16 octets starting at `haystack`
if (mask == 0) {
haystack += n;
continue;
Expand Down
4 changes: 4 additions & 0 deletions be/src/vec/common/volnitsky.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ class VolnitskyBase {

const auto* haystack_end = haystack + haystack_size;

#ifdef __SSE4_1__
return fallback_searcher.search(haystack, haystack_end);
#endif

if (fallback || haystack_size <= needle_size || fallback_searcher.force_fallback)
return fallback_searcher.search(haystack, haystack_end);

Expand Down

0 comments on commit b627088

Please sign in to comment.