Skip to content

Commit

Permalink
[Optimization](string) optimize constant empty string compare ( colum…
Browse files Browse the repository at this point in the history
…n='', column!='') (apache#18321)

Optimize comparison against a constant empty string:
(1) When the constant is the empty string '' (size is 0), we can compare the offsets directly using SIMD.

q10: SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
q11: SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
q12: SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
q13: SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
q14: SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Issue Number: close #xxx
  • Loading branch information
ZhangYu0123 authored Apr 8, 2023
1 parent 0517616 commit 58bbd46
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions be/src/vec/functions/functions_comparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,23 @@ struct StringEqualsImpl {
ColumnString::Offset b_size,
PaddedPODArray<UInt8>& c) {
size_t size = a_offsets.size();
ColumnString::Offset prev_a_offset = 0;

for (size_t i = 0; i < size; ++i) {
auto a_size = a_offsets[i] - prev_a_offset;

c[i] = positive == memequal_small_allow_overflow15(a_data.data() + prev_a_offset,
a_size, b_data.data(), b_size);

prev_a_offset = a_offsets[i];
if (b_size == 0) {
auto* __restrict data = c.data();
auto* __restrict offsets = a_offsets.data();
for (size_t i = 0; i < size; ++i) {
data[i] =
positive ? (offsets[i] == offsets[i - 1]) : (offsets[i] != offsets[i - 1]);
}
} else {
ColumnString::Offset prev_a_offset = 0;
const auto* a_pos = a_data.data();
const auto* b_pos = b_data.data();
for (size_t i = 0; i < size; ++i) {
auto a_size = a_offsets[i] - prev_a_offset;
c[i] = positive == memequal_small_allow_overflow15(a_pos + prev_a_offset, a_size,
b_pos, b_size);
prev_a_offset = a_offsets[i];
}
}
}

Expand Down

0 comments on commit 58bbd46

Please sign in to comment.