Skip to content

Commit

Permalink
[Optimize](Function) Add fast path for col like '%%' or col like '%' …
Browse files Browse the repository at this point in the history
…or regexp '\\.*' (apache#20143)

Add fast path for col like '%%' or col like '%' or regexp '\\.*'
(1) LIKE: about a 34% speed-up in a count() test.
Supported forms: col like '%%', col like '%', col not like '%%', col not like '%'.

(2) REGEXP: about a 37% speed-up in a count() test.
Supported forms: col regexp '\\.*', col not regexp '\\.*'.

Q1: select count() from hits where url like '%';
Q2: select count() from hits where url regexp '\\.*';
  • Loading branch information
ZhangYu0123 authored Jun 2, 2023
1 parent 422fcd6 commit 78c37b5
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
46 changes: 44 additions & 2 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,15 @@ static const RE2 STARTS_WITH_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\

// A regex to match any regex pattern which is equivalent to a constant string match.
// The pattern must be a literal '^', then a run of characters containing no regex
// metacharacter (captured as group 1), then a literal '$'.
static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
// A regex to match .*
// NOTE(review): after C++ string unescaping this pattern is (\\.\*)+, i.e. one or
// more repetitions of: a literal backslash, any single character, a literal '*'.
// It therefore accepts the text `\.*` (and e.g. `\a*`) but not a bare `.*`.
// Confirm which spelling of the pattern actually reaches FunctionRegexp::open.
static const RE2 ALLPASS_RE("(\\\\.\\*)+");

// Like patterns
// Each regex below is matched against the raw LIKE pattern text to classify it.
static const re2::RE2 LIKE_SUBSTRING_RE("(?:%+)(((\\\\_)|([^%_\\\\]))+)(?:%+)");
static const re2::RE2 LIKE_ENDS_WITH_RE("(?:%+)(((\\\\_)|([^%_]))+)");
static const re2::RE2 LIKE_STARTS_WITH_RE("(((\\\\%)|(\\\\_)|([^%_\\\\]))+)(?:%+)");
static const re2::RE2 LIKE_EQUALS_RE("(((\\\\_)|([^%_]))+)");
// A LIKE pattern made up solely of one or more '%' wildcards ("%", "%%", ...).
static const re2::RE2 LIKE_ALLPASS_RE("%+");

Status LikeSearchState::clone(LikeSearchState& cloned) {
cloned.escape_char = escape_char;
Expand Down Expand Up @@ -88,6 +91,16 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
return Status::OK();
}

// All-pass fast path, column variant.
// Writes 1 into result[0 .. val.size()) so that every row of `val` is reported as a
// match. Neither `state` nor `pattern` is read. Always returns Status::OK().
Status FunctionLikeBase::constant_allpass_fn(LikeSearchState* state, const ColumnString& val,
                                             const StringRef& pattern,
                                             ColumnUInt8::Container& result) {
    const auto row_count = val.size();
    size_t row = 0;
    while (row < row_count) {
        result[row] = 1;
        ++row;
    }
    return Status::OK();
}

Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
const StringRef& pattern,
ColumnUInt8::Container& result) {
Expand Down Expand Up @@ -135,6 +148,17 @@ Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const Col
return Status::OK();
}

// All-pass fast path, predicate-column variant.
// Writes 1 into result[0 .. sz). The selection vector `sel`, the column `val`,
// `state` and `pattern` are not read. Always returns Status::OK().
Status FunctionLikeBase::constant_allpass_fn_predicate(
        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
        const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel,
        size_t sz) {
    for (size_t slot = 0; slot != sz; ++slot) {
        result[slot] = 1;
    }
    return Status::OK();
}

Status FunctionLikeBase::constant_starts_with_fn_predicate(
LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) {
Expand Down Expand Up @@ -186,6 +210,13 @@ Status FunctionLikeBase::constant_substring_fn_predicate(
return Status::OK();
}

// All-pass fast path, single-value variant.
// Stores 1 through `result` without reading `val`, `state` or `pattern`.
// Always returns Status::OK().
Status FunctionLikeBase::constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val,
                                                    const StringRef& pattern,
                                                    unsigned char* result) {
    result[0] = 1;
    return Status::OK();
}

Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state,
const StringRef& val,
const StringRef& pattern,
Expand Down Expand Up @@ -671,7 +702,13 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
state->search_state.pattern_str = pattern_str;
std::string search_string;

if (pattern_str.empty() || RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &search_string)) {
if (!pattern_str.empty() && RE2::FullMatch(pattern_str, LIKE_ALLPASS_RE)) {
state->search_state.set_search_string("");
state->function = constant_allpass_fn;
state->predicate_like_function = constant_allpass_fn_predicate;
state->scalar_function = constant_allpass_fn_scalar;
} else if (pattern_str.empty() ||
RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &search_string)) {
if (VLOG_DEBUG_IS_ON) {
verbose_log_match(pattern_str, "LIKE_EQUALS_RE", LIKE_EQUALS_RE);
VLOG_DEBUG << "search_string : " << search_string
Expand Down Expand Up @@ -784,7 +821,12 @@ Status FunctionRegexp::open(FunctionContext* context, FunctionContext::FunctionS

std::string pattern_str = pattern.to_string();
std::string search_string;
if (RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
if (RE2::FullMatch(pattern_str, ALLPASS_RE)) {
state->search_state.set_search_string("");
state->function = constant_allpass_fn;
state->predicate_like_function = constant_allpass_fn_predicate;
state->scalar_function = constant_allpass_fn_scalar;
} else if (RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
state->search_state.set_search_string(search_string);
state->function = constant_equals_fn;
state->predicate_like_function = constant_equals_fn_predicate;
Expand Down
12 changes: 12 additions & 0 deletions be/src/vec/functions/like.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ class FunctionLikeBase : public IFunction {
const ColumnString::Offsets& value_offsets,
ColumnUInt8::Container& result, LikeSearchState* search_state);

// Fast path for patterns that accept every input: sets result[i] = 1 for each
// of the val.size() rows. `state` and `pattern` are ignored.
static Status constant_allpass_fn(LikeSearchState* state, const ColumnString& val,
const StringRef& pattern, ColumnUInt8::Container& result);

static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
const StringRef& pattern, ColumnUInt8::Container& result);

Expand Down Expand Up @@ -182,6 +185,12 @@ class FunctionLikeBase : public IFunction {
const StringRef& pattern, ColumnUInt8::Container& result,
const uint16_t* sel, size_t sz);

// Fast path for patterns that accept every input: sets result[i] = 1 for
// i in [0, sz). `state`, `val`, `pattern` and `sel` are ignored.
static Status constant_allpass_fn_predicate(LikeSearchState* state,
const PredicateColumnType<TYPE_STRING>& val,
const StringRef& pattern,
ColumnUInt8::Container& result, const uint16_t* sel,
size_t sz);

static Status constant_starts_with_fn_predicate(LikeSearchState* state,
const PredicateColumnType<TYPE_STRING>& val,
const StringRef& pattern,
Expand All @@ -206,6 +215,9 @@ class FunctionLikeBase : public IFunction {
ColumnUInt8::Container& result,
const uint16_t* sel, size_t sz);

// Fast path for patterns that accept every input: stores 1 into *result.
// `state`, `val` and `pattern` are ignored.
static Status constant_allpass_fn_scalar(LikeSearchState* state, const StringRef& val,
const StringRef& pattern, unsigned char* result);

static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val,
const StringRef& pattern, unsigned char* result);

Expand Down

0 comments on commit 78c37b5

Please sign in to comment.