Skip to content

Commit

Permalink
[Bugfix](MV) Fix insert negative value to table with bitmap_union MV …
Browse files Browse the repository at this point in the history
…will cause count distinct result incorrect (apache#13507)
  • Loading branch information
yangzhg authored Oct 21, 2022
1 parent 847b80e commit 3e92f74
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 19 deletions.
24 changes: 24 additions & 0 deletions be/src/exprs/bitmap_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,30 @@ StringVal BitmapFunctions::to_bitmap(doris_udf::FunctionContext* ctx,
return serialize(ctx, &bitmap);
}

StringVal BitmapFunctions::to_bitmap_with_check(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& src) {
BitmapValue bitmap;

if (!src.is_null) {
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
reinterpret_cast<char*>(src.ptr), src.len, &parse_result);
if (parse_result == StringParser::PARSE_SUCCESS) {
bitmap.add(int_value);
} else {
std::stringstream ss;
ss << "The input: " << src.to_string()
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot load negative values to column with"
" to_bitmap MV on it.";
ctx->set_error(ss.str().c_str());
return StringVal::null();
}
}

return serialize(ctx, &bitmap);
}

StringVal BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& src) {
BitmapValue bitmap;
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/bitmap_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class BitmapFunctions {

static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& src);
static StringVal to_bitmap(FunctionContext* ctx, const StringVal& src);
static StringVal to_bitmap_with_check(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_hash(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_hash64(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_or(FunctionContext* ctx, const StringVal& src, const StringVal& dst);
Expand Down
35 changes: 22 additions & 13 deletions be/src/olap/schema_change.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,11 @@ bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColu
switch (ref_column.type()) {
case OLAP_FIELD_TYPE_TINYINT:
if (*(int8_t*)src < 0) {
LOG(WARNING) << "The input: " << *(int8_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
LOG(WARNING)
<< "The input: " << *(int8_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values.";
return false;
}
origin_value = *(int8_t*)src;
Expand All @@ -449,9 +451,11 @@ bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColu
break;
case OLAP_FIELD_TYPE_SMALLINT:
if (*(int16_t*)src < 0) {
LOG(WARNING) << "The input: " << *(int16_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
LOG(WARNING)
<< "The input: " << *(int16_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values.";
return false;
}
origin_value = *(int16_t*)src;
Expand All @@ -461,9 +465,11 @@ bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColu
break;
case OLAP_FIELD_TYPE_INT:
if (*(int32_t*)src < 0) {
LOG(WARNING) << "The input: " << *(int32_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
LOG(WARNING)
<< "The input: " << *(int32_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values.";
return false;
}
origin_value = *(int32_t*)src;
Expand All @@ -473,9 +479,11 @@ bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColu
break;
case OLAP_FIELD_TYPE_BIGINT:
if (*(int64_t*)src < 0) {
LOG(WARNING) << "The input: " << *(int64_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
LOG(WARNING)
<< "The input: " << *(int64_t*)src
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values.";
return false;
}
origin_value = *(int64_t*)src;
Expand Down Expand Up @@ -1747,7 +1755,8 @@ Status SchemaChangeHandler::process_alter_tablet_v2(const TAlterTabletReqV2& req

std::shared_mutex SchemaChangeHandler::_mutex;
std::unordered_set<int64_t> SchemaChangeHandler::_tablet_ids_in_converting;
std::set<std::string> SchemaChangeHandler::_supported_functions = {"hll_hash", "to_bitmap"};
std::set<std::string> SchemaChangeHandler::_supported_functions = {"hll_hash", "to_bitmap",
"to_bitmap_with_check"};

// In the past schema change and rollup will create new tablet and will wait for txns starting before the task to finished
// It will cost a lot of time to wait and the task is very difficult to understand.
Expand Down
13 changes: 8 additions & 5 deletions be/src/vec/functions/function_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ struct ToBitmapWithCheck {
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
} else {
LOG(WARNING) << "The input: " << raw_str
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
return Status::InternalError(
"bitmap value must be in [0, 18446744073709551615)");
std::stringstream ss;
ss << "The input: " << std::string(raw_str, str_size)
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values or cannot load negative values to column "
"with to_bitmap MV on it.";
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2432,7 +2432,7 @@
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'_ZN5doris15BitmapFunctions20to_bitmap_with_checkEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
Expand Down
77 changes: 77 additions & 0 deletions regression-test/suites/rollup/test_materialized_view_bitmap.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_materialized_view_bitmap", "rollup") {
def tbName1 = "test_materialized_view_bitmap"

def getJobState = { tableName ->
def jobStateResult = sql """ SHOW ALTER TABLE MATERIALIZED VIEW WHERE TableName='${tableName}' ORDER BY CreateTime DESC LIMIT 1; """
return jobStateResult[0][8]
}
sql "DROP TABLE IF EXISTS ${tbName1}"
sql """
CREATE TABLE ${tbName1}(
k1 BOOLEAN NOT NULL,
k2 TINYINT NOT NULL,
k3 SMALLINT NOT NULL
)
DISTRIBUTED BY HASH(k1) properties("replication_num" = "1");
"""

sql "CREATE MATERIALIZED VIEW test_neg as select k1,bitmap_union(to_bitmap(k2)), bitmap_union(to_bitmap(k3)) FROM ${tbName1} GROUP BY k1;"
max_try_secs = 60
while (max_try_secs--) {
String res = getJobState(tbName1)
if (res == "FINISHED") {
break
} else {
Thread.sleep(2000)
if (max_try_secs < 1) {
println "test timeout," + "state:" + res
assertEquals("FINISHED",res)
}
}
}

sql "set enable_vectorized_engine=false"
explain {
sql "insert into ${tbName1} values(1,1,1);"
contains "to_bitmap_with_check"
}
sql "set enable_vectorized_engine=true"
explain {
sql "insert into ${tbName1} values(1,1,1);"
contains "to_bitmap_with_check"
}
sql "insert into ${tbName1} values(1,1,1);"
sql "set enable_vectorized_engine=false"
sql "insert into ${tbName1} values(0,1,1);"
sql "set enable_vectorized_engine=true"

test {
sql "insert into ${tbName1} values(1,-1,-1);"
// check exception message contains
exception "The input: -1 is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"
}
sql "set enable_vectorized_engine=false"
test {
sql "insert into ${tbName1} values(1,-1,-1);"
// check exception message contains
exception "The input: -1 is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"
}

sql "DROP TABLE ${tbName1} FORCE;"
}

0 comments on commit 3e92f74

Please sign in to comment.