Skip to content

Commit

Permalink
[Fix](multi-catalog) Fix incorrect hive result by disabling string dict…
Browse files Browse the repository at this point in the history
… filter if exprs contain null expr.
  • Loading branch information
kaka11chen committed Aug 24, 2023
1 parent 6a49769 commit 95e55ac
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 8 deletions.
19 changes: 15 additions & 4 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1752,15 +1752,26 @@ bool OrcReader::_can_filter_by_dict(int slot_id) {
}

// TODO: check exprs like 'a > 10 is null'; the inner 'a > 10' should still be filterable by dict.
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
const auto& root_expr = ctx->root();
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::string is_null_str;
std::string function_name = root_expr->fn().name.function_name;
std::string function_name = expr->fn().name.function_name;
if (function_name.compare("is_null_pred") == 0 ||
function_name.compare("is_not_null_pred") == 0) {
return false;
}
} else {
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
}
return true;
Expand Down
20 changes: 16 additions & 4 deletions be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,19 +197,31 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
}

// TODO: check exprs like 'a > 10 is null'; the inner 'a > 10' should still be filterable by dict.
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
const auto& root_expr = ctx->root();
if (root_expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::function<bool(const VExpr* expr)> visit_function_call = [&](const VExpr* expr) {
if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
std::string is_null_str;
std::string function_name = root_expr->fn().name.function_name;
std::string function_name = expr->fn().name.function_name;
if (function_name.compare("is_null_pred") == 0 ||
function_name.compare("is_not_null_pred") == 0) {
return false;
}
} else {
for (auto& child : expr->children()) {
if (!visit_function_call(child.get())) {
return false;
}
}
}
return true;
};
for (auto& ctx : _slot_id_to_filter_conjuncts->at(slot_id)) {
if (!visit_function_call(ctx->root().get())) {
return false;
}
}
return true;
}

// This function is copied from
// https://github.com/apache/impala/blob/master/be/src/exec/parquet/hdfs-parquet-scanner.cc#L1717
bool RowGroupReader::is_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,9 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
\N 2073732 2 13846443 596483.00 21.00 29163.75 0.10 0.08 R F 1994-12-06 1995-01-01 DELIVER IN PERSON FOB dolphins nag furiously q
\N 2479044 4 9763795 13805.00 40.00 74332.40 0.05 0.05 R F 1994-11-16 1995-01-01 COLLECT COD RAIL equests hinder qu

-- !null_expr_dict_filter_orc --
4844 4363

-- !null_expr_dict_filter_parquet --
4844 4363

Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ suite("test_external_catalog_hive", "p2,external,hive,external_remote,external_r
qt_not_single_slot_filter_conjuncts_orc """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """
qt_not_single_slot_filter_conjuncts_parquet """ select * from multi_catalog.lineitem_string_date_orc where l_commitdate < l_receiptdate and l_receiptdate = '1995-01-01' order by l_orderkey, l_partkey, l_suppkey, l_linenumber limit 10; """

// test null expr with dict filter issue
qt_null_expr_dict_filter_orc """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_orc WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """
qt_null_expr_dict_filter_parquet """ select count(*), count(distinct user_no) from multi_catalog.dict_fitler_test_parquet WHERE partitions in ('2023-08-21') and actual_intf_type = 'type1' and (REUSE_FLAG<> 'y' or REUSE_FLAG is null); """


// test remember last used database after switch / rename catalog
sql """switch ${catalog_name};"""

Expand Down

0 comments on commit 95e55ac

Please sign in to comment.