Skip to content

Commit

Permalink
[bugfix](topn) fix topn optimzation wrong result for NULL values (apa…
Browse files Browse the repository at this point in the history
…che#18121)

1. add PassNullPredicate to fix topn wrong result for NULL values
2. refactor RuntimePredicate to avoid using TCondition
3. refactor using ordering_exprs in fe and vsort_node
  • Loading branch information
xiaokang authored Mar 31, 2023
1 parent 8be4385 commit 4e1e0ce
Show file tree
Hide file tree
Showing 369 changed files with 3,193 additions and 161 deletions.
168 changes: 168 additions & 0 deletions be/src/olap/accept_null_predicate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>

#include "olap/column_predicate.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "olap/wrapper_field.h"
#include "vec/columns/column_dictionary.h"

namespace doris {

/**
* A wrapper predicate that delegate to nested predicate
* but pass (set/return true) for NULL value rows.
*
* At parent, it's used for topn runtime predicate.
*/
class AcceptNullPredicate : public ColumnPredicate {
public:
AcceptNullPredicate(ColumnPredicate* nested)
: ColumnPredicate(nested->column_id(), nested->opposite()), _nested {nested} {}

PredicateType type() const override { return _nested->type(); }

Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* roaring) const override {
return _nested->evaluate(iterator, num_rows, roaring);
}

Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* bitmap) const override {
return _nested->evaluate(schema, iterator, num_rows, bitmap);
}

uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const override {
LOG(FATAL) << "evaluate without flags not supported";
// return _nested->evaluate(column, sel, size);
}

void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
bool* flags) const override {
if (column.has_null()) {
// copy original flags
bool* original_flags = new bool[size];
memcpy(original_flags, flags, size * sizeof(bool));

// call evaluate_and and restore true for NULL rows
_nested->evaluate_and(column, sel, size, flags);
for (uint16_t i = 0; i < size; ++i) {
uint16_t idx = sel[i];
if (original_flags[idx] && !flags[idx] && column.is_null_at(idx)) {
flags[i] = true;
}
}
} else {
_nested->evaluate_and(column, sel, size, flags);
}
}

void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
bool* flags) const override {
if (column.has_null()) {
// call evaluate_or and set true for NULL rows
_nested->evaluate_or(column, sel, size, flags);
for (uint16_t i = 0; i < size; ++i) {
uint16_t idx = sel[i];
if (!flags[idx] && column.is_null_at(idx)) {
flags[i] = true;
}
}
} else {
_nested->evaluate_or(column, sel, size, flags);
}
}

bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
// there is null in range, accept it
if (statistic.first->is_null() || statistic.second->is_null()) {
return true;
}
return _nested->evaluate_and(statistic);
}

bool evaluate_del(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
return _nested->evaluate_del(statistic);
}

bool evaluate_and(const BloomFilter* bf) const override { return _nested->evaluate_and(bf); }

bool can_do_bloom_filter() const override { return _nested->can_do_bloom_filter(); }

void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const override {
_nested->evaluate_vec(column, size, flags);
if (column.has_null()) {
for (uint16_t i = 0; i < size; ++i) {
if (!flags[i] && column.is_null_at(i)) {
// set true for NULL rows
flags[i] = true;
}
}
}
}

void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const override {
if (column.has_null()) {
// copy original flags
bool* original_flags = new bool[size];
memcpy(original_flags, flags, size * sizeof(bool));

// call evaluate_and_vec and restore true for NULL rows
_nested->evaluate_and_vec(column, size, flags);
for (uint16_t i = 0; i < size; ++i) {
if (original_flags[i] && !flags[i] && column.is_null_at(i)) {
flags[i] = true;
}
}
} else {
_nested->evaluate_and_vec(column, size, flags);
}
}

std::string get_search_str() const override { return _nested->get_search_str(); }

std::string debug_string() const override {
return "passnull predicate for " + _nested->debug_string();
}

/// Some predicates need to be cloned for each segment.
bool need_to_clone() const override { return _nested->need_to_clone(); }

void clone(ColumnPredicate** to) const override {
if (need_to_clone()) {
ColumnPredicate* clone_nested;
_nested->clone(&clone_nested);
*to = new AcceptNullPredicate(clone_nested);
}
}

private:
std::string _debug_string() const override {
return "passnull predicate for " + _nested->debug_string();
}

ColumnPredicate* _nested;
};

} //namespace doris
2 changes: 2 additions & 0 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ class ColumnPredicate {
}
uint32_t column_id() const { return _column_id; }

bool opposite() const { return _opposite; }

virtual std::string debug_string() const {
return _debug_string() + ", column_id=" + std::to_string(_column_id) +
", opposite=" + (_opposite ? "true" : "false");
Expand Down
45 changes: 33 additions & 12 deletions be/src/runtime/runtime_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,22 @@

#include "runtime/runtime_predicate.h"

#include "olap/accept_null_predicate.h"
#include "olap/predicate_creator.h"

namespace doris {

namespace vectorized {

Status RuntimePredicate::init(const PrimitiveType type) {
Status RuntimePredicate::init(const PrimitiveType type, const bool nulls_first) {
std::unique_lock<std::shared_mutex> wlock(_rwlock);

if (_inited) {
return Status::OK();
}

_nulls_first = nulls_first;

_predicate_arena.reset(new Arena());

// set get value function
Expand Down Expand Up @@ -111,6 +114,11 @@ Status RuntimePredicate::init(const PrimitiveType type) {
}

Status RuntimePredicate::update(const Field& value, const String& col_name, bool is_reverse) {
// skip null value
if (value.is_null()) {
return Status::OK();
}

std::unique_lock<std::shared_mutex> wlock(_rwlock);

// TODO why null
Expand Down Expand Up @@ -139,18 +147,31 @@ Status RuntimePredicate::update(const Field& value, const String& col_name, bool
return Status::OK();
}

TCondition condition;
condition.__set_column_name(col_name);
condition.__set_column_unique_id(_tablet_schema->column(col_name).unique_id());
condition.__set_condition_op(is_reverse ? ">=" : "<=");

// get value string from _orderby_extrem and push back to condition_values
condition.condition_values.push_back(_get_value_fn(_orderby_extrem));

VLOG_DEBUG << "update runtime predicate condition " << condition;

// update _predictate
_predictate.reset(parse_to_predicate(_tablet_schema, condition, _predicate_arena.get(), false));
int32_t col_unique_id = _tablet_schema->column(col_name).unique_id();
const TabletColumn& column = _tablet_schema->column_by_uid(col_unique_id);
uint32_t index = _tablet_schema->field_index(col_unique_id);
auto val = _get_value_fn(_orderby_extrem);
ColumnPredicate* pred = nullptr;
if (is_reverse) {
// For DESC sort, create runtime predicate col_name >= min_top_value
// since values that < min_top_value are less than any value in current topn values
pred = create_comparison_predicate<PredicateType::GE>(column, index, val, false,
_predicate_arena.get());
} else {
// For ASC sort, create runtime predicate col_name <= max_top_value
// since values that > min_top_value are large than any value in current topn values
pred = create_comparison_predicate<PredicateType::LE>(column, index, val, false,
_predicate_arena.get());
}

// For NULLS FIRST, wrap a AcceptNullPredicate to return true for NULL
// since ORDER BY ASC/DESC should get NULL first but pred returns NULL
// and NULL in where predicate will be treated as FALSE
if (_nulls_first) {
pred = new AcceptNullPredicate(pred);
}
_predictate.reset(pred);

return Status::OK();
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/runtime/runtime_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class RuntimePredicate {
public:
RuntimePredicate() = default;

Status init(const PrimitiveType type);
Status init(const PrimitiveType type, const bool nulls_first);

bool inited() {
std::unique_lock<std::shared_mutex> wlock(_rwlock);
Expand All @@ -71,6 +71,7 @@ class RuntimePredicate {
TabletSchemaSPtr _tablet_schema;
std::unique_ptr<Arena> _predicate_arena;
std::function<std::string(const Field&)> _get_value_fn;
bool _nulls_first = true;
bool _inited = false;

static std::string get_bool_value(const Field& field) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/exec/scan/new_olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ Status NewOlapScanner::_init_tablet_reader_params(

if (!_state->skip_storage_engine_merge()) {
TOlapScanNode& olap_scan_node = ((NewOlapScanNode*)_parent)->_olap_scan_node;
// order by table keys optimization for topn
// will only read head/tail of data file since it's already sorted by keys
if (olap_scan_node.__isset.sort_info && olap_scan_node.sort_info.is_asc_order.size() > 0) {
_limit = _parent->_limit_per_scanner;
_tablet_reader_params.read_orderby_key = true;
Expand Down
8 changes: 4 additions & 4 deletions be/src/vec/exec/vsort_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) {
// init runtime predicate
if (_use_topn_opt) {
auto query_ctx = state->get_query_fragments_ctx();
auto first_sort_expr_node = tnode.sort_node.sort_info.sort_tuple_slot_exprs[0].nodes[0];
auto first_sort_expr_node = tnode.sort_node.sort_info.ordering_exprs[0].nodes[0];
if (first_sort_expr_node.node_type == TExprNodeType::SLOT_REF) {
auto first_sort_slot = first_sort_expr_node.slot_ref;
for (auto tuple_desc : row_desc.tuple_descriptors()) {
for (auto tuple_desc : this->row_desc().tuple_descriptors()) {
if (tuple_desc->id() != first_sort_slot.tuple_id) {
continue;
}
for (auto slot : tuple_desc->slots()) {
if (slot->id() == first_sort_slot.slot_id) {
RETURN_IF_ERROR(
query_ctx->get_runtime_predicate().init(slot->type().type));
RETURN_IF_ERROR(query_ctx->get_runtime_predicate().init(
slot->type().type, _nulls_first[0]));
break;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,9 @@ public boolean checkPushSort(SortNode sortNode) {
// olap order by key: a.b.c.d
// sort key: (a) (a,b) (a,b,c) (a,b,c,d) is ok
// (a,c) (a,c,d), (a,c,b) (a,c,f) (a,b,c,d,e)is NOT ok
List<Expr> sortExprs = sortNode.getSortInfo().getMaterializedOrderingExprs();
List<Expr> sortExprs = sortNode.getSortInfo().getOrigOrderingExprs();
List<Boolean> nullsFirsts = sortNode.getSortInfo().getNullsFirst();
List<Boolean> isAscOrders = sortNode.getSortInfo().getIsAscOrder();
if (sortExprs.size() > olapTable.getDataSortInfo().getColNum()) {
return false;
}
Expand All @@ -1013,7 +1015,18 @@ public boolean checkPushSort(SortNode sortNode) {
Column tableKey = olapTable.getFullSchema().get(i);
// sort slot.
Expr sortExpr = sortExprs.get(i);
if (!(sortExpr instanceof SlotRef) || !tableKey.equals(((SlotRef) sortExpr).getColumn())) {
if (sortExpr instanceof SlotRef) {
SlotRef slotRef = (SlotRef) sortExpr;
if (tableKey.equals(slotRef.getColumn())) {
// ORDER BY DESC NULLS FIRST can not be optimized to only read file tail,
// since NULLS is at file head but data is at tail
if (tableKey.isAllowNull() && nullsFirsts.get(i) && !isAscOrders.get(i)) {
return false;
}
} else {
return false;
}
} else {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,8 +603,8 @@ private void checkAndSetTopnOpt(PlanNode node) {
if (child instanceof OlapScanNode && sortNode.getLimit() > 0
&& ConnectContext.get() != null && ConnectContext.get().getSessionVariable() != null
&& sortNode.getLimit() <= ConnectContext.get().getSessionVariable().topnOptLimitThreshold
&& sortNode.getSortInfo().getMaterializedOrderingExprs().size() > 0) {
Expr firstSortExpr = sortNode.getSortInfo().getMaterializedOrderingExprs().get(0);
&& sortNode.getSortInfo().getOrigOrderingExprs().size() > 0) {
Expr firstSortExpr = sortNode.getSortInfo().getOrigOrderingExprs().get(0);
if (firstSortExpr instanceof SlotRef && !firstSortExpr.getType().isStringType()
&& !firstSortExpr.getType().isFloatingPointType()) {
OlapScanNode scanNode = (OlapScanNode) child;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !dup_key_topn_q01_asc --
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N
2022-01-10T16:04:49 -744222108 64562066838726736.278 false 85 25637 -21148 -593493650 -11450.651 -1.167904321387118E9 32585085266732240.402 2022-09-10 2022-08-08T18:00:04 2022-11-03 54.143.193.60 vitae@Oyoloo.mil Ruskin Court 16
2022-01-10T16:06:02 1769095688 95858385508825939.586 false -61 -24768 8981 -1811059535 26550.553 1.898948089429888E9 6302075696085095.930 2022-04-12 2022-05-06T18:56:26 2022-01-18 70.75.107.213 JacquelineRamirez@Edgetag.edu Schlimgen Way 58
2022-01-10T16:11:53 1906472648 30323570439371776.353 true -64 23204 -27723 1753599187 -28017.578 -8.7412093534361E7 34175877199258167.909 2023-01-07 2022-07-02T15:40:57 2022-10-09 60.230.5.23 asperiores_aut@Chatterpoint.name Summit Plaza 70
Expand All @@ -8,6 +10,4 @@
2022-01-10T16:19:43 -331752585 78238523737497869.470 true 0 -18857 26728 -1593527511 -3940.6157 1.011426330009696E9 30383576870508748.746 2022-10-18 2022-12-13T04:03:22 2022-04-25 114.191.125.61 et_natus@Chatterpoint.edu Merchant Circle 4
2022-01-10T16:26:01 694891812 89716246258986314.603 false -81 -31141 -32045 -1462025516 1286.6216 1.408000357577549E9 82173119133223782.614 2022-06-28 2022-08-30T06:48:08 2022-06-16 6.202.144.1 yFox@Flashpoint.biz Barnett Drive 12
2022-01-10T16:34:44 1969608603 60215082108365548.380 true 77 10466 7024 -1111293463 -3119.6628 -1.98697170405241E9 47421181841164225.577 2022-01-27 2022-06-05T03:27:17 2022-05-18 150.153.19.134 StephanieEdwards@Topicstorm.gov Onsgard Trail 10
2022-01-10T16:38:44 397842904 11471760929161672.212 false 48 22098 -23304 -647850805 -28931.523 -2.2117236773336E8 45351058658553553.851 2022-11-28 2022-08-13T12:22:09 2022-03-18 142.44.9.89 KathrynHarris@Riffpedia.biz Iowa Lane 43
2022-01-10T16:42:51 -914436742 2714763078802815.280 true -77 30692 -23058 1557401365 -25909.229 1.807346678696413E9 68043251987061973.806 2022-11-09 2022-08-04T09:16:13 2022-04-17 88.83.49.144 DouglasBailey@Skyndu.info Macpherson Center 25

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !dup_key_topn_q01_asc_nulls_last --
2022-01-10T16:04:49 -744222108 64562066838726736.278 false 85 25637 -21148 -593493650 -11450.651 -1.167904321387118E9 32585085266732240.402 2022-09-10 2022-08-08T18:00:04 2022-11-03 54.143.193.60 vitae@Oyoloo.mil Ruskin Court 16
2022-01-10T16:06:02 1769095688 95858385508825939.586 false -61 -24768 8981 -1811059535 26550.553 1.898948089429888E9 6302075696085095.930 2022-04-12 2022-05-06T18:56:26 2022-01-18 70.75.107.213 JacquelineRamirez@Edgetag.edu Schlimgen Way 58
2022-01-10T16:11:53 1906472648 30323570439371776.353 true -64 23204 -27723 1753599187 -28017.578 -8.7412093534361E7 34175877199258167.909 2023-01-07 2022-07-02T15:40:57 2022-10-09 60.230.5.23 asperiores_aut@Chatterpoint.name Summit Plaza 70
2022-01-10T16:13:51 2105767399 29408006992196347.603 true 86 18588 13544 -679723410 21493.826 1.885770666296631E9 85804384237665278.429 2023-01-07 2022-04-09T20:48:14 2022-06-22 98.145.190.138 yHarvey@Flipstorm.edu Southridge Street 43
2022-01-10T16:17:55 1863090100 90399389135889759.150 false -45 1656 -29181 -40413678 -32730.72 4.51575752697888E8 33932341853184569.730 2022-05-23 2022-04-24T13:12:53 2022-05-06 31.182.27.9 qui@Oodoo.mil Glendale Park 70
2022-01-10T16:19:43 -331752585 78238523737497869.470 true 0 -18857 26728 -1593527511 -3940.6157 1.011426330009696E9 30383576870508748.746 2022-10-18 2022-12-13T04:03:22 2022-04-25 114.191.125.61 et_natus@Chatterpoint.edu Merchant Circle 4
2022-01-10T16:26:01 694891812 89716246258986314.603 false -81 -31141 -32045 -1462025516 1286.6216 1.408000357577549E9 82173119133223782.614 2022-06-28 2022-08-30T06:48:08 2022-06-16 6.202.144.1 yFox@Flashpoint.biz Barnett Drive 12
2022-01-10T16:34:44 1969608603 60215082108365548.380 true 77 10466 7024 -1111293463 -3119.6628 -1.98697170405241E9 47421181841164225.577 2022-01-27 2022-06-05T03:27:17 2022-05-18 150.153.19.134 StephanieEdwards@Topicstorm.gov Onsgard Trail 10
2022-01-10T16:38:44 397842904 11471760929161672.212 false 48 22098 -23304 -647850805 -28931.523 -2.2117236773336E8 45351058658553553.851 2022-11-28 2022-08-13T12:22:09 2022-03-18 142.44.9.89 KathrynHarris@Riffpedia.biz Iowa Lane 43
2022-01-10T16:42:51 -914436742 2714763078802815.280 true -77 30692 -23058 1557401365 -25909.229 1.807346678696413E9 68043251987061973.806 2022-11-09 2022-08-04T09:16:13 2022-04-17 88.83.49.144 DouglasBailey@Skyndu.info Macpherson Center 25

Loading

0 comments on commit 4e1e0ce

Please sign in to comment.