Skip to content

Commit

Permalink
[fix](join) fix anti join incorrectly outputs null values (apache#15567)
Browse files Browse the repository at this point in the history
  • Loading branch information
luozenglin authored Jan 6, 2023
1 parent b419348 commit 05d72e8
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 22 deletions.
7 changes: 7 additions & 0 deletions be/src/vec/exec/join/vhash_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,13 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc
if (eos || (!_should_build_hash_table && !state->enable_pipeline_exec())) {
_process_hashtable_ctx_variants_init(state);
}

// Since comparing null values is meaningless, a left anti join must not output null values
// when the build side is not empty.
if (eos && !_build_blocks->empty() &&
(_join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)) {
_probe_ignore_null = true;
}
return Status::OK();
}

Expand Down
12 changes: 9 additions & 3 deletions be/src/vec/exec/join/vnested_loop_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ void VNestedLoopJoinNode::_do_filtering_and_update_visited_flags_impl(
}
}

template <bool SetBuildSideFlag, bool SetProbeSideFlag>
template <bool SetBuildSideFlag, bool SetProbeSideFlag, bool IgnoreNull>
Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, bool materialize) {
auto column_to_keep = block->columns();
// If we need to set visited flags for build side,
Expand Down Expand Up @@ -543,8 +543,14 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block,
auto* __restrict filter_data = filter.data();

const size_t size = filter.size();
for (size_t i = 0; i < size; ++i) {
filter_data[i] &= !null_map[i];
if constexpr (IgnoreNull) {
for (size_t i = 0; i < size; ++i) {
filter_data[i] |= null_map[i];
}
} else {
for (size_t i = 0; i < size; ++i) {
filter_data[i] &= !null_map[i];
}
}
_do_filtering_and_update_visited_flags_impl<decltype(filter), SetBuildSideFlag,
SetProbeSideFlag>(
Expand Down
20 changes: 13 additions & 7 deletions be/src/vec/exec/join/vnested_loop_join_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ class VNestedLoopJoinNode final : public VJoinNodeBase {
private:
template <typename JoinOpType, bool set_build_side_flag, bool set_probe_side_flag>
Status _generate_join_block_data(RuntimeState* state, JoinOpType& join_op_variants) {
constexpr bool ignore_null = JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType::value == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN;

MutableBlock mutable_join_block(&_join_block);

while (_join_block.rows() < state->batch_size() && !_matched_rows_done) {
Expand Down Expand Up @@ -101,9 +105,10 @@ class VNestedLoopJoinNode final : public VJoinNodeBase {
}

if constexpr (set_probe_side_flag) {
auto status = _do_filtering_and_update_visited_flags<set_build_side_flag,
set_probe_side_flag>(
&_join_block, !_is_left_semi_anti);
auto status =
_do_filtering_and_update_visited_flags<set_build_side_flag,
set_probe_side_flag, ignore_null>(
&_join_block, !_is_left_semi_anti);
_update_additional_flags(&_join_block);
if (!status.ok()) {
return status;
Expand Down Expand Up @@ -136,9 +141,10 @@ class VNestedLoopJoinNode final : public VJoinNodeBase {
}

if constexpr (!set_probe_side_flag) {
Status status = _do_filtering_and_update_visited_flags<set_build_side_flag,
set_probe_side_flag>(
&_join_block, !_is_right_semi_anti);
Status status =
_do_filtering_and_update_visited_flags<set_build_side_flag, set_probe_side_flag,
ignore_null>(&_join_block,
!_is_right_semi_anti);
_update_additional_flags(&_join_block);
mutable_join_block = MutableBlock(&_join_block);
if (!status.ok()) {
Expand All @@ -162,7 +168,7 @@ class VNestedLoopJoinNode final : public VJoinNodeBase {
void _process_left_child_block(MutableBlock& mutable_block,
const Block& now_process_build_block) const;

template <bool SetBuildSideFlag, bool SetProbeSideFlag>
template <bool SetBuildSideFlag, bool SetProbeSideFlag, bool IgnoreNull>
Status _do_filtering_and_update_visited_flags(Block* block, bool materialize);

// TODO: replace it with a template lambda once C++20 is supported
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
2

-- !select --
\N
2

-- !select --
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
2

-- !select --
\N
2

-- !select --
Expand Down
87 changes: 87 additions & 0 deletions regression-test/data/query_p0/join/test_join.out
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,72 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk
3

-- !left_anti_join_with_other_pred --
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

-- !left_anti_join_null_1 --
4
5
6
7
8
9
10
11
12
13
14
15

-- !left_anti_join_null_2 --
\N
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

-- !left_anti_join_null_3 --
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

-- !left_anti_join_null_4 --
\N
1
2
Expand All @@ -1239,6 +1305,27 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk
2
3

-- !right_anti_join_null_1 --
1

-- !right_anti_join_null_2 --
\N
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

-- !join_without_keyword1 --
1 1 1989 1989 1001 1001
2 2 1986 1986 1001 1001
Expand Down
28 changes: 18 additions & 10 deletions regression-test/suites/query_p0/join/test_join.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ suite("test_join", "query,p0") {
def tbName1 = "test"
def tbName2 = "baseall"
def tbName3 = "bigtable"
def empty_name = "empty"

sql"drop view if exists empty"
sql"create view empty as select * from baseall where k1 = 0"

order_sql """select j.*, d.* from ${tbName2} j full outer join ${tbName1} d on (j.k1=d.k1) order by j.k1, j.k2, j.k3, j.k4, d.k1, d.k2
limit 100"""
Expand Down Expand Up @@ -719,6 +723,14 @@ suite("test_join", "query,p0") {

qt_left_anti_join_with_other_pred "select b.k1 from ${tbName2} b left anti join ${tbName1} t on b.k1 = t.k1 and 1 = 2 order by b.k1"

qt_left_anti_join_null_1 "select b.k1 from ${tbName2} b left anti join ${tbName1} t on b.k1 = t.k1 order by b.k1"

qt_left_anti_join_null_2 "select b.k1 from ${tbName2} b left anti join ${empty_name} t on b.k1 = t.k1 order by b.k1"

qt_left_anti_join_null_3 "select b.k1 from ${tbName2} b left anti join ${tbName1} t on b.k1 > t.k2 order by b.k1"

qt_left_anti_join_null_4 "select b.k1 from ${tbName2} b left anti join ${empty_name} t on b.k1 > t.k2 order by b.k1"

// right anti join
for (s in right_selected){
def res43 = sql"""select ${s} from ${tbName2} a right anti join ${tbName1} b
Expand Down Expand Up @@ -790,6 +802,10 @@ suite("test_join", "query,p0") {

qt_right_anti_join_with_other_pred "select t.k1 from ${tbName2} b right anti join ${tbName1} t on b.k1 = t.k1 and 1 = 2 order by t.k1"

qt_right_anti_join_null_1 "select b.k1 from ${tbName1} t right anti join ${tbName2} b on b.k1 > t.k1 order by b.k1"

qt_right_anti_join_null_2 "select b.k1 from ${empty_name} t right anti join ${tbName2} b on b.k1 > t.k1 order by b.k1"

// join with no join keyword
for (s in selected){
qt_join_without_keyword1"""select ${s} from ${tbName1} a , ${tbName2} b
Expand Down Expand Up @@ -832,9 +848,6 @@ suite("test_join", "query,p0") {
}

// join with empty table
sql"drop view if exists empty"
sql"create view empty as select * from baseall where k1 = 0"
String empty_name = "empty"
qt_join_with_emptyTable1"""select a.k1, a.k2, a.k3, b.k1, b.k2, b.k3 from ${tbName2} a join ${empty_name} b on a.k1 = b.k1
order by 1, 2, 3, 4, 5"""
qt_join_with_emptyTable2"""select a.k1, a.k2, a.k3, b.k1, b.k2, b.k3 from ${tbName2} a inner join ${empty_name} b on a.k1 = b.k1
Expand Down Expand Up @@ -962,7 +975,7 @@ suite("test_join", "query,p0") {
def res71 = sql"""select * from ${tbName2} a left anti join ${tbName1} b on (a.${c} = b.${c})
order by a.k1, a.k2, a.k3"""
def res72 = sql"""select distinct a.* from ${tbName2} a left outer join ${tbName1} b on (a.${c} = b.${c})
where b.k1 is null order by a.k1, a.k2, a.k3"""
where b.k1 is null and a.k1 is not null order by a.k1, a.k2, a.k3"""
check2_doris(res71, res72)

def res73 = sql"""select * from ${tbName2} a right anti join ${tbName1} b on (a.${c} = b.${c})
Expand Down Expand Up @@ -1070,7 +1083,7 @@ suite("test_join", "query,p0") {

def res85 = sql"""select a.k1, a.k2 from ${tbName2} a left anti join ${null_name} b on a.k1 = b.n2
order by 1, 2"""
def res86 = sql"""select k1, k2 from ${tbName2} order by k1, k2"""
def res86 = sql"""select k1, k2 from ${tbName2} where k1 is not null order by k1, k2"""
check2_doris(res85, res86)

def res87 = sql"""select b.n1, b.n2 from ${tbName2} a right anti join ${null_name} b on a.k1 = b.n2
Expand All @@ -1083,11 +1096,6 @@ suite("test_join", "query,p0") {
def res90 = sql"""select k1, k2 from ${tbName2} order by k1, k2"""
check2_doris(res89, res90)

def res91 = sql"""select a.n1, a.n2 from ${null_name} a left anti join ${tbName2} b on b.k1 = a.n2
order by 1, 2"""
def res92 = sql"""select n1, n2 from ${null_name} order by n1, n2"""
check2_doris(res91, res92)

// join on predicate
qt_join_on_predicate1"""select c.k1 from ${tbName2} a join ${tbName1} b on a.k2 between 0 and 1000
join ${tbName3} c on a.k10 = c.k10 order by k1 limit 65535"""
Expand Down

0 comments on commit 05d72e8

Please sign in to comment.