Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature]Support array_contains_seq functions like trino contains_sequence and ck hasSubstr function #33929

Merged
merged 19 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
aa015a8
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
950bd8f
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
e894887
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
7bce46a
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
4e564b3
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
6fc6a75
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
972e8ed
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 1, 2023
c574a79
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
5046ff4
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
37bcfa6
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
478c2de
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
1f73ee6
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
3bdd409
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
16d3f0c
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
efead1a
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
d7b8ace
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 7, 2023
a3724c8
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 8, 2023
6ed5253
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 8, 2023
2bbaa0d
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 62 additions & 10 deletions be/src/exprs/array_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ class ArrayContainsImpl {
}
};

template <bool Any>
template <bool Any, bool ContainsSeq>
class ArrayHasImpl {
public:
static StatusOr<ColumnPtr> evaluate(const Column& array, const Column& element) {
Expand Down Expand Up @@ -752,6 +752,42 @@ class ArrayHasImpl {
return true;
}
}

// Single-row helper for the ContainsSeq mode of ArrayHasImpl.
//
// Walks elements[element_start, element_end) with cursor j and
// targets[target_start, target_end) with cursor i, and returns whether i
// reached target_end when the loop stopped. An empty target range
// (target_start == target_end) never enters the loop and therefore yields true.
//
// NOTE(review): null_map_elements, null_map_targets and the NullableElement /
// NullableTarget template flags are not consulted anywhere in this body, and
// the is_null lambda below is never called; null entries are compared through
// the same value path as non-null ones. This matches the open review question
// further down in this block - confirm the intended NULL semantics.
template <bool NullableElement, bool NullableTarget, typename ElementColumn>
static uint8 __process_seq(const ElementColumn& elements, uint32 element_start, uint32 element_end,
const ElementColumn& targets, uint32 target_start, uint32 target_end,
const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
// Array/Map/Struct columns have their ValueType replaced by uint8_t here; for
// those types the raw-pointer branch below is not taken.
using ValueType = std::conditional_t<std::is_same_v<ArrayColumn, ElementColumn> ||
std::is_same_v<MapColumn, ElementColumn> ||
std::is_same_v<StructColumn, ElementColumn>,
uint8_t, typename ElementColumn::ValueType>;

// Reads a null flag from a null map; currently unused (hence maybe_unused).
[[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
return (*null_map)[idx] != 0;
};
bool found = false;
// i: position in targets, j: position in elements.
size_t i = target_start;
size_t j = element_start;
while (i < target_end && j < element_end) {
// Nested and JSON columns are compared through equals(); every other column
// type is compared through its raw value buffer.
if constexpr (std::is_same_v<ArrayColumn, ElementColumn> || std::is_same_v<MapColumn, ElementColumn> ||
std::is_same_v<StructColumn, ElementColumn> || std::is_same_v<JsonColumn, ElementColumn>) {
found = (elements.equals(j, targets, i) == 1);
} else {
auto elements_ptr = (const ValueType*)(elements.raw_data());
auto targets_ptr = (const ValueType*)(targets.raw_data());
found = (elements_ptr[j] == targets_ptr[i]);
}
if (found) {
// Current pair matches: advance both cursors.
i++;
j++;
} else {
// NOTE(review): i was initialised to target_start but is reset to 0 here.
// For any row whose target_start is not 0 this restarts the comparison at
// the wrong offset - looks like it should be target_start; confirm.
// NOTE(review): j only moves one past the mismatching element and does not
// return to the element after the start of the partial match. With
// elements [1,1,2] and targets [1,2] this appears to return false even
// though [1,2] occurs contiguously - confirm.
i = 0;
j++;
}
}
// True only when every target position was consumed.
return i == target_end;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems not process nullable cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems not process nullable cases?

done

}
template <bool NullableElement, bool NullableTarget, bool ConstTarget, typename ElementColumn>
static StatusOr<ColumnPtr> _process(const ElementColumn& elements, const UInt32Column& element_offsets,
const ElementColumn& targets, const UInt32Column& target_offsets,
Expand All @@ -769,16 +805,23 @@ class ArrayHasImpl {

for (size_t i = 0; i < num_array; i++) {
uint8_t found = 0;
if constexpr (ConstTarget) {
DCHECK_EQ(num_target, 1);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[0],
target_offsets_ptr[1], null_map_elements, null_map_targets);
} else {
if (ContainsSeq) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (ContainsSeq) {
if constexpr (ContainsSeq) {

DCHECK_EQ(num_array, num_target);
found = __process<NullableElement, NullableTarget, ElementColumn>(
found = __process_seq<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[i],
target_offsets_ptr[i + 1], null_map_elements, null_map_targets);
} else {
if constexpr (ConstTarget) {
DCHECK_EQ(num_target, 1);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[0],
target_offsets_ptr[1], null_map_elements, null_map_targets);
} else {
DCHECK_EQ(num_array, num_target);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[i],
target_offsets_ptr[i + 1], null_map_elements, null_map_targets);
}
}
result_ptr[i] = found;
}
Expand Down Expand Up @@ -962,7 +1005,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_contains_any([[maybe_unused]] Function
const ColumnPtr& arg0 = ColumnHelper::unpack_and_duplicate_const_column(columns[0]->size(), columns[0]); // array
const ColumnPtr& arg1 = ColumnHelper::unpack_and_duplicate_const_column(columns[1]->size(), columns[1]); // element

return ArrayHasImpl<true>::evaluate(*arg0, *arg1);
return ArrayHasImpl<true, false>::evaluate(*arg0, *arg1);
}

StatusOr<ColumnPtr> ArrayFunctions::array_contains_all([[maybe_unused]] FunctionContext* context,
Expand All @@ -971,7 +1014,16 @@ StatusOr<ColumnPtr> ArrayFunctions::array_contains_all([[maybe_unused]] Function
const ColumnPtr& arg0 = ColumnHelper::unpack_and_duplicate_const_column(columns[0]->size(), columns[0]); // array
const ColumnPtr& arg1 = ColumnHelper::unpack_and_duplicate_const_column(columns[1]->size(), columns[1]); // element

return ArrayHasImpl<false>::evaluate(*arg0, *arg1);
return ArrayHasImpl<false, false>::evaluate(*arg0, *arg1);
}

// SQL entry point for array_contains_seq(array, sequence).
//
// columns[0] is the array argument and columns[1] the sequence argument. The
// result is whatever ArrayHasImpl<false, true>::evaluate produces for the two
// unpacked columns.
StatusOr<ColumnPtr> ArrayFunctions::array_contains_seq([[maybe_unused]] FunctionContext* context,
                                                       const Columns& columns) {
    // Presumably short-circuits when an input is an only-null column; the macro
    // is defined outside this view - confirm.
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    const auto& array_input = columns[0];
    const auto& sequence_input = columns[1];

    // Each argument is unpacked with its own row count as the target size.
    const ColumnPtr& haystack =
            ColumnHelper::unpack_and_duplicate_const_column(array_input->size(), array_input);
    const ColumnPtr& needle =
            ColumnHelper::unpack_and_duplicate_const_column(sequence_input->size(), sequence_input);

    // Any = false, ContainsSeq = true selects the sequence-matching mode.
    using ContainsSeqImpl = ArrayHasImpl<false, true>;
    return ContainsSeqImpl::evaluate(*haystack, *needle);
}

// cannot be called anymore
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/array_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class ArrayFunctions {
DEFINE_VECTORIZED_FN(array_filter);
DEFINE_VECTORIZED_FN(all_match);
DEFINE_VECTORIZED_FN(any_match);
DEFINE_VECTORIZED_FN(array_contains_seq);

// array function for nested type(Array/Map/Struct)
DEFINE_VECTORIZED_FN(array_distinct_any_type);
Expand Down
2 changes: 1 addition & 1 deletion gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@

# reserve 150281
[150282, 'array_contains_all', 'BOOLEAN', ['ANY_ARRAY', 'ANY_ARRAY'], 'ArrayFunctions::array_contains_all'],

[150283, 'array_contains_seq', 'BOOLEAN', ['ANY_ARRAY', 'ANY_ARRAY'], 'ArrayFunctions::array_contains_seq'],
[150300, 'array_filter', 'ANY_ARRAY', ['ANY_ARRAY', 'ARRAY_BOOLEAN'], 'ArrayFunctions::array_filter'],
[150301, 'all_match', 'BOOLEAN', ['ARRAY_BOOLEAN'], 'ArrayFunctions::all_match'],
[150302, 'any_match', 'BOOLEAN', ['ARRAY_BOOLEAN'], 'ArrayFunctions::any_match'],
Expand Down
60 changes: 60 additions & 0 deletions test/sql/test_array_fn/R/test_array_fn
Original file line number Diff line number Diff line change
Expand Up @@ -4394,4 +4394,64 @@ select array_filter([row(1,2,3), row(3,4,5), row(4,5,6)], [0,1,0]);
select cardinality([row(1,2,3), row(3,4,5)]);
-- result:
2
-- !result
select array_contains_seq([1,2,3,4], [2,3]);
-- result:
0
-- !result
select array_contains_seq([1,2,3,4], [3,2]);
-- result:
1
-- !result
select array_contains_seq([1,2,3,4], [1,2,3]);
-- result:
0
-- !result
select array_contains_seq([1,2,3,4], [1,2,4]);
-- result:
1
-- !result
select array_contains_seq([], []);
-- result:
1
-- !result
select array_contains_seq([1,null], [null]);
-- result:
1
-- !result
select array_contains_seq([1.0,2,3,4], [1]);
-- result:
1
-- !result
select array_contains_seq([cast(1.0 as decimal),2,3,4], [cast(1 as int)]);
-- result:
1
-- !result
select array_contains_seq(['a','b','c'], ['a','b']);
-- result:
1
-- !result
select array_contains_seq(['a','b','c'], ['a','c']);
-- result:
0
-- !result
select array_contains_seq([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]]);
-- result:
1
-- !result
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1}')]);
-- result:
0
-- !result
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1,"b":2}')]);
-- result:
1
-- !result
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2,5],[2,4,5])]);
-- result:
1
-- !result
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2],[2,4])]);
-- result:
0
-- !result
16 changes: 16 additions & 0 deletions test/sql/test_array_fn/T/test_array_fn
Original file line number Diff line number Diff line change
Expand Up @@ -825,3 +825,19 @@ select array_intersect([row(1,2,3), row(3,4,5)], [row(3,4,5)]);
select array_contains_all([row(1,2,3), row(3,4,5)], [row(3,4,5)]);
select array_filter([row(1,2,3), row(3,4,5), row(4,5,6)], [0,1,0]);
select cardinality([row(1,2,3), row(3,4,5)]);

select array_contains_seq([1,2,3,4], [2,3]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if SELECT array_contains_seq([1, 2, NULL, 3, 4], ['a'])

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SELECT array_contains_seq([1, 2, NULL, 3, 4], [2,3]);
SELECT array_contains_seq([1, 2, NULL, 3, 4], null);
SELECT array_contains_seq(null, [2,3])
SELECT array_contains_seq([1, 2, NULL, 3, 4], [null,null])
SELECT array_contains_seq([1, 2, NULL], [null,2])
SELECT array_contains_seq(null, null)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We prefer to keep the same behavior as Trino.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if SELECT array_contains_seq([1, 2, NULL, 3, 4], ['a'])

MySQL [(none)]> select array_contains_all([1, 2, NULL, 3, 4], ['a']);
+-----------------------------------------------+
| array_contains_all([1, 2, NULL, 3, 4], ['a']) |
+-----------------------------------------------+
| 0 |
+-----------------------------------------------+
1 row in set (0.18 sec)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SELECT array_contains_seq([1, 2, NULL, 3, 4], [2,3]); SELECT array_contains_seq([1, 2, NULL, 3, 4], null); SELECT array_contains_seq(null, [2,3]) SELECT array_contains_seq([1, 2, NULL, 3, 4], [null,null]) SELECT array_contains_seq([1, 2, NULL], [null,2]) SELECT array_contains_seq(null, null)

All of these have been added to sqltest.

Copy link
Contributor

@fzhedu fzhedu Nov 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the cases here only consider a single constant array. Please add some cases for array columns from a table, which may expose some bugs, like the following cases:

CREATE TABLE array_test ( 
pk bigint not null ,
i_0   Array<BigInt>,
i_1   Array<BigInt>,
ai_0  Array<Array<BigInt>>,
ai_1  Array<Array<BigInt>>
) ENGINE=OLAP
DUPLICATE KEY(`pk`)
DISTRIBUTED BY HASH(`pk`) BUCKETS 3
PROPERTIES (
"replication_num" = "1",
"in_memory" = "false"
);

insert into array_test values
(1,[null,1],[null],[null],[[]]),
(2,[null],[1,3,4,5,1,2],[[null,1],null],[[1,null]]),
(3,[],[],[[],null,[1,1]],[],[1,1]),
(4,null,[],[[1,1]],[[1,1],null),
(5,[4,4,4],[4,null],[null],[null]),
(6,[1,1,2,1,1,2,3,3],[1,2,3],[[1]],[[1],[2],null,[null]]);

select array_contains_seq(i_0,i_1),array_contains_seq(i_0,i_0),array_contains_seq(i_1,i_0), array_contains_seq(ai_0, ai_1),array_contains_seq(ai_1, ai_1),array_contains_seq(ai_1, ai_0),array_contains_seq(ai_0, i_1),array_contains_seq(ai_0,null),,array_contains_seq(ai_0, [1,2]) from array_test;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this case to the test file.

select array_contains_seq([1,2,3,4], [3,2]);
select array_contains_seq([1,2,3,4], [1,2,3]);
select array_contains_seq([1,2,3,4], [1,2,4]);
select array_contains_seq([], []);
select array_contains_seq([1,null], [null]);
select array_contains_seq([1.0,2,3,4], [1]);
select array_contains_seq([cast(1.0 as decimal),2,3,4], [cast(1 as int)]);
select array_contains_seq(['a','b','c'], ['a','b']);
select array_contains_seq(['a','b','c'], ['a','c']);
select array_contains_seq([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]]);
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1}')]);
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1,"b":2}')]);
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2,5],[2,4,5])]);
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2],[2,4])]);