Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature]Support array_contains_seq functions like trino contains_sequence and ck hasSubstr function #33929

Merged
merged 19 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
aa015a8
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
950bd8f
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
e894887
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 30, 2023
7bce46a
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
4e564b3
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
6fc6a75
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Oct 31, 2023
972e8ed
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 1, 2023
c574a79
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
5046ff4
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
37bcfa6
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
478c2de
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
1f73ee6
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 2, 2023
3bdd409
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
16d3f0c
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
efead1a
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 3, 2023
d7b8ace
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 7, 2023
a3724c8
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 8, 2023
6ed5253
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 8, 2023
2bbaa0d
support array_contains_seq functions like trino contains_sequence and…
leoyy0316 Nov 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 93 additions & 10 deletions be/src/exprs/array_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ class ArrayContainsImpl {
}
};

template <bool Any>
template <bool Any, bool ContainsSeq>
class ArrayHasImpl {
public:
static StatusOr<ColumnPtr> evaluate(const Column& array, const Column& element) {
Expand Down Expand Up @@ -752,6 +752,73 @@ class ArrayHasImpl {
return true;
}
}

// Checks whether targets[target_start, target_end) occurs as one contiguous run inside
// elements[element_start, element_end).
//
// Both ranges are half-open offsets into the flattened element columns. A NULL entry is
// compared as a value: NULL matches only NULL. Each null map is dereferenced only when the
// matching Nullable* template flag is set.
//
// Returns 1 when the run is found (an empty target range always matches), otherwise 0.
template <bool NullableElement, bool NullableTarget, typename ElementColumn>
static uint8 __process_seq(const ElementColumn& elements, uint32 element_start, uint32 element_end,
                           const ElementColumn& targets, uint32 target_start, uint32 target_end,
                           const NullColumn::Container* null_map_elements,
                           const NullColumn::Container* null_map_targets) {
    using ValueType = std::conditional_t<std::is_same_v<ArrayColumn, ElementColumn> ||
                                                 std::is_same_v<MapColumn, ElementColumn> ||
                                                 std::is_same_v<StructColumn, ElementColumn>,
                                         uint8_t, typename ElementColumn::ValueType>;
    [[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
        return (*null_map)[idx] != 0;
    };

    const size_t element_len = element_end - element_start;
    const size_t target_len = target_end - target_start;
    if (element_len < target_len) {
        return false;
    }
    if (target_len == 0) {
        return true;
    }
    // From here on element_len >= target_len > 0, so the element range is not empty.

    // Compares elements[element_idx] with targets[target_idx], treating NULL as a value.
    auto same_value = [&](size_t element_idx, size_t target_idx) -> bool {
        bool null_target = false;
        if constexpr (NullableTarget) {
            null_target = is_null(null_map_targets, target_idx);
        }
        bool null_element = false;
        if constexpr (NullableElement) {
            null_element = is_null(null_map_elements, element_idx);
        }
        if (null_target || null_element) {
            // Equal only when both sides are NULL.
            return null_target && null_element;
        }
        if constexpr (std::is_same_v<ArrayColumn, ElementColumn> || std::is_same_v<MapColumn, ElementColumn> ||
                      std::is_same_v<StructColumn, ElementColumn> ||
                      std::is_same_v<JsonColumn, ElementColumn>) {
            return elements.equals(element_idx, targets, target_idx) == 1;
        } else {
            auto elements_ptr = reinterpret_cast<const ValueType*>(elements.raw_data());
            auto targets_ptr = reinterpret_cast<const ValueType*>(targets.raw_data());
            return elements_ptr[element_idx] == targets_ptr[target_idx];
        }
    };

    // Only start positions that leave room for the whole target are tried, so
    // start + matched never reaches element_end.
    for (size_t start = element_start; start + target_len <= element_end; ++start) {
        size_t matched = 0;
        // Stop at the first mismatch: the sequence has to be contiguous.
        while (matched < target_len && same_value(start + matched, target_start + matched)) {
            ++matched;
        }
        // Compare against the target LENGTH, not the absolute end offset, so rows whose
        // target does not begin at offset 0 are handled correctly.
        if (matched == target_len) {
            return true;
        }
    }
    return false;
}
template <bool NullableElement, bool NullableTarget, bool ConstTarget, typename ElementColumn>
static StatusOr<ColumnPtr> _process(const ElementColumn& elements, const UInt32Column& element_offsets,
const ElementColumn& targets, const UInt32Column& target_offsets,
Expand All @@ -769,16 +836,23 @@ class ArrayHasImpl {

for (size_t i = 0; i < num_array; i++) {
uint8_t found = 0;
if constexpr (ConstTarget) {
DCHECK_EQ(num_target, 1);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[0],
target_offsets_ptr[1], null_map_elements, null_map_targets);
} else {
if constexpr (ContainsSeq) {
DCHECK_EQ(num_array, num_target);
found = __process<NullableElement, NullableTarget, ElementColumn>(
found = __process_seq<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets, target_offsets_ptr[i],
target_offsets_ptr[i + 1], null_map_elements, null_map_targets);
} else {
if constexpr (ConstTarget) {
DCHECK_EQ(num_target, 1);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets,
target_offsets_ptr[0], target_offsets_ptr[1], null_map_elements, null_map_targets);
} else {
DCHECK_EQ(num_array, num_target);
found = __process<NullableElement, NullableTarget, ElementColumn>(
elements, element_offsets_ptr[i], element_offsets_ptr[i + 1], targets,
target_offsets_ptr[i], target_offsets_ptr[i + 1], null_map_elements, null_map_targets);
}
}
result_ptr[i] = found;
}
Expand Down Expand Up @@ -962,7 +1036,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_contains_any([[maybe_unused]] Function
const ColumnPtr& arg0 = ColumnHelper::unpack_and_duplicate_const_column(columns[0]->size(), columns[0]); // array
const ColumnPtr& arg1 = ColumnHelper::unpack_and_duplicate_const_column(columns[1]->size(), columns[1]); // element

return ArrayHasImpl<true>::evaluate(*arg0, *arg1);
return ArrayHasImpl<true, false>::evaluate(*arg0, *arg1);
}

StatusOr<ColumnPtr> ArrayFunctions::array_contains_all([[maybe_unused]] FunctionContext* context,
Expand All @@ -971,7 +1045,16 @@ StatusOr<ColumnPtr> ArrayFunctions::array_contains_all([[maybe_unused]] Function
const ColumnPtr& arg0 = ColumnHelper::unpack_and_duplicate_const_column(columns[0]->size(), columns[0]); // array
const ColumnPtr& arg1 = ColumnHelper::unpack_and_duplicate_const_column(columns[1]->size(), columns[1]); // element

return ArrayHasImpl<false>::evaluate(*arg0, *arg1);
return ArrayHasImpl<false, false>::evaluate(*arg0, *arg1);
}

// SQL entry point for array_contains_seq(array, sub_array).
// columns[0] is the array to search and columns[1] is the sequence to look for. Both are
// passed through ColumnHelper::unpack_and_duplicate_const_column before the check is
// delegated to ArrayHasImpl<false, true> (the ContainsSeq variant).
// NOTE(review): RETURN_IF_COLUMNS_ONLY_NULL presumably short-circuits when an argument is an
// only-null column -- confirm against the macro definition.
StatusOr<ColumnPtr> ArrayFunctions::array_contains_seq([[maybe_unused]] FunctionContext* context,
                                                       const Columns& columns) {
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    const auto& array_arg = columns[0];
    const auto& sequence_arg = columns[1];
    const ColumnPtr& array_column =
            ColumnHelper::unpack_and_duplicate_const_column(array_arg->size(), array_arg); // array
    const ColumnPtr& sequence_column =
            ColumnHelper::unpack_and_duplicate_const_column(sequence_arg->size(), sequence_arg); // sequence

    return ArrayHasImpl<false, true>::evaluate(*array_column, *sequence_column);
}

// cannot be called anymore
Expand Down
1 change: 1 addition & 0 deletions be/src/exprs/array_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class ArrayFunctions {
DEFINE_VECTORIZED_FN(array_filter);
DEFINE_VECTORIZED_FN(all_match);
DEFINE_VECTORIZED_FN(any_match);
DEFINE_VECTORIZED_FN(array_contains_seq);

// array function for nested type(Array/Map/Struct)
DEFINE_VECTORIZED_FN(array_distinct_any_type);
Expand Down
51 changes: 51 additions & 0 deletions be/test/exprs/array_functions_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5467,4 +5467,55 @@ TEST_F(ArrayFunctionsTest, array_match_only_null) {
ASSERT_TRUE(dest_column->get(0).get_int8());
}
}
// NOLINTNEXTLINE
TEST_F(ArrayFunctionsTest, array_contains_seq) {
    // Nullable array column against nullable target column, one case per row:
    // 0: array_contains_seq(["a", "b", "c"], ["c"])         -> 1
    // 1: array_contains_seq(NULL, ["c"])                    -> NULL
    // 2: array_contains_seq(["a", "b", "c"], NULL)          -> NULL
    // 3: array_contains_seq(["a", "b", NULL], NULL)         -> NULL
    // 4: array_contains_seq(["a", "b", NULL], ["a", NULL])  -> 0
    // 5: array_contains_seq(NULL, ["a", NULL])              -> NULL
    // 6: array_contains_seq(["a", "b", NULL], [NULL])       -> 1
    // 7: array_contains_seq(["a", "b", "c"], ["d"])         -> 0
    // 8: array_contains_seq(["a", "b", "c"], ["a", "d"])    -> 0
    // 9: array_contains_seq(["a", "b", "c"], ["a", "c"])    -> 0 ("a" and "c" are not adjacent)
    {
        auto array = ColumnHelper::create_column(TYPE_ARRAY_VARCHAR, true);
        array->append_datum(DatumArray{"a", "b", "c"});
        array->append_datum(Datum());
        array->append_datum(DatumArray{"a", "b", "c"});
        array->append_datum(DatumArray{"a", "b", Datum()});
        array->append_datum(DatumArray{"a", "b", Datum()});
        array->append_datum(Datum());
        array->append_datum(DatumArray{"a", "b", Datum()});
        array->append_datum(DatumArray{"a", "b", "c"});
        array->append_datum(DatumArray{"a", "b", "c"});
        array->append_datum(DatumArray{"a", "b", "c"});

        auto target = ColumnHelper::create_column(TYPE_ARRAY_VARCHAR, true);
        target->append_datum(DatumArray{"c"});
        target->append_datum(DatumArray{"c"});
        target->append_datum(Datum());
        target->append_datum(Datum());
        target->append_datum(DatumArray{"a", Datum()});
        target->append_datum(DatumArray{"a", Datum()});
        target->append_datum(DatumArray{Datum()});
        target->append_datum(DatumArray{"d"});
        target->append_datum(DatumArray{"a", "d"});
        target->append_datum(DatumArray{"a", "c"});

        // Exercise the function under test (the original called array_contains_all by mistake).
        auto result = ArrayFunctions::array_contains_seq(nullptr, {array, target}).value();
        EXPECT_EQ(10, result->size());
        EXPECT_EQ(1, result->get(0).get_int8());
        EXPECT_TRUE(result->get(1).is_null());
        EXPECT_TRUE(result->get(2).is_null());
        EXPECT_TRUE(result->get(3).is_null());
        EXPECT_EQ(0, result->get(4).get_int8());
        EXPECT_TRUE(result->get(5).is_null());
        EXPECT_EQ(1, result->get(6).get_int8());
        EXPECT_EQ(0, result->get(7).get_int8());
        EXPECT_EQ(0, result->get(8).get_int8());
        // ["a", "c"] is a subset of ["a", "b", "c"] but not a contiguous sequence.
        EXPECT_EQ(0, result->get(9).get_int8());
    }
}
} // namespace starrocks
1 change: 1 addition & 0 deletions docs/TOC.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@
+ [array_concat](./sql-reference/sql-functions/array-functions/array_concat.md)
+ [array_contains](./sql-reference/sql-functions/array-functions/array_contains.md)
+ [array_contains_all](./sql-reference/sql-functions/array-functions/array_contains_all.md)
+ [array_contains_seq](./sql-reference/sql-functions/array-functions/array_contains_seq.md)
+ [array_cum_sum](./sql-reference/sql-functions/array-functions/array_cum_sum.md)
+ [array_difference](./sql-reference/sql-functions/array-functions/array_difference.md)
+ [array_distinct](./sql-reference/sql-functions/array-functions/array_distinct.md)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# array_contains_seq

## Description

Checks whether all the elements of array2 appear in array1 in the same exact order. Therefore, the function will return 1, if and only if array1 = prefix + array2 + suffix.

## Syntax

~~~Haskell
BOOLEAN array_contains_seq(arr1, arr2)
~~~

## Parameters

`arr1`, `arr2`: the two arrays to compare. This function checks whether the elements of `arr2` appear in `arr1` consecutively and in exactly the same order.

The data types of elements in the two arrays must be the same. For the data types of array elements supported by StarRocks, see [ARRAY](../../../sql-reference/sql-statements/data-types/Array.md).

## Return value

Returns a value of the BOOLEAN type.

1 is returned if `arr2` appears in `arr1` as a contiguous sequence in the same order. Otherwise, 0 is returned. NULL is returned if either array is NULL.
A NULL element is processed as a value. In other words, `array_contains_seq([1, 2, NULL, 3, 4], [2,3])` returns 0, whereas `array_contains_seq([1, 2, NULL, 3, 4], [2,NULL,3])` returns 1.
The order of the values in both arrays matters.

## Examples

Returns a value of the BOOLEAN type.

```Plaintext
MySQL [(none)]> select array_contains_seq([1,2,3,4], [1,2,3]);
+---------------------------------------------+
| array_contains_seq([1, 2, 3, 4], [1, 2, 3]) |
+---------------------------------------------+
| 1 |
+---------------------------------------------+
```

```Plaintext
MySQL [(none)]> select array_contains_seq([1,2,3,4], [3,2]);
+------------------------------------------+
| array_contains_seq([1, 2, 3, 4], [3, 2]) |
+------------------------------------------+
| 0 |
+------------------------------------------+
1 row in set (0.18 sec)
```

```Plaintext
MySQL [(none)]> select array_contains_seq([1, 2, NULL, 3, 4], ['a']);
+-----------------------------------------------+
| array_contains_seq([1, 2, NULL, 3, 4], ['a']) |
+-----------------------------------------------+
|                                             0 |
+-----------------------------------------------+
1 row in set (0.18 sec)
```

```Plaintext
MySQL [(none)]> select array_contains_seq([1, 2, NULL, 3, 4], [2,3]);
+------------------------------------------------+
| array_contains_seq([1, 2, NULL, 3, 4], [2, 3]) |
+------------------------------------------------+
|                                              0 |
+------------------------------------------------+
1 row in set (0.18 sec)
```
```Plaintext
MySQL [(none)]> SELECT array_contains_seq([1, 2, NULL, 3, 4], [2,NULL,3]);
+------------------------------------------------------+
| array_contains_seq([1, 2, NULL, 3, 4], [2, NULL, 3]) |
+------------------------------------------------------+
|                                                    1 |
+------------------------------------------------------+
1 row in set (0.18 sec)
```
2 changes: 1 addition & 1 deletion gensrc/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@

# reserve 150281
[150282, 'array_contains_all', 'BOOLEAN', ['ANY_ARRAY', 'ANY_ARRAY'], 'ArrayFunctions::array_contains_all'],

[150283, 'array_contains_seq', 'BOOLEAN', ['ANY_ARRAY', 'ANY_ARRAY'], 'ArrayFunctions::array_contains_seq'],
[150300, 'array_filter', 'ANY_ARRAY', ['ANY_ARRAY', 'ARRAY_BOOLEAN'], 'ArrayFunctions::array_filter'],
[150301, 'all_match', 'BOOLEAN', ['ARRAY_BOOLEAN'], 'ArrayFunctions::all_match'],
[150302, 'any_match', 'BOOLEAN', ['ARRAY_BOOLEAN'], 'ArrayFunctions::any_match'],
Expand Down
92 changes: 92 additions & 0 deletions test/sql/test_array_fn/R/test_array_fn
Original file line number Diff line number Diff line change
Expand Up @@ -4394,4 +4394,96 @@ select array_filter([row(1,2,3), row(3,4,5), row(4,5,6)], [0,1,0]);
select cardinality([row(1,2,3), row(3,4,5)]);
-- result:
2
-- !result
select array_contains_seq([1,2,3,4], [2,3]);
-- result:
1
-- !result
select array_contains_seq([1,2,3,4], [3,2]);
-- result:
0
-- !result
select array_contains_seq([1,2,3,4], [1,2,3]);
-- result:
1
-- !result
select array_contains_seq([1,2,3,4], [1,2,4]);
-- result:
0
-- !result
select array_contains_seq([], []);
-- result:
1
-- !result
select array_contains_seq([1,null], [null]);
-- result:
1
-- !result
select array_contains_seq([1.0,2,3,4], [1]);
-- result:
1
-- !result
select array_contains_seq([cast(1.0 as decimal),2,3,4], [cast(1 as int)]);
-- result:
1
-- !result
select array_contains_seq(['a','b','c'], ['a','b']);
-- result:
1
-- !result
select array_contains_seq(['a','b','c'], ['a','c']);
-- result:
0
-- !result
select array_contains_seq([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]]);
-- result:
1
-- !result
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1}')]);
-- result:
0
-- !result
select array_contains_seq([json_keys('{"a":1,"b":2}')], [json_keys('{"a":1,"b":2}')]);
-- result:
1
-- !result
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2,5],[2,4,5])]);
-- result:
1
-- !result
select array_contains_seq([map([1,2,5],[2,4,5])], [map([1,2],[2,4])]);
-- result:
0
-- !result
select array_contains_seq([1, 2, NULL, 3, 4], ['a']);
-- result:
0
-- !result
select array_contains_seq([1, 2, NULL, 3, 4], [2,3]);
-- result:
0
-- !result
select array_contains_seq([1, 2, NULL, 3, 4], null);
-- result:
None
-- !result
select array_contains_seq(null, [2,3]);
-- result:
None
-- !result
select array_contains_seq([1, 2, NULL, 3, 4], [null,null]);
-- result:
0
-- !result
select array_contains_seq([1, 2, NULL], [null,2]);
-- result:
0
-- !result
select array_contains_seq(null, null);
-- result:
None
-- !result
select array_contains_seq([1, 1, 2, NULL], [1,2]);
-- result:
1
-- !result
Loading