Skip to content

Commit

Permalink
[Enhancement](func)Introduce non_nullable extraction function. #16621
Browse files Browse the repository at this point in the history
Introduced a new function non_nullable to BE, which can extract concrete data column from a nullable column. If the input argument is already not a nullable column, raise an error.
  • Loading branch information
zclllyybb authored Feb 18, 2023
1 parent 45427b8 commit d6a8414
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 3 deletions.
1 change: 1 addition & 0 deletions be/src/vec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ set(VEC_FILES
functions/function_java_udf.cpp
functions/function_rpc.cpp
functions/function_convert_tz.cpp
functions/function_nonnullable.cpp
functions/least_greast.cpp
functions/function_fake.cpp
functions/url/function_url.cpp
Expand Down
76 changes: 76 additions & 0 deletions be/src/vec/functions/function_nonnullable.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.h
// and modified by Doris

#include "common/logging.h"
#include "common/status.h"
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/function_string.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/util.hpp"

namespace doris::vectorized {
/// Vectorized scalar function `non_nullable(col)`.
///
/// Takes exactly one argument. When the argument column is a ColumnNullable, the result is
/// a copy of its nested data column; the null map is dropped without being inspected, so a
/// row that was NULL exposes whatever value the nested column stores at that position.
/// When the argument column is not a ColumnNullable, execution fails with a runtime error.
class FunctionNonNullable : public IFunction {
public:
    static constexpr auto name = "non_nullable";

    static FunctionPtr create() { return std::make_shared<FunctionNonNullable>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 1; }

    /// The result type is the argument type with its nullable wrapper removed.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return remove_nullable(arguments[0]);
    }

    bool use_default_implementation_for_constants() const override { return true; }
    // Default null handling is disabled: execute_impl relies on seeing the ColumnNullable
    // wrapper itself in order to unwrap it.
    bool use_default_implementation_for_nulls() const override { return false; }

    /// Replaces the column at `result` with a clone of the nested column of the (nullable)
    /// argument column.
    ///
    /// @param block      block holding both the argument column and the result slot
    /// @param arguments  positions of the arguments in `block`; only arguments[0] is read
    /// @param result     position in `block` that receives the unwrapped column
    /// @return Status::OK() on success; Status::RuntimeError if the argument column is not
    ///         a ColumnNullable.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        auto& data = block.get_by_position(arguments[0]);
        const auto* nullable_column = check_and_get_column<ColumnNullable>(data.column);

        if (nullable_column == nullptr) {
            // Adjacent literals instead of a backslash continuation, so no source
            // indentation leaks into the message; one placeholder per argument.
            return Status::RuntimeError(
                    "Try to use originally non-nullable column {} in nullable's non-nullable "
                    "conversion of function {}.",
                    data.column->get_name(), get_name());
        }

        // Hand the block a clone of the nested column with the same row count.
        const ColumnPtr& nested = nullable_column->get_nested_column_ptr();
        block.replace_by_position(result, nested->clone_resized(nested->size()));
        return Status::OK();
    }
};

// Adds FunctionNonNullable to the given SimpleFunctionFactory.
// NOTE(review): presumably it becomes resolvable under FunctionNonNullable::name
// ("non_nullable") — confirm against SimpleFunctionFactory::register_function.
void register_function_non_nullable(SimpleFunctionFactory& factory) {
factory.register_function<FunctionNonNullable>();
}

} // namespace doris::vectorized
2 changes: 2 additions & 0 deletions be/src/vec/functions/simple_function_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void register_function_bitmap(SimpleFunctionFactory& factory);
void register_function_bitmap_variadic(SimpleFunctionFactory& factory);
void register_function_is_null(SimpleFunctionFactory& factory);
void register_function_is_not_null(SimpleFunctionFactory& factory);
void register_function_non_nullable(SimpleFunctionFactory& factory);
void register_function_to_time_function(SimpleFunctionFactory& factory);
void register_function_time_of_function(SimpleFunctionFactory& factory);
void register_function_string(SimpleFunctionFactory& factory);
Expand Down Expand Up @@ -194,6 +195,7 @@ class SimpleFunctionFactory {
register_function_bit(instance);
register_function_is_null(instance);
register_function_is_not_null(instance);
register_function_non_nullable(instance);
register_function_to_time_function(instance);
register_function_time_of_function(instance);
register_function_string(instance);
Expand Down
42 changes: 40 additions & 2 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# It contains all the meta data that describes the function.

# The format is:
# [sql aliases], <return_type>, [<args>], <backend symbol>, <nullable mode>
# [sql aliases], <return_type>, [<args>], <nullable mode>
#
# 'sql aliases' are the function names that can be used from sql. There must be at least
# one per function.
Expand Down Expand Up @@ -140,7 +140,6 @@
[['array_contains'], 'BOOLEAN', ['ARRAY_VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['array_contains'], 'BOOLEAN', ['ARRAY_STRING', 'STRING'], 'ALWAYS_NULLABLE'],


[['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_BOOLEAN'], ''],
[['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_TINYINT'], ''],
[['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_SMALLINT'], ''],
Expand Down Expand Up @@ -1526,6 +1525,45 @@
[['multi_match_any'], 'TINYINT', ['STRING', 'ARRAY_STRING'], 'ALWAYS_NOT_NULLABLE'],

[['uuid'], 'VARCHAR', [], 'ALWAYS_NOT_NULLABLE'],

[['non_nullable'], 'BOOLEAN', ['BOOLEAN'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'TINYINT', ['TINYINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'SMALLINT', ['SMALLINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'INT', ['INT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'BIGINT', ['BIGINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'LARGEINT', ['LARGEINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'FLOAT', ['FLOAT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DOUBLE', ['DOUBLE'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DATE', ['DATE'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DATEV2', ['DATEV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DATETIME', ['DATETIME'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DATETIMEV2', ['DATETIMEV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DECIMALV2', ['DECIMALV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DECIMAL32', ['DECIMAL32'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DECIMAL64', ['DECIMAL64'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'DECIMAL128', ['DECIMAL128'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'STRING', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'BITMAP', ['BITMAP'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'JSONB', ['JSONB'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_TINYINT', ['ARRAY_TINYINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_SMALLINT', ['ARRAY_SMALLINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_INT', ['ARRAY_INT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_BIGINT', ['ARRAY_BIGINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_LARGEINT', ['ARRAY_LARGEINT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DATETIME', ['ARRAY_DATETIME'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DATE', ['ARRAY_DATE'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DATETIMEV2', ['ARRAY_DATETIMEV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DATEV2', ['ARRAY_DATEV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_FLOAT', ['ARRAY_FLOAT'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DOUBLE', ['ARRAY_DOUBLE'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DECIMAL32', ['ARRAY_DECIMAL32'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DECIMAL64', ['ARRAY_DECIMAL64'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_DECIMAL128', ['ARRAY_DECIMAL128'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['non_nullable'], 'ARRAY_STRING', ['ARRAY_STRING'], 'ALWAYS_NOT_NULLABLE']
]

# Except the following functions, other function will directly return
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !nullable --
\N 0
1 1
2 2
3 3
4 4

-- !nullable --
\N []
1 [1, 2, 3]
2 []
3 [1, 2, 3]
4 []

-- !nullable --
\N []
1 ['a', 'b', 'c']
2 []
3 []
4 ['a', 'b', 'c']

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for non_nullable(): nullable columns (k1..k3) must be unwrapped,
// and a NOT NULL column (k4) must be rejected with an error.
suite("test_non_nullable_function", "query") {
    def tableName = "tbl_test_non_nullable_function"

    sql """DROP TABLE IF EXISTS ${tableName}"""
    sql """
        CREATE TABLE IF NOT EXISTS ${tableName} (
            `k1` int(11) NULL COMMENT "",
            `k2` ARRAY<int(11)> NULL COMMENT "",
            `k3` ARRAY<VARCHAR(11)> NULL COMMENT "",
            `k4` ARRAY<decimal(27,9)> NOT NULL COMMENT ""
        ) ENGINE=OLAP
        DUPLICATE KEY(`k1`)
        DISTRIBUTED BY HASH(`k1`) BUCKETS 1
        PROPERTIES (
            "replication_allocation" = "tag.location.default: 1",
            "storage_format" = "V2"
        )
    """
    sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", "c"], [1.3, 2.14]) """
    sql """ INSERT INTO ${tableName} VALUES(2, [], [], [1.3, 2.14]) """
    sql """ INSERT INTO ${tableName} VALUES(3, [1, 2, 3], [], [1.3, 2.14]) """
    sql """ INSERT INTO ${tableName} VALUES(4, [], ["a", "b", "c"], [1.3, 2.14]) """
    sql """ INSERT INTO ${tableName} VALUES(null, null, null, [1.1,2.2,3.3]) """

    // Positive cases: nullable scalar, nullable int array, nullable string array.
    qt_nullable "SELECT k1, non_nullable(k1) FROM ${tableName} ORDER BY k1"
    qt_nullable "SELECT k1, non_nullable(k2) FROM ${tableName} ORDER BY k1"
    qt_nullable "SELECT k1, non_nullable(k3) FROM ${tableName} ORDER BY k1"

    // Negative case: k4 is NOT NULL. The query has to be executed through `sql`;
    // merely building the statement text can never throw, which would leave the
    // catch branch unreachable and the error path untested.
    boolean rejected = false
    try {
        sql "SELECT k1, non_nullable(k4) FROM ${tableName} ORDER BY k1"
    } catch (Exception e) {
        rejected = true
        assertTrue(e.getMessage().contains("Try to use originally non-nullable column"))
    }
    // A query that silently succeeds is a failure of this test.
    assertTrue(rejected)
}
2 changes: 1 addition & 1 deletion run-regression-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Usage: $0 <shell_options> <framework_options>
-xg exclude the specified group
-xd exclude the specified directory
-genOut generate .out file if not exist
-forceGenOut delete and generate .out file if not exist
-forceGenOut delete and generate .out file
-parallel run tests using specified threads
-randomOrder run tests in a random order
-times run tests {times} times
Expand Down

0 comments on commit d6a8414

Please sign in to comment.