diff --git a/.github/config/uncovered_files.csv b/.github/config/uncovered_files.csv index 5d27e881b3f6..b0ce34ed431b 100644 --- a/.github/config/uncovered_files.csv +++ b/.github/config/uncovered_files.csv @@ -126,6 +126,7 @@ core_functions/scalar/date/date_part.cpp 17 core_functions/scalar/date/date_sub.cpp 209 core_functions/scalar/date/date_trunc.cpp 23 core_functions/scalar/date/strftime.cpp 10 +core_functions/scalar/date/time_bucket.cpp 3 core_functions/scalar/enum/enum_functions.cpp 10 core_functions/scalar/generic/current_setting.cpp 1 core_functions/scalar/generic/least.cpp 2 @@ -543,6 +544,7 @@ optimizer/pushdown/pushdown_inner_join.cpp 2 optimizer/pushdown/pushdown_set_operation.cpp 2 optimizer/regex_range_filter.cpp 2 optimizer/remove_unused_columns.cpp 2 +optimizer/rule/arithmetic_simplification.cpp 1 optimizer/rule/date_part_simplification.cpp 2 optimizer/rule/distributivity.cpp 3 optimizer/rule/empty_needle_removal.cpp 2 diff --git a/extension/json/json_functions.cpp b/extension/json/json_functions.cpp index 5de50f1d69ed..97a2cb4d3214 100644 --- a/extension/json/json_functions.cpp +++ b/extension/json/json_functions.cpp @@ -15,9 +15,6 @@ namespace duckdb { using JSONPathType = JSONCommon::JSONPathType; static JSONPathType CheckPath(const Value &path_val, string &path, size_t &len) { - if (path_val.IsNull()) { - throw InvalidInputException("JSON path cannot be NULL"); - } const auto path_str_val = path_val.DefaultCastAs(LogicalType::VARCHAR); auto path_str = path_str_val.GetValueUnsafe(); len = path_str.GetSize(); @@ -49,7 +46,7 @@ unique_ptr JSONReadFunctionData::Copy() const { } bool JSONReadFunctionData::Equals(const FunctionData &other_p) const { - auto &other = (const JSONReadFunctionData &)other_p; + auto &other = other_p.Cast(); return constant == other.constant && path == other.path && len == other.len && path_type == other.path_type; } @@ -60,7 +57,7 @@ unique_ptr JSONReadFunctionData::Bind(ClientContext &context, Scal string path = ""; size_t len = 0; JSONPathType path_type = JSONPathType::REGULAR; - if (arguments[1]->return_type.id() != LogicalTypeId::SQLNULL && arguments[1]->IsFoldable()) { + if (arguments[1]->IsFoldable()) { constant = true; const auto path_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); path_type = CheckPath(path_val, path, len); @@ -83,7 +80,7 @@ unique_ptr JSONReadManyFunctionData::Copy() const { } bool JSONReadManyFunctionData::Equals(const FunctionData &other_p) const { - auto &other = (const JSONReadManyFunctionData &)other_p; + auto &other = other_p.Cast(); return paths == other.paths && lens == other.lens; } @@ -100,6 +97,7 @@ unique_ptr JSONReadManyFunctionData::Bind(ClientContext &context, vector paths; vector lens; auto paths_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); + for (auto &path_val : ListValue::GetChildren(paths_val)) { paths.emplace_back(""); lens.push_back(0); diff --git a/extension/json/json_functions/json_transform.cpp b/extension/json/json_functions/json_transform.cpp index ea23187ef23f..a9b3f15c4d1e 100644 --- a/extension/json/json_functions/json_transform.cpp +++ b/extension/json/json_functions/json_transform.cpp @@ -1,6 +1,8 @@ #include "json_transform.hpp" #include "duckdb/common/enum_util.hpp" +#include "duckdb/common/serializer/deserializer.hpp" +#include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/types.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/function/cast/cast_function_set.hpp" @@ -8,8 +10,6 @@ #include "duckdb/function/scalar/nested_functions.hpp" #include "json_functions.hpp" #include "json_scan.hpp" -#include "duckdb/common/serializer/serializer.hpp" -#include "duckdb/common/serializer/deserializer.hpp" namespace duckdb { @@ -72,12 +72,13 @@ static unique_ptr JSONTransformBind(ClientContext &context, Scalar if (arguments[1]->HasParameter()) { throw ParameterNotResolvedException(); } - if (arguments[1]->return_type == LogicalTypeId::SQLNULL) { - bound_function.return_type = LogicalTypeId::SQLNULL; - } else if (!arguments[1]->IsFoldable()) { + if (!arguments[1]->IsFoldable()) { throw BinderException("JSON structure must be a constant!"); + } + auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); + if (structure_val.IsNull() || arguments[1]->return_type == LogicalTypeId::SQLNULL) { + bound_function.return_type = LogicalTypeId::SQLNULL; } else { - auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); if (!structure_val.DefaultTryCastAs(JSONCommon::JSONType())) { throw BinderException("Cannot cast JSON structure to string"); } @@ -741,6 +742,7 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul switch (result_type.id()) { case LogicalTypeId::SQLNULL: + FlatVector::Validity(result).SetAllInvalid(count); return true; case LogicalTypeId::BOOLEAN: return TransformNumerical(vals, result, count, options); diff --git a/src/core_functions/function_list.cpp b/src/core_functions/function_list.cpp index 550e44f54750..8e6174ac2ed1 100644 --- a/src/core_functions/function_list.cpp +++ b/src/core_functions/function_list.cpp @@ -274,7 +274,7 @@ static StaticFunctionDefinition internal_functions[] = { DUCKDB_AGGREGATE_FUNCTION(RegrSXXFun), DUCKDB_AGGREGATE_FUNCTION(RegrSXYFun), DUCKDB_AGGREGATE_FUNCTION(RegrSYYFun), - DUCKDB_SCALAR_FUNCTION(RepeatFun), + DUCKDB_SCALAR_FUNCTION_SET(RepeatFun), DUCKDB_SCALAR_FUNCTION(ReplaceFun), DUCKDB_AGGREGATE_FUNCTION_SET(ReservoirQuantileFun), DUCKDB_SCALAR_FUNCTION(ReverseFun), diff --git a/src/core_functions/scalar/date/date_part.cpp b/src/core_functions/scalar/date/date_part.cpp index 0e11a746df23..f689814e8da8 100644 --- a/src/core_functions/scalar/date/date_part.cpp +++ b/src/core_functions/scalar/date/date_part.cpp @@ -1317,9 +1317,6 @@ static unique_ptr DatePartBind(ClientContext &context, ScalarFunct } Value part_value = ExpressionExecutor::EvaluateScalar(context, *arguments[0]); - if (part_value.IsNull()) { - return nullptr; - } const auto part_name = part_value.ToString(); switch (GetDatePartSpecifier(part_name)) { case DatePartSpecifier::JULIAN_DAY: diff --git a/src/core_functions/scalar/generic/hash.cpp b/src/core_functions/scalar/generic/hash.cpp index 277134b989c0..b99e9704923e 100644 --- a/src/core_functions/scalar/generic/hash.cpp +++ b/src/core_functions/scalar/generic/hash.cpp @@ -4,6 +4,9 @@ namespace duckdb { static void HashFunction(DataChunk &args, ExpressionState &state, Vector &result) { args.Hash(result); + if (args.AllConstant()) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); + } } ScalarFunction HashFun::GetFunction() { diff --git a/src/core_functions/scalar/string/functions.json b/src/core_functions/scalar/string/functions.json index 438d6af447c2..9fe9964449d2 100644 --- a/src/core_functions/scalar/string/functions.json +++ b/src/core_functions/scalar/string/functions.json @@ -185,7 +185,7 @@ "parameters": "string,count", "description": "Repeats the string count number of times", "example": "repeat('A', 5)", - "type": "scalar_function" + "type": "scalar_function_set" }, { "name": "replace", diff --git a/src/core_functions/scalar/string/repeat.cpp b/src/core_functions/scalar/string/repeat.cpp index 494a7aac78ae..4ff356e2f9cc 100644 --- a/src/core_functions/scalar/string/repeat.cpp +++ b/src/core_functions/scalar/string/repeat.cpp @@ -1,10 +1,9 @@ -#include "duckdb/core_functions/scalar/string_functions.hpp" - #include "duckdb/common/exception.hpp" #include "duckdb/common/vector_operations/binary_executor.hpp" +#include "duckdb/core_functions/scalar/string_functions.hpp" -#include #include +#include namespace duckdb { @@ -33,8 +32,12 @@ static void RepeatFunction(DataChunk &args, ExpressionState &state, Vector &resu }); } -ScalarFunction RepeatFun::GetFunction() { - return ScalarFunction({LogicalType::VARCHAR, LogicalType::BIGINT}, LogicalType::VARCHAR, RepeatFunction); +ScalarFunctionSet RepeatFun::GetFunctions() { + ScalarFunctionSet repeat; + for (const auto &type : {LogicalType::VARCHAR, LogicalType::BLOB}) { + repeat.AddFunction(ScalarFunction({type, LogicalType::BIGINT}, type, RepeatFunction)); + } + return repeat; } } // namespace duckdb diff --git a/src/execution/reservoir_sample.cpp b/src/execution/reservoir_sample.cpp index c116b3ac37b5..3522b71a1e28 100644 --- a/src/execution/reservoir_sample.cpp +++ b/src/execution/reservoir_sample.cpp @@ -107,25 +107,19 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) { if (append_to_next_sample > 0) { // we need to also add to the next sample DataChunk new_chunk; - new_chunk.Initialize(allocator, input.GetTypes()); - SelectionVector sel(append_to_current_sample_count); - for (idx_t r = 0; r < append_to_current_sample_count; r++) { - sel.set_index(r, r); - } - new_chunk.Slice(sel, append_to_current_sample_count); + new_chunk.InitializeEmpty(input.GetTypes()); + new_chunk.Slice(input, *FlatVector::IncrementalSelectionVector(), append_to_current_sample_count); new_chunk.Flatten(); - current_sample->AddToReservoir(new_chunk); } else { input.Flatten(); - input.SetCardinality(append_to_current_sample_count); current_sample->AddToReservoir(input); } } if (append_to_next_sample > 0) { // slice the input for the remainder - SelectionVector sel(STANDARD_VECTOR_SIZE); + SelectionVector sel(append_to_next_sample); for (idx_t i = 0; i < append_to_next_sample; i++) { sel.set_index(i, append_to_current_sample_count + i); } diff --git a/src/function/function_binder.cpp b/src/function/function_binder.cpp index 5c2196e685f6..e69112b188a1 100644 --- a/src/function/function_binder.cpp +++ b/src/function/function_binder.cpp @@ -1,16 +1,16 @@ #include "duckdb/function/function_binder.hpp" -#include "duckdb/common/limits.hpp" -#include "duckdb/planner/expression/bound_cast_expression.hpp" -#include "duckdb/planner/expression/bound_aggregate_expression.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/catalog/catalog.hpp" #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" - -#include "duckdb/planner/expression_binder.hpp" +#include "duckdb/common/limits.hpp" +#include "duckdb/execution/expression_executor.hpp" #include "duckdb/function/aggregate_function.hpp" #include "duckdb/function/cast_rules.hpp" -#include "duckdb/catalog/catalog.hpp" +#include "duckdb/planner/expression/bound_aggregate_expression.hpp" +#include "duckdb/planner/expression/bound_cast_expression.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression_binder.hpp" namespace duckdb { @@ -268,7 +268,8 @@ unique_ptr FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) { for (auto &child : children) { - if (child->return_type == LogicalTypeId::SQLNULL) { + if (child->return_type == LogicalTypeId::SQLNULL || + (child->IsFoldable() && ExpressionExecutor::EvaluateScalar(context, *child).IsNull())) { return make_uniq(Value(LogicalType::SQLNULL)); } } diff --git a/src/function/scalar/string/like.cpp b/src/function/scalar/string/like.cpp index 8bac7283c547..eb9f2e34e0b8 100644 --- a/src/function/scalar/string/like.cpp +++ b/src/function/scalar/string/like.cpp @@ -196,9 +196,6 @@ static unique_ptr LikeBindFunction(ClientContext &context, ScalarF D_ASSERT(arguments.size() == 2 || arguments.size() == 3); if (arguments[1]->IsFoldable()) { Value pattern_str = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); - if (pattern_str.IsNull()) { - return nullptr; - } return LikeMatcher::CreateLikeMatcher(pattern_str.ToString()); } return nullptr; diff --git a/src/include/duckdb/core_functions/scalar/string_functions.hpp b/src/include/duckdb/core_functions/scalar/string_functions.hpp index d166b864c0fb..27edf6cfdfe8 100644 --- a/src/include/duckdb/core_functions/scalar/string_functions.hpp +++ b/src/include/duckdb/core_functions/scalar/string_functions.hpp @@ -285,7 +285,7 @@ struct RepeatFun { static constexpr const char *Description = "Repeats the string count number of times"; static constexpr const char *Example = "repeat('A', 5)"; - static ScalarFunction GetFunction(); + static ScalarFunctionSet GetFunctions(); }; struct ReplaceFun { diff --git a/test/fuzzer/duckfuzz/hash_constant.test b/test/fuzzer/duckfuzz/hash_constant.test new file mode 100644 index 000000000000..665571619d91 --- /dev/null +++ b/test/fuzzer/duckfuzz/hash_constant.test @@ -0,0 +1,9 @@ +# name: test/fuzzer/duckfuzz/hash_constant.test +# description: Hashing constants should yield a constant vector (duckdb-fuzzer #290) +# group: [duckfuzz] + +statement ok +create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types(); + +statement ok +SELECT hash(main.list_value(main.list_value(), main.list_value(42, 999, NULL, NULL, -42), NULL, main.list_value(), main.list_value(42, 999, NULL, NULL, -42))) FROM all_types; diff --git a/test/fuzzer/duckfuzz/json_extract_null.test b/test/fuzzer/duckfuzz/json_extract_null.test index cc8dc0c553ef..4d34a819e775 100644 --- a/test/fuzzer/duckfuzz/json_extract_null.test +++ b/test/fuzzer/duckfuzz/json_extract_null.test @@ -7,7 +7,7 @@ require json statement ok PRAGMA enable_verification -statement error +query I SELECT json_extract('hello world', CAST(NULL AS json)) AS c1 ---- -JSON path cannot be NULL +NULL diff --git a/test/fuzzer/duckfuzz/json_functions_null_params.test b/test/fuzzer/duckfuzz/json_functions_null_params.test new file mode 100644 index 000000000000..9c71428bbc2b --- /dev/null +++ b/test/fuzzer/duckfuzz/json_functions_null_params.test @@ -0,0 +1,33 @@ +# name: test/fuzzer/duckfuzz/json_functions_null_params.test +# description: JSON functions with parameters that evaluate to NULL (duckdb-fuzzer #294 and #319) +# group: [duckfuzz] + +require json + +statement ok +PRAGMA enable_verification + +query I +SELECT from_json('{"duck": 42}', NULL::JSON) +---- +NULL + +query I +SELECT json_extract('{"duck": 42}', NULL::VARCHAR[]) +---- +NULL + +query I +SELECT json_keys('{"duck": 42}', NULL::VARCHAR[]) +---- +NULL + +query I +SELECT json_valid(NULL) +---- +NULL + +query I +SELECT json_extract('{"duck": 42}', NULL::VARCHAR) +---- +NULL diff --git a/test/fuzzer/duckfuzz/reservoir_issues.test_slow b/test/fuzzer/duckfuzz/reservoir_issues.test_slow new file mode 100644 index 000000000000..ff45f96bb9f9 --- /dev/null +++ b/test/fuzzer/duckfuzz/reservoir_issues.test_slow @@ -0,0 +1,14 @@ +# name: test/fuzzer/duckfuzz/reservoir_issues.test_slow +# description: Issues with reservoir sampling and uninitialized memory (duckdb-fuzzer #324 and #861) +# group: [duckfuzz] + +require tpch + +statement ok +call dbgen(sf=0.1) + +statement ok +SELECT * FROM lineitem USING SAMPLE 91.0% (Reservoir) + +statement ok +SELECT c13 FROM lineitem AS t17(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16) GROUP BY ALL USING SAMPLE 72.0% (Reservoir) diff --git a/test/fuzzer/sqlsmith/json_contains_null.test b/test/fuzzer/sqlsmith/json_contains_null.test index 705e859f25b0..7c0e5985353e 100644 --- a/test/fuzzer/sqlsmith/json_contains_null.test +++ b/test/fuzzer/sqlsmith/json_contains_null.test @@ -11,3 +11,13 @@ query I SELECT json_contains(NULL::VARCHAR, NULL::VARCHAR); ---- NULL + +query I +WITH cte AS ( + SELECT NULL::VARCHAR j + FROM range(1) +) +SELECT json_contains(j, j) +FROM cte +---- +NULL diff --git a/test/sql/function/date/test_date_part.test b/test/sql/function/date/test_date_part.test index 4788d26228ed..16dca6999242 100644 --- a/test/sql/function/date/test_date_part.test +++ b/test/sql/function/date/test_date_part.test @@ -578,3 +578,13 @@ endloop endloop endloop + +query T +WITH cte AS ( + SELECT NULL::VARCHAR part + FROM range(1) +) +SELECT date_part(part, TIMESTAMP '2019-01-06 04:03:02') +FROM cte +---- +NULL diff --git a/test/sql/function/date/test_date_trunc.test b/test/sql/function/date/test_date_trunc.test index a852cb47709f..284b263ce855 100644 --- a/test/sql/function/date/test_date_trunc.test +++ b/test/sql/function/date/test_date_trunc.test @@ -307,3 +307,13 @@ SELECT stats(date_trunc('${daypart}', d)) FROM timestamps LIMIT 1; [Min: -infinity, Max: infinity][Has Null: false, Has No Null: true] endloop + +query T +WITH cte AS ( + SELECT NULL::VARCHAR part + FROM range(1) +) +SELECT date_trunc(part, TIMESTAMP '2019-01-06 04:03:02') +FROM cte +---- +NULL diff --git a/test/sql/function/date/test_time_bucket_date.test b/test/sql/function/date/test_time_bucket_date.test index c31f5e4d5d05..11abdec6cde9 100644 --- a/test/sql/function/date/test_time_bucket_date.test +++ b/test/sql/function/date/test_time_bucket_date.test @@ -504,3 +504,76 @@ query I select time_bucket('-1 month'::interval, '2022-12-22'::date, null::date); ---- NULL + +# non-foldable NULL testing +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, d) +FROM cte +---- +NULL + +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, t) +FROM cte +---- +NULL + +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, d, i) +FROM cte +---- +NULL + +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, t, i) +FROM cte +---- +NULL + +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, d, d) +FROM cte +---- +NULL + +query T +WITH cte AS ( + SELECT NULL::INTERVAL i, + NULL::DATE d, + NULL::TIMESTAMP t + FROM range(1) +) +SELECT time_bucket(i, t, t) +FROM cte +---- +NULL