From 6b00cdfbc789a6cf442f83bdb21ab58c761f791d Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 28 Oct 2024 16:29:52 +0100 Subject: [PATCH] Add AggregateDestructorType which signifies whether or not an aggregate state can be trivially destructible - only AggregateDestructorType::LEGACY can be trivially destructible --- .../aggregate/distributive/arg_min_max.cpp | 25 +++++++------ .../core_functions/aggregate/holistic/mad.cpp | 3 +- .../aggregate/holistic/mode.cpp | 12 ++++-- .../aggregate/holistic/quantile.cpp | 31 +++++++++------- .../aggregate/distributive/minmax.cpp | 2 +- .../aggregate/sorted_aggregate_function.cpp | 3 +- .../duckdb/function/aggregate_function.hpp | 37 +++++++++++++------ 7 files changed, 71 insertions(+), 42 deletions(-) diff --git a/extension/core_functions/aggregate/distributive/arg_min_max.cpp b/extension/core_functions/aggregate/distributive/arg_min_max.cpp index 35b9a77474c2..63c112b3ce3c 100644 --- a/extension/core_functions/aggregate/distributive/arg_min_max.cpp +++ b/extension/core_functions/aggregate/distributive/arg_min_max.cpp @@ -314,21 +314,22 @@ struct VectorArgMinMaxBase : ArgMinMaxBase { template AggregateFunction GetGenericArgMinMaxFunction() { using STATE = ArgMinMaxState; - return AggregateFunction({LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, - AggregateFunction::StateSize, AggregateFunction::StateInitialize, - OP::template Update, AggregateFunction::StateCombine, - AggregateFunction::StateVoidFinalize, nullptr, OP::Bind, - AggregateFunction::StateDestroy); + return AggregateFunction( + {LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, OP::template Update, + AggregateFunction::StateCombine, AggregateFunction::StateVoidFinalize, nullptr, OP::Bind, + AggregateFunction::StateDestroy); } template AggregateFunction GetVectorArgMinMaxFunctionInternal(const LogicalType &by_type, const LogicalType &type) { #ifndef DUCKDB_SMALLER_BINARY using STATE = ArgMinMaxState; - return AggregateFunction( - {type, by_type}, type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, - OP::template Update, AggregateFunction::StateCombine, - AggregateFunction::StateVoidFinalize, nullptr, OP::Bind, AggregateFunction::StateDestroy); + return AggregateFunction({type, by_type}, type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, + OP::template Update, AggregateFunction::StateCombine, + AggregateFunction::StateVoidFinalize, nullptr, OP::Bind, + AggregateFunction::StateDestroy); #else auto function = GetGenericArgMinMaxFunction(); function.arguments = {type, by_type}; @@ -380,7 +381,9 @@ template AggregateFunction GetArgMinMaxFunctionInternal(const LogicalType &by_type, const LogicalType &type) { #ifndef DUCKDB_SMALLER_BINARY using STATE = ArgMinMaxState; - auto function = AggregateFunction::BinaryAggregate(type, by_type, type); + auto function = + AggregateFunction::BinaryAggregate( + type, by_type, type); if (type.InternalType() == PhysicalType::VARCHAR || by_type.InternalType() == PhysicalType::VARCHAR) { function.destructor = AggregateFunction::StateDestroy; } @@ -618,7 +621,7 @@ static void SpecializeArgMinMaxNFunction(AggregateFunction &function) { using OP = MinMaxNOperation; function.state_size = AggregateFunction::StateSize; - function.initialize = AggregateFunction::StateInitialize; + function.initialize = AggregateFunction::StateInitialize; function.combine = AggregateFunction::StateCombine; function.destructor = AggregateFunction::StateDestroy; diff --git a/extension/core_functions/aggregate/holistic/mad.cpp b/extension/core_functions/aggregate/holistic/mad.cpp index 03c07df3f38a..93516b59f78c 100644 --- a/extension/core_functions/aggregate/holistic/mad.cpp +++ b/extension/core_functions/aggregate/holistic/mad.cpp @@ -265,7 +265,8 @@ AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const Logical const LogicalType &target_type) { using STATE = QuantileState; using OP = MedianAbsoluteDeviationOperation; - auto fun = AggregateFunction::UnaryAggregateDestructor(input_type, target_type); + auto fun = AggregateFunction::UnaryAggregateDestructor(input_type, target_type); fun.bind = BindMAD; fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT; #ifndef DUCKDB_SMALLER_BINARY diff --git a/extension/core_functions/aggregate/holistic/mode.cpp b/extension/core_functions/aggregate/holistic/mode.cpp index c0bb248ebbc7..e66fa7f4e6bc 100644 --- a/extension/core_functions/aggregate/holistic/mode.cpp +++ b/extension/core_functions/aggregate/holistic/mode.cpp @@ -424,7 +424,7 @@ AggregateFunction GetFallbackModeFunction(const LogicalType &type) { using STATE = ModeState; using OP = ModeFallbackFunction; AggregateFunction aggr({type}, type, AggregateFunction::StateSize, - AggregateFunction::StateInitialize, + AggregateFunction::StateInitialize, AggregateSortKeyHelpers::UnaryUpdate, AggregateFunction::StateCombine, AggregateFunction::StateVoidFinalize, nullptr); aggr.destructor = AggregateFunction::StateDestroy; @@ -435,7 +435,9 @@ template > AggregateFunction GetTypedModeFunction(const LogicalType &type) { using STATE = ModeState; using OP = ModeFunction; - auto func = AggregateFunction::UnaryAggregateDestructor(type, type); + auto func = + AggregateFunction::UnaryAggregateDestructor( + type, type); func.window = OP::template Window; return func; } @@ -528,7 +530,9 @@ template > AggregateFunction GetTypedEntropyFunction(const LogicalType &type) { using STATE = ModeState; using OP = EntropyFunction; - auto func = AggregateFunction::UnaryAggregateDestructor(type, LogicalType::DOUBLE); + auto func = + AggregateFunction::UnaryAggregateDestructor( + type, LogicalType::DOUBLE); func.null_handling = FunctionNullHandling::SPECIAL_HANDLING; return func; } @@ -537,7 +541,7 @@ AggregateFunction GetFallbackEntropyFunction(const LogicalType &type) { using STATE = ModeState; using OP = EntropyFallbackFunction; AggregateFunction func({type}, LogicalType::DOUBLE, AggregateFunction::StateSize, - AggregateFunction::StateInitialize, + AggregateFunction::StateInitialize, AggregateSortKeyHelpers::UnaryUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, nullptr); func.destructor = AggregateFunction::StateDestroy; diff --git a/extension/core_functions/aggregate/holistic/quantile.cpp b/extension/core_functions/aggregate/holistic/quantile.cpp index 879be9637bf9..f8f668af636c 100644 --- a/extension/core_functions/aggregate/holistic/quantile.cpp +++ b/extension/core_functions/aggregate/holistic/quantile.cpp @@ -420,7 +420,8 @@ struct ScalarDiscreteQuantile { static AggregateFunction GetFunction(const LogicalType &type) { using STATE = QuantileState; using OP = QuantileScalarOperation; - auto fun = AggregateFunction::UnaryAggregateDestructor(type, type); + auto fun = AggregateFunction::UnaryAggregateDestructor(type, type); #ifndef DUCKDB_SMALLER_BINARY fun.window = OP::Window; fun.window_init = OP::WindowInit; @@ -432,11 +433,12 @@ struct ScalarDiscreteQuantile { using STATE = QuantileState; using OP = QuantileScalarFallback; - AggregateFunction fun( - {type}, type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, - AggregateSortKeyHelpers::UnaryUpdate, AggregateFunction::StateCombine, - AggregateFunction::StateVoidFinalize, nullptr, nullptr, - AggregateFunction::StateDestroy); + AggregateFunction fun({type}, type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, + AggregateSortKeyHelpers::UnaryUpdate, + AggregateFunction::StateCombine, + AggregateFunction::StateVoidFinalize, nullptr, nullptr, + AggregateFunction::StateDestroy); return fun; } }; @@ -445,7 +447,8 @@ template static AggregateFunction QuantileListAggregate(const LogicalType &input_type, const LogicalType &child_type) { // NOLINT LogicalType result_type = LogicalType::LIST(child_type); return AggregateFunction( - {input_type}, result_type, AggregateFunction::StateSize, AggregateFunction::StateInitialize, + {input_type}, result_type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, AggregateFunction::UnaryScatterUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, AggregateFunction::UnaryUpdate, nullptr, AggregateFunction::StateDestroy); @@ -469,11 +472,12 @@ struct ListDiscreteQuantile { using STATE = QuantileState; using OP = QuantileListFallback; - AggregateFunction fun( - {type}, LogicalType::LIST(type), AggregateFunction::StateSize, - AggregateFunction::StateInitialize, AggregateSortKeyHelpers::UnaryUpdate, - AggregateFunction::StateCombine, AggregateFunction::StateFinalize, - nullptr, nullptr, AggregateFunction::StateDestroy); + AggregateFunction fun({type}, LogicalType::LIST(type), AggregateFunction::StateSize, + AggregateFunction::StateInitialize, + AggregateSortKeyHelpers::UnaryUpdate, + AggregateFunction::StateCombine, + AggregateFunction::StateFinalize, nullptr, nullptr, + AggregateFunction::StateDestroy); return fun; } }; @@ -547,7 +551,8 @@ struct ScalarContinuousQuantile { using STATE = QuantileState; using OP = QuantileScalarOperation; auto fun = - AggregateFunction::UnaryAggregateDestructor(input_type, target_type); + AggregateFunction::UnaryAggregateDestructor(input_type, target_type); fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT; #ifndef DUCKDB_SMALLER_BINARY fun.window = OP::template Window; diff --git a/src/function/aggregate/distributive/minmax.cpp b/src/function/aggregate/distributive/minmax.cpp index 537140bf6eaf..3bbeb2210107 100644 --- a/src/function/aggregate/distributive/minmax.cpp +++ b/src/function/aggregate/distributive/minmax.cpp @@ -478,7 +478,7 @@ static void SpecializeMinMaxNFunction(AggregateFunction &function) { using OP = MinMaxNOperation; function.state_size = AggregateFunction::StateSize; - function.initialize = AggregateFunction::StateInitialize; + function.initialize = AggregateFunction::StateInitialize; function.combine = AggregateFunction::StateCombine; function.destructor = AggregateFunction::StateDestroy; diff --git a/src/function/aggregate/sorted_aggregate_function.cpp b/src/function/aggregate/sorted_aggregate_function.cpp index 4e86f930bfb4..87960adb85ba 100644 --- a/src/function/aggregate/sorted_aggregate_function.cpp +++ b/src/function/aggregate/sorted_aggregate_function.cpp @@ -744,7 +744,8 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE // Replace the aggregate with the wrapper AggregateFunction ordered_aggregate( bound_function.name, arguments, bound_function.return_type, AggregateFunction::StateSize, - AggregateFunction::StateInitialize, + AggregateFunction::StateInitialize, SortedAggregateFunction::ScatterUpdate, AggregateFunction::StateCombine, SortedAggregateFunction::Finalize, bound_function.null_handling, SortedAggregateFunction::SimpleUpdate, nullptr, diff --git a/src/include/duckdb/function/aggregate_function.hpp b/src/include/duckdb/function/aggregate_function.hpp index c054040e6fa5..ab6299aa2997 100644 --- a/src/include/duckdb/function/aggregate_function.hpp +++ b/src/include/duckdb/function/aggregate_function.hpp @@ -103,6 +103,13 @@ struct AggregateFunctionInfo { } }; +enum class AggregateDestructorType { + STANDARD, + // legacy destructors allow non-trivial destructors in aggregate states + // these might not be trivial to off-load to disk + LEGACY +}; + class AggregateFunction : public BaseScalarFunction { // NOLINT: work-around bug in clang-tidy public: AggregateFunction(const string &name, const vector &arguments, const LogicalType &return_type, @@ -206,29 +213,33 @@ class AggregateFunction : public BaseScalarFunction { // NOLINT: work-around bug AggregateFunction::StateFinalize, AggregateFunction::NullaryUpdate); } - template + template static AggregateFunction UnaryAggregate(const LogicalType &input_type, LogicalType return_type, FunctionNullHandling null_handling = FunctionNullHandling::DEFAULT_NULL_HANDLING) { - return AggregateFunction( - {input_type}, return_type, AggregateFunction::StateSize, - AggregateFunction::StateInitialize, AggregateFunction::UnaryScatterUpdate, - AggregateFunction::StateCombine, AggregateFunction::StateFinalize, - null_handling, AggregateFunction::UnaryUpdate); + return AggregateFunction({input_type}, return_type, AggregateFunction::StateSize, + AggregateFunction::StateInitialize, + AggregateFunction::UnaryScatterUpdate, + AggregateFunction::StateCombine, + AggregateFunction::StateFinalize, null_handling, + AggregateFunction::UnaryUpdate); } - template + template static AggregateFunction UnaryAggregateDestructor(LogicalType input_type, LogicalType return_type) { - auto aggregate = UnaryAggregate(input_type, return_type); + auto aggregate = UnaryAggregate(input_type, return_type); aggregate.destructor = AggregateFunction::StateDestroy; return aggregate; } - template + template static AggregateFunction BinaryAggregate(const LogicalType &a_type, const LogicalType &b_type, LogicalType return_type) { return AggregateFunction({a_type, b_type}, return_type, AggregateFunction::StateSize, - AggregateFunction::StateInitialize, + AggregateFunction::StateInitialize, AggregateFunction::BinaryScatterUpdate, AggregateFunction::StateCombine, AggregateFunction::StateFinalize, @@ -241,8 +252,12 @@ class AggregateFunction : public BaseScalarFunction { // NOLINT: work-around bug return sizeof(STATE); } - template + template static void StateInitialize(const AggregateFunction &, data_ptr_t state) { + // FIXME: we should remove the "destructor_type" option in the future + static_assert(std::is_trivially_destructible::value || + destructor_type == AggregateDestructorType::LEGACY, + "Aggregate state must be trivially destructible"); OP::Initialize(*reinterpret_cast(state)); }