Skip to content

Commit b9262ec

Browse files
committed
Removes min/max/count comparison based on name in aggregate statistics
1 parent ac74cd3 commit b9262ec

File tree

4 files changed

+42
-22
lines changed

4 files changed

+42
-22
lines changed

datafusion/expr/src/udaf.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,19 @@ impl AggregateUDF {
262262
self.inner.is_descending()
263263
}
264264

265+
/// Returns true if the function is min. Used by the optimizer
266+
pub fn is_min(&self) -> bool {
267+
self.inner.is_min()
268+
}
269+
270+
/// Returns true if the function is max. Used by the optimizer
271+
pub fn is_max(&self) -> bool {
272+
self.inner.is_max()
273+
}
274+
/// Returns true if the function is count. Used by the optimizer
275+
pub fn is_count(&self) -> bool {
276+
self.inner.is_count()
277+
}
265278
/// See [`AggregateUDFImpl::default_value`] for more details.
266279
pub fn default_value(&self, data_type: &DataType) -> Result<ScalarValue> {
267280
self.inner.default_value(data_type)
@@ -575,6 +588,19 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
575588
None
576589
}
577590

591+
/// Returns true if the function is min. Used by the optimizer
592+
fn is_min(&self) -> bool {
593+
false
594+
}
595+
/// Returns true if the function is max. Used by the optimizer
596+
fn is_max(&self) -> bool {
597+
false
598+
}
599+
/// Returns true if the function is count. Used by the optimizer
600+
fn is_count(&self) -> bool {
601+
false
602+
}
603+
578604
/// Returns default value of the function given the input is all `null`.
579605
///
580606
/// Most of the aggregate function return Null if input is Null,

datafusion/functions-aggregate/src/count.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,10 @@ impl AggregateUDFImpl for Count {
291291
fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
292292
Ok(ScalarValue::Int64(Some(0)))
293293
}
294+
295+
fn is_count(&self) -> bool {
296+
true
297+
}
294298
}
295299

296300
#[derive(Debug)]

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,10 @@ impl AggregateUDFImpl for Max {
272272
fn is_descending(&self) -> Option<bool> {
273273
Some(true)
274274
}
275+
276+
fn is_max(&self) -> bool {
277+
true
278+
}
275279
fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
276280
datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
277281
}
@@ -1052,6 +1056,10 @@ impl AggregateUDFImpl for Min {
10521056
Some(false)
10531057
}
10541058

1059+
fn is_min(&self) -> bool {
1060+
true
1061+
}
1062+
10551063
fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
10561064
datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
10571065
}

datafusion/physical-optimizer/src/aggregate_statistics.rs

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ fn take_optimizable_min(
181181
match *num_rows {
182182
0 => {
183183
// MIN/MAX with 0 rows is always null
184-
if is_min(agg_expr) {
184+
if agg_expr.fun().is_min() {
185185
if let Ok(min_data_type) =
186186
ScalarValue::try_from(agg_expr.field().data_type())
187187
{
@@ -191,7 +191,7 @@ fn take_optimizable_min(
191191
}
192192
value if value > 0 => {
193193
let col_stats = &stats.column_statistics;
194-
if is_min(agg_expr) {
194+
if agg_expr.fun().is_min() {
195195
let exprs = agg_expr.expressions();
196196
if exprs.len() == 1 {
197197
// TODO optimize with exprs other than Column
@@ -227,7 +227,7 @@ fn take_optimizable_max(
227227
match *num_rows {
228228
0 => {
229229
// MIN/MAX with 0 rows is always null
230-
if is_max(agg_expr) {
230+
if agg_expr.fun().is_max() {
231231
if let Ok(max_data_type) =
232232
ScalarValue::try_from(agg_expr.field().data_type())
233233
{
@@ -237,7 +237,7 @@ fn take_optimizable_max(
237237
}
238238
value if value > 0 => {
239239
let col_stats = &stats.column_statistics;
240-
if is_max(agg_expr) {
240+
if agg_expr.fun().is_max() {
241241
let exprs = agg_expr.expressions();
242242
if exprs.len() == 1 {
243243
// TODO optimize with exprs other than Column
@@ -273,22 +273,4 @@ fn is_non_distinct_count(agg_expr: &AggregateFunctionExpr) -> bool {
273273
false
274274
}
275275

276-
// TODO: Move this check into AggregateUDFImpl
277-
// https://github.com/apache/datafusion/issues/11153
278-
fn is_min(agg_expr: &AggregateFunctionExpr) -> bool {
279-
if agg_expr.fun().name().to_lowercase() == "min" {
280-
return true;
281-
}
282-
false
283-
}
284-
285-
// TODO: Move this check into AggregateUDFImpl
286-
// https://github.com/apache/datafusion/issues/11153
287-
fn is_max(agg_expr: &AggregateFunctionExpr) -> bool {
288-
if agg_expr.fun().name().to_lowercase() == "max" {
289-
return true;
290-
}
291-
false
292-
}
293-
294276
// See tests in datafusion/core/tests/physical_optimizer

0 commit comments

Comments
 (0)