Skip to content

Commit b98cad6

Browse files
authored
Fix DISTINCT ON for tables with no columns (ReplaceDistinctWithAggregate: do not fail when on input without columns) (#18133)
Use LIMIT 1 instead of a GROUP BY in this case: if there are any result rows, they are all empty, so we can just take the first one. We cannot use GROUP BY here because GROUP BY requires at least one grouping expression or one aggregate function. Closes #18132.
1 parent 28a6854 commit b98cad6

File tree

1 file changed

+39
-4
lines changed

1 file changed

+39
-4
lines changed

datafusion/optimizer/src/replace_distinct_aggregate.rs

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_common::tree_node::Transformed;
2525
use datafusion_common::{Column, Result};
2626
use datafusion_expr::expr_rewriter::normalize_cols;
2727
use datafusion_expr::utils::expand_wildcard;
28-
use datafusion_expr::{col, ExprFunctionExt, LogicalPlanBuilder};
28+
use datafusion_expr::{col, lit, ExprFunctionExt, Limit, LogicalPlanBuilder};
2929
use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan};
3030

3131
/// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]]
@@ -54,6 +54,17 @@ use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan};
5454
/// )
5555
/// ORDER BY a DESC
5656
/// ```
57+
///
58+
/// If the input has no columns, the [[Distinct]] is replaced by a [[Limit]] instead:
59+
///
60+
/// ```text
61+
/// SELECT DISTINCT * FROM empty_table
62+
/// ```
63+
///
64+
/// Into
65+
/// ```text
66+
/// SELECT * FROM empty_table LIMIT 1
67+
/// ```
5768
#[derive(Default, Debug)]
5869
pub struct ReplaceDistinctWithAggregate {}
5970

@@ -78,6 +89,16 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
7889
LogicalPlan::Distinct(Distinct::All(input)) => {
7990
let group_expr = expand_wildcard(input.schema(), &input, None)?;
8091

92+
if group_expr.is_empty() {
93+
// Special case: there are no columns to group by, so we can't replace it with a GROUP BY
94+
// however, we can replace it with LIMIT 1 because the distinct output is either no rows or a single empty row
95+
return Ok(Transformed::yes(LogicalPlan::Limit(Limit {
96+
skip: None,
97+
fetch: Some(Box::new(lit(1i64))),
98+
input,
99+
})));
100+
}
101+
81102
let field_count = input.schema().fields().len();
82103
for dep in input.schema().functional_dependencies().iter() {
83104
// If distinct is exactly the same with a previous GROUP BY, we can
@@ -184,15 +205,17 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
184205

185206
#[cfg(test)]
186207
mod tests {
187-
use std::sync::Arc;
188-
189208
use crate::assert_optimized_plan_eq_snapshot;
190209
use crate::replace_distinct_aggregate::ReplaceDistinctWithAggregate;
191210
use crate::test::*;
211+
use arrow::datatypes::{Fields, Schema};
212+
use std::sync::Arc;
192213

193214
use crate::OptimizerContext;
194215
use datafusion_common::Result;
195-
use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, Expr};
216+
use datafusion_expr::{
217+
col, logical_plan::builder::LogicalPlanBuilder, table_scan, Expr,
218+
};
196219
use datafusion_functions_aggregate::sum::sum;
197220

198221
macro_rules! assert_optimized_plan_equal {
@@ -274,4 +297,16 @@ mod tests {
274297
TableScan: test
275298
")
276299
}
300+
301+
#[test]
302+
fn use_limit_1_when_no_columns() -> Result<()> {
303+
let plan = table_scan(Some("test"), &Schema::new(Fields::empty()), None)?
304+
.distinct()?
305+
.build()?;
306+
307+
assert_optimized_plan_equal!(plan, @r"
308+
Limit: skip=0, fetch=1
309+
TableScan: test
310+
")
311+
}
277312
}

0 commit comments

Comments
 (0)