@@ -25,7 +25,7 @@ use datafusion_common::tree_node::Transformed;
2525use datafusion_common:: { Column , Result } ;
2626use datafusion_expr:: expr_rewriter:: normalize_cols;
2727use datafusion_expr:: utils:: expand_wildcard;
28- use datafusion_expr:: { col, ExprFunctionExt , LogicalPlanBuilder } ;
28+ use datafusion_expr:: { col, lit , ExprFunctionExt , Limit , LogicalPlanBuilder } ;
2929use datafusion_expr:: { Aggregate , Distinct , DistinctOn , Expr , LogicalPlan } ;
3030
3131/// Optimizer that replaces logical [[Distinct]] with a logical [[Aggregate]]
@@ -54,6 +54,17 @@ use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan};
5454/// )
5555/// ORDER BY a DESC
5656/// ```
57+ ///
58+ /// If the input has no columns (as with `empty_table` below), the [[Distinct]] is replaced by a [[Limit]]
59+ ///
60+ /// ```text
61+ /// SELECT DISTINCT * FROM empty_table
62+ /// ```
63+ ///
64+ /// Into
65+ /// ```text
66+ /// SELECT * FROM empty_table LIMIT 1
67+ /// ```
5768#[ derive( Default , Debug ) ]
5869pub struct ReplaceDistinctWithAggregate { }
5970
@@ -78,6 +89,16 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
7889 LogicalPlan :: Distinct ( Distinct :: All ( input) ) => {
7990 let group_expr = expand_wildcard ( input. schema ( ) , & input, None ) ?;
8091
92+ if group_expr. is_empty ( ) {
93+ // Special case: there are no columns to group by, so the DISTINCT cannot be rewritten as a GROUP BY.
94+ // However, it can be replaced by LIMIT 1, because the result is either empty or a single empty row.
95+ return Ok ( Transformed :: yes ( LogicalPlan :: Limit ( Limit {
96+ skip : None ,
97+ fetch : Some ( Box :: new ( lit ( 1i64 ) ) ) ,
98+ input,
99+ } ) ) ) ;
100+ }
101+
81102 let field_count = input. schema ( ) . fields ( ) . len ( ) ;
82103 for dep in input. schema ( ) . functional_dependencies ( ) . iter ( ) {
83104 // If distinct is exactly the same with a previous GROUP BY, we can
@@ -184,15 +205,17 @@ impl OptimizerRule for ReplaceDistinctWithAggregate {
184205
185206#[ cfg( test) ]
186207mod tests {
187- use std:: sync:: Arc ;
188-
189208 use crate :: assert_optimized_plan_eq_snapshot;
190209 use crate :: replace_distinct_aggregate:: ReplaceDistinctWithAggregate ;
191210 use crate :: test:: * ;
211+ use arrow:: datatypes:: { Fields , Schema } ;
212+ use std:: sync:: Arc ;
192213
193214 use crate :: OptimizerContext ;
194215 use datafusion_common:: Result ;
195- use datafusion_expr:: { col, logical_plan:: builder:: LogicalPlanBuilder , Expr } ;
216+ use datafusion_expr:: {
217+ col, logical_plan:: builder:: LogicalPlanBuilder , table_scan, Expr ,
218+ } ;
196219 use datafusion_functions_aggregate:: sum:: sum;
197220
198221 macro_rules! assert_optimized_plan_equal {
@@ -274,4 +297,16 @@ mod tests {
274297 TableScan: test
275298 " )
276299 }
300+
301+ #[ test] // DISTINCT over a scan whose schema has no fields should be rewritten to a Limit
302+ fn use_limit_1_when_no_columns ( ) -> Result < ( ) > {
303+ let plan = table_scan ( Some ( "test" ) , & Schema :: new ( Fields :: empty ( ) ) , None ) ? // scan of `test` with an empty (zero-field) schema
304+ . distinct ( ) ? // wrap the scan in DISTINCT
305+ . build ( ) ?;
306+ // The optimized plan is expected to be a Limit (skip=0, fetch=1) directly over the TableScan.
307+ assert_optimized_plan_equal ! ( plan, @r"
308+ Limit: skip=0, fetch=1
309+ TableScan: test
310+ " )
311+ }
277312}
0 commit comments