@@ -25,8 +25,8 @@ use std::sync::Arc;
2525use super :: dml:: CopyTo ;
2626use super :: DdlStatement ;
2727use crate :: dml:: CopyOptions ;
28- use crate :: expr:: { Alias , Exists , InSubquery , Placeholder } ;
29- use crate :: expr_rewriter:: create_col_from_scalar_expr;
28+ use crate :: expr:: { Alias , Exists , InSubquery , Placeholder , Sort as SortExpr } ;
29+ use crate :: expr_rewriter:: { create_col_from_scalar_expr, normalize_cols } ;
3030use crate :: logical_plan:: display:: { GraphvizVisitor , IndentVisitor } ;
3131use crate :: logical_plan:: extension:: UserDefinedLogicalNode ;
3232use crate :: logical_plan:: { DmlStatement , Statement } ;
@@ -163,7 +163,8 @@ impl LogicalPlan {
163163 } ) => projected_schema,
164164 LogicalPlan :: Projection ( Projection { schema, .. } ) => schema,
165165 LogicalPlan :: Filter ( Filter { input, .. } ) => input. schema ( ) ,
166- LogicalPlan :: Distinct ( Distinct { input } ) => input. schema ( ) ,
166+ LogicalPlan :: Distinct ( Distinct :: All ( input) ) => input. schema ( ) ,
167+ LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn { schema, .. } ) ) => schema,
167168 LogicalPlan :: Window ( Window { schema, .. } ) => schema,
168169 LogicalPlan :: Aggregate ( Aggregate { schema, .. } ) => schema,
169170 LogicalPlan :: Sort ( Sort { input, .. } ) => input. schema ( ) ,
@@ -367,6 +368,16 @@ impl LogicalPlan {
367368 LogicalPlan :: Unnest ( Unnest { column, .. } ) => {
368369 f ( & Expr :: Column ( column. clone ( ) ) )
369370 }
371+ LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn {
372+ on_expr,
373+ select_expr,
374+ sort_expr,
375+ ..
376+ } ) ) => on_expr
377+ . iter ( )
378+ . chain ( select_expr. iter ( ) )
379+ . chain ( sort_expr. clone ( ) . unwrap_or ( vec ! [ ] ) . iter ( ) )
380+ . try_for_each ( f) ,
370381 // plans without expressions
371382 LogicalPlan :: EmptyRelation ( _)
372383 | LogicalPlan :: Subquery ( _)
@@ -377,7 +388,7 @@ impl LogicalPlan {
377388 | LogicalPlan :: Analyze ( _)
378389 | LogicalPlan :: Explain ( _)
379390 | LogicalPlan :: Union ( _)
380- | LogicalPlan :: Distinct ( _ )
391+ | LogicalPlan :: Distinct ( Distinct :: All ( _ ) )
381392 | LogicalPlan :: Dml ( _)
382393 | LogicalPlan :: Ddl ( _)
383394 | LogicalPlan :: Copy ( _)
@@ -405,7 +416,9 @@ impl LogicalPlan {
405416 LogicalPlan :: Union ( Union { inputs, .. } ) => {
406417 inputs. iter ( ) . map ( |arc| arc. as_ref ( ) ) . collect ( )
407418 }
408- LogicalPlan :: Distinct ( Distinct { input } ) => vec ! [ input] ,
419+ LogicalPlan :: Distinct (
420+ Distinct :: All ( input) | Distinct :: On ( DistinctOn { input, .. } ) ,
421+ ) => vec ! [ input] ,
409422 LogicalPlan :: Explain ( explain) => vec ! [ & explain. plan] ,
410423 LogicalPlan :: Analyze ( analyze) => vec ! [ & analyze. input] ,
411424 LogicalPlan :: Dml ( write) => vec ! [ & write. input] ,
@@ -461,8 +474,11 @@ impl LogicalPlan {
461474 Ok ( Some ( agg. group_expr . as_slice ( ) [ 0 ] . clone ( ) ) )
462475 }
463476 }
477+ LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn { select_expr, .. } ) ) => {
478+ Ok ( Some ( select_expr[ 0 ] . clone ( ) ) )
479+ }
464480 LogicalPlan :: Filter ( Filter { input, .. } )
465- | LogicalPlan :: Distinct ( Distinct { input, .. } )
481+ | LogicalPlan :: Distinct ( Distinct :: All ( input) )
466482 | LogicalPlan :: Sort ( Sort { input, .. } )
467483 | LogicalPlan :: Limit ( Limit { input, .. } )
468484 | LogicalPlan :: Repartition ( Repartition { input, .. } )
@@ -823,10 +839,29 @@ impl LogicalPlan {
823839 inputs : inputs. iter ( ) . cloned ( ) . map ( Arc :: new) . collect ( ) ,
824840 schema : schema. clone ( ) ,
825841 } ) ) ,
826- LogicalPlan :: Distinct ( Distinct { .. } ) => {
827- Ok ( LogicalPlan :: Distinct ( Distinct {
828- input : Arc :: new ( inputs[ 0 ] . clone ( ) ) ,
829- } ) )
842+ LogicalPlan :: Distinct ( distinct) => {
843+ let distinct = match distinct {
844+ Distinct :: All ( _) => Distinct :: All ( Arc :: new ( inputs[ 0 ] . clone ( ) ) ) ,
845+ Distinct :: On ( DistinctOn {
846+ on_expr,
847+ select_expr,
848+ ..
849+ } ) => {
850+ let sort_expr = expr. split_off ( on_expr. len ( ) + select_expr. len ( ) ) ;
851+ let select_expr = expr. split_off ( on_expr. len ( ) ) ;
852+ Distinct :: On ( DistinctOn :: try_new (
853+ expr,
854+ select_expr,
855+ if !sort_expr. is_empty ( ) {
856+ Some ( sort_expr)
857+ } else {
858+ None
859+ } ,
860+ Arc :: new ( inputs[ 0 ] . clone ( ) ) ,
861+ ) ?)
862+ }
863+ } ;
864+ Ok ( LogicalPlan :: Distinct ( distinct) )
830865 }
831866 LogicalPlan :: Analyze ( a) => {
832867 assert ! ( expr. is_empty( ) ) ;
@@ -1064,7 +1099,9 @@ impl LogicalPlan {
10641099 LogicalPlan :: Subquery ( _) => None ,
10651100 LogicalPlan :: SubqueryAlias ( SubqueryAlias { input, .. } ) => input. max_rows ( ) ,
10661101 LogicalPlan :: Limit ( Limit { fetch, .. } ) => * fetch,
1067- LogicalPlan :: Distinct ( Distinct { input } ) => input. max_rows ( ) ,
1102+ LogicalPlan :: Distinct (
1103+ Distinct :: All ( input) | Distinct :: On ( DistinctOn { input, .. } ) ,
1104+ ) => input. max_rows ( ) ,
10681105 LogicalPlan :: Values ( v) => Some ( v. values . len ( ) ) ,
10691106 LogicalPlan :: Unnest ( _) => None ,
10701107 LogicalPlan :: Ddl ( _)
@@ -1667,9 +1704,21 @@ impl LogicalPlan {
16671704 LogicalPlan :: Statement ( statement) => {
16681705 write ! ( f, "{}" , statement. display( ) )
16691706 }
1670- LogicalPlan :: Distinct ( Distinct { .. } ) => {
1671- write ! ( f, "Distinct:" )
1672- }
1707+ LogicalPlan :: Distinct ( distinct) => match distinct {
1708+ Distinct :: All ( _) => write ! ( f, "Distinct:" ) ,
1709+ Distinct :: On ( DistinctOn {
1710+ on_expr,
1711+ select_expr,
1712+ sort_expr,
1713+ ..
1714+ } ) => write ! (
1715+ f,
1716+ "DistinctOn: on_expr=[[{}]], select_expr=[[{}]], sort_expr=[[{}]]" ,
1717+ expr_vec_fmt!( on_expr) ,
1718+ expr_vec_fmt!( select_expr) ,
1719+ if let Some ( sort_expr) = sort_expr { expr_vec_fmt!( sort_expr) } else { "" . to_string( ) } ,
1720+ ) ,
1721+ } ,
16731722 LogicalPlan :: Explain { .. } => write ! ( f, "Explain" ) ,
16741723 LogicalPlan :: Analyze { .. } => write ! ( f, "Analyze" ) ,
16751724 LogicalPlan :: Union ( _) => write ! ( f, "Union" ) ,
@@ -2132,9 +2181,93 @@ pub struct Limit {
21322181
21332182/// Removes duplicate rows from the input, either as a plain `DISTINCT` or as a `DISTINCT ON`
21342183#[ derive( Clone , PartialEq , Eq , Hash ) ]
2135- pub struct Distinct {
2184+ pub enum Distinct {
2185+ /// Plain `DISTINCT` referencing all selection expressions; wraps the input plan being de-duplicated
2186+ All ( Arc < LogicalPlan > ) ,
2187+ /// The Postgres-style `DISTINCT ON` addition, allowing separate control over the DISTINCT'd and the selected columns
2188+ On ( DistinctOn ) ,
2189+ }
2190+
2191+ /// A `DISTINCT ON` node: the `ON`, selection and optional `ORDER BY` expressions, plus the input plan and the output schema
2192+ #[ derive( Clone , PartialEq , Eq , Hash ) ]
2193+ pub struct DistinctOn {
2194+ /// The `DISTINCT ON` clause expression list; `try_new` rejects an empty list and normalizes it against `input`
2195+ pub on_expr : Vec < Expr > ,
2196+ /// The selected projection expression list; `try_new` builds `schema` from these expressions
2197+ pub select_expr : Vec < Expr > ,
2198+ /// The `ORDER BY` clause, whose initial expressions must match those of the `ON` clause when
2199+ /// present. Note that those matching expressions actually wrap the `ON` expressions with
2200+ /// additional info pertaining to the sorting procedure (i.e. ASC/DESC, and NULLS FIRST/LAST).
2201+ pub sort_expr : Option < Vec < Expr > > ,
21362202 /// The logical plan that is being DISTINCT'd
21372203 pub input : Arc < LogicalPlan > ,
2204+ /// The schema description of the DISTINCT ON output; `try_new` reuses the metadata of the input schema
2205+ pub schema : DFSchemaRef ,
2206+ }
2207+
2208+ impl DistinctOn {
2209+ /// Create a new `DistinctOn` struct.
2210+ pub fn try_new (
2211+ on_expr : Vec < Expr > ,
2212+ select_expr : Vec < Expr > ,
2213+ sort_expr : Option < Vec < Expr > > ,
2214+ input : Arc < LogicalPlan > ,
2215+ ) -> Result < Self > {
2216+ if on_expr. is_empty ( ) {
2217+ return plan_err ! ( "No `ON` expressions provided" ) ;
2218+ }
2219+
2220+ let on_expr = normalize_cols ( on_expr, input. as_ref ( ) ) ?;
2221+
2222+ let schema = DFSchema :: new_with_metadata (
2223+ exprlist_to_fields ( & select_expr, & input) ?,
2224+ input. schema ( ) . metadata ( ) . clone ( ) ,
2225+ ) ?;
2226+
2227+ let mut distinct_on = DistinctOn {
2228+ on_expr,
2229+ select_expr,
2230+ sort_expr : None ,
2231+ input,
2232+ schema : Arc :: new ( schema) ,
2233+ } ;
2234+
2235+ if let Some ( sort_expr) = sort_expr {
2236+ distinct_on = distinct_on. with_sort_expr ( sort_expr) ?;
2237+ }
2238+
2239+ Ok ( distinct_on)
2240+ }
2241+
2242+ /// Try to update `self` with a new sort expressions.
2243+ ///
2244+ /// Validates that the sort expressions are a super-set of the `ON` expressions.
2245+ pub fn with_sort_expr ( mut self , sort_expr : Vec < Expr > ) -> Result < Self > {
2246+ let sort_expr = normalize_cols ( sort_expr, self . input . as_ref ( ) ) ?;
2247+
2248+ // Check that the left-most sort expressions are the same as the `ON` expressions.
2249+ let mut matched = true ;
2250+ for ( on, sort) in self . on_expr . iter ( ) . zip ( sort_expr. iter ( ) ) {
2251+ match sort {
2252+ Expr :: Sort ( SortExpr { expr, .. } ) => {
2253+ if on != & * * expr {
2254+ matched = false ;
2255+ break ;
2256+ }
2257+ }
2258+ _ => return plan_err ! ( "Not a sort expression: {sort}" ) ,
2259+ }
2260+ }
2261+
2262+ if self . on_expr . len ( ) > sort_expr. len ( ) || !matched {
2263+ return plan_err ! (
2264+ "SELECT DISTINCT ON expressions must match initial ORDER BY expressions"
2265+ ) ;
2266+ }
2267+
2268+ self . sort_expr = Some ( sort_expr) ;
2269+ Ok ( self )
2270+ }
21382271}
21392272
21402273/// Aggregates its input based on a set of grouping and aggregate
0 commit comments