Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datafusion/functions-aggregate/src/string_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ impl AggregateUDFImpl for StringAgg {
)))
}

fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
datafusion_expr::ReversedUDAF::Reversed(string_agg_udaf())
}

fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
Expand Down
53 changes: 52 additions & 1 deletion datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -6203,6 +6203,58 @@ from t;
----
a,c,d,b

# Test explain / reverse_expr for string_agg
query TT
explain select string_agg(k, ',' order by v) from t;
----
logical_plan
01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v ASC NULLS LAST]]]
02)--TableScan: t projection=[k, v]
physical_plan
01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v ASC NULLS LAST]]
02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false]
03)----DataSourceExec: partitions=1, partition_sizes=[1]

query T
select string_agg(k, ',' order by v) from t;
----
c,a,b,d

query TT
explain select string_agg(k, ',' order by v desc) from t;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is single aggregation enough to exercise the reverse_expr API?
Or maybe we need two functions, one using reverse ordering of the other?

I checked that the test does not pass without changes in string_agg.rs and passes with them.
This is likely related to

if let Some(reverse_aggr_expr) = aggr_expr.reverse_expr() {
call site, but it looks it should never matter when there is only one function.

What am I missing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without reverse_expr the requirement in that function will stay None, which means get_finer_aggregate_exprs_requirement returns an empty list. Maybe @berkaysynnada can clarify the behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we hitting this loop exit path as expected?

// If the common requirement is finer than the current expression's,
// we can skip this expression. If the latter is finer than the former,
// adopt it if it is satisfied by the equivalence properties. Otherwise,
// defer the analysis to the reverse expression.
let forward_finer = determine_finer(&requirement, &aggr_req);
if let Some(finer) = forward_finer {
if !finer {
continue;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't look like it, determine_finer returns true. Should it exit there? If it did I think we would have the same problem, i.e., requirement would remain None.

----
logical_plan
01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]]
02)--TableScan: t projection=[k, v]
physical_plan
01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]
02)--SortExec: expr=[v@1 DESC], preserve_partitioning=[false]
03)----DataSourceExec: partitions=1, partition_sizes=[1]

query T
select string_agg(k, ',' order by v desc) from t;
----
d,b,a,c

# Call string_agg with both ASC and DESC orderings, and expect only one sort
# (because the aggregate can handle reversed inputs)
query TT
explain select string_agg(k, ',' order by v asc), string_agg(k, ',' order by v desc) from t;
----
logical_plan
01)Aggregate: groupBy=[[]], aggr=[[string_agg(t.k, Utf8(",")) ORDER BY [t.v ASC NULLS LAST], string_agg(t.k, Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]]
02)--TableScan: t projection=[k, v]
physical_plan
01)AggregateExec: mode=Single, gby=[], aggr=[string_agg(t.k,Utf8(",")) ORDER BY [t.v ASC NULLS LAST], string_agg(t.k,Utf8(",")) ORDER BY [t.v DESC NULLS FIRST]]
02)--SortExec: expr=[v@1 ASC NULLS LAST], preserve_partitioning=[false]
03)----DataSourceExec: partitions=1, partition_sizes=[1]

query TT
select string_agg(k, ',' order by v asc), string_agg(k, ',' order by v desc) from t;
----
c,a,b,d d,b,a,c


statement ok
drop table t;

Expand Down Expand Up @@ -7444,4 +7496,3 @@ NULL NULL

statement ok
drop table distinct_avg;