Skip to content

Commit

Permalink
Remove redundant upper case aliases for median, first_value and `…
Browse files Browse the repository at this point in the history
…last_value` (apache#10696)

* change udaf name to lowercase

* enhance test for duplicate function name

* fix test

* fix tests

* remove redundant alias field
  • Loading branch information
goldmedal authored and findepi committed Jul 16, 2024
1 parent b9b86c3 commit ed41ea9
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 130 deletions.
12 changes: 4 additions & 8 deletions datafusion/functions-aggregate/src/first_last.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ make_udaf_expr_and_func!(

pub struct FirstValue {
signature: Signature,
aliases: Vec<String>,
requirement_satisfied: bool,
}

Expand All @@ -72,7 +71,6 @@ impl Default for FirstValue {
impl FirstValue {
pub fn new() -> Self {
Self {
aliases: vec![String::from("first_value")],
signature: Signature::one_of(
vec![
// TODO: we can introduce more strict signature that only numeric of array types are allowed
Expand All @@ -97,7 +95,7 @@ impl AggregateUDFImpl for FirstValue {
}

fn name(&self) -> &str {
"FIRST_VALUE"
"first_value"
}

fn signature(&self) -> &Signature {
Expand Down Expand Up @@ -144,7 +142,7 @@ impl AggregateUDFImpl for FirstValue {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn with_beneficial_ordering(
Expand Down Expand Up @@ -349,7 +347,6 @@ make_udaf_expr_and_func!(

pub struct LastValue {
signature: Signature,
aliases: Vec<String>,
requirement_satisfied: bool,
}

Expand All @@ -372,7 +369,6 @@ impl Default for LastValue {
impl LastValue {
pub fn new() -> Self {
Self {
aliases: vec![String::from("last_value")],
signature: Signature::one_of(
vec![
// TODO: we can introduce more strict signature that only numeric of array types are allowed
Expand All @@ -397,7 +393,7 @@ impl AggregateUDFImpl for LastValue {
}

fn name(&self) -> &str {
"LAST_VALUE"
"last_value"
}

fn signature(&self) -> &Signature {
Expand Down Expand Up @@ -449,7 +445,7 @@ impl AggregateUDFImpl for LastValue {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn with_beneficial_ordering(
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-aggregate/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ mod tests {
let mut names = HashSet::new();
for func in all_default_aggregate_functions() {
assert!(
names.insert(func.name().to_string()),
names.insert(func.name().to_string().to_lowercase()),
"duplicate function name: {}",
func.name()
);
for alias in func.aliases() {
assert!(
names.insert(alias.to_string()),
names.insert(alias.to_string().to_lowercase()),
"duplicate function name: {}",
alias
);
Expand Down
6 changes: 2 additions & 4 deletions datafusion/functions-aggregate/src/median.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ make_udaf_expr_and_func!(
/// result, but if cardinality is low then memory usage will also be lower.
pub struct Median {
signature: Signature,
aliases: Vec<String>,
}

impl Debug for Median {
Expand All @@ -79,7 +78,6 @@ impl Default for Median {
impl Median {
pub fn new() -> Self {
Self {
aliases: vec!["median".to_string()],
signature: Signature::numeric(1, Volatility::Immutable),
}
}
Expand All @@ -91,7 +89,7 @@ impl AggregateUDFImpl for Median {
}

fn name(&self) -> &str {
"MEDIAN"
"median"
}

fn signature(&self) -> &Signature {
Expand Down Expand Up @@ -152,7 +150,7 @@ impl AggregateUDFImpl for Median {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}
}

Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,13 @@ mod tests {
let mut names = HashSet::new();
for func in all_default_array_functions() {
assert!(
names.insert(func.name().to_string()),
names.insert(func.name().to_string().to_lowercase()),
"duplicate function name: {}",
func.name()
);
for alias in func.aliases() {
assert!(
names.insert(alias.to_string()),
names.insert(alias.to_string().to_lowercase()),
"duplicate function name: {}",
alias
);
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,13 @@ mod tests {
let mut names = HashSet::new();
for func in all_default_functions() {
assert!(
names.insert(func.name().to_string()),
names.insert(func.name().to_string().to_lowercase()),
"duplicate function name: {}",
func.name()
);
for alias in func.aliases() {
assert!(
names.insert(alias.to_string()),
names.insert(alias.to_string().to_lowercase()),
"duplicate function name: {}",
alias
);
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/replace_distinct_aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,9 @@ mod tests {
)?
.build()?;

let expected = "Projection: FIRST_VALUE(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST] AS b\
let expected = "Projection: first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST] AS b\
\n Sort: test.a DESC NULLS FIRST\
\n Aggregate: groupBy=[[test.a]], aggr=[[FIRST_VALUE(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST]]]\
\n Aggregate: groupBy=[[test.a]], aggr=[[first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST]]]\
\n TableScan: test";

assert_optimized_plan_eq(
Expand Down
22 changes: 11 additions & 11 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -879,15 +879,15 @@ query TT
explain select median(distinct c) from t;
----
logical_plan
01)Projection: MEDIAN(alias1) AS MEDIAN(DISTINCT t.c)
02)--Aggregate: groupBy=[[]], aggr=[[MEDIAN(alias1)]]
01)Projection: median(alias1) AS median(DISTINCT t.c)
02)--Aggregate: groupBy=[[]], aggr=[[median(alias1)]]
03)----Aggregate: groupBy=[[t.c AS alias1]], aggr=[[]]
04)------TableScan: t projection=[c]
physical_plan
01)ProjectionExec: expr=[MEDIAN(alias1)@0 as MEDIAN(DISTINCT t.c)]
02)--AggregateExec: mode=Final, gby=[], aggr=[MEDIAN(alias1)]
01)ProjectionExec: expr=[median(alias1)@0 as median(DISTINCT t.c)]
02)--AggregateExec: mode=Final, gby=[], aggr=[median(alias1)]
03)----CoalescePartitionsExec
04)------AggregateExec: mode=Partial, gby=[], aggr=[MEDIAN(alias1)]
04)------AggregateExec: mode=Partial, gby=[], aggr=[median(alias1)]
05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[]
06)----------CoalesceBatchesExec: target_batch_size=8192
07)------------RepartitionExec: partitioning=Hash([alias1@0], 4), input_partitions=4
Expand Down Expand Up @@ -5024,12 +5024,12 @@ query TT
explain select first_value(c1 order by c3 desc) from convert_first_last_table;
----
logical_plan
01)Aggregate: groupBy=[[]], aggr=[[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]]
01)Aggregate: groupBy=[[]], aggr=[[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]]
02)--TableScan: convert_first_last_table projection=[c1, c3]
physical_plan
01)AggregateExec: mode=Final, gby=[], aggr=[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]
01)AggregateExec: mode=Final, gby=[], aggr=[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]
02)--CoalescePartitionsExec
03)----AggregateExec: mode=Partial, gby=[], aggr=[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 ASC NULLS LAST]]
03)----AggregateExec: mode=Partial, gby=[], aggr=[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 ASC NULLS LAST]]
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/convert_first_last.csv]]}, projection=[c1, c3], output_orderings=[[c1@0 ASC NULLS LAST], [c3@1 ASC NULLS LAST]], has_header=true

Expand All @@ -5038,11 +5038,11 @@ query TT
explain select last_value(c1 order by c2 asc) from convert_first_last_table;
----
logical_plan
01)Aggregate: groupBy=[[]], aggr=[[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]]
01)Aggregate: groupBy=[[]], aggr=[[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]]
02)--TableScan: convert_first_last_table projection=[c1, c2]
physical_plan
01)AggregateExec: mode=Final, gby=[], aggr=[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]
01)AggregateExec: mode=Final, gby=[], aggr=[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]
02)--CoalescePartitionsExec
03)----AggregateExec: mode=Partial, gby=[], aggr=[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 DESC NULLS FIRST]]
03)----AggregateExec: mode=Partial, gby=[], aggr=[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 DESC NULLS FIRST]]
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/convert_first_last.csv]]}, projection=[c1, c2], output_orderings=[[c1@0 ASC NULLS LAST], [c2@1 DESC]], has_header=true
10 changes: 5 additions & 5 deletions datafusion/sqllogictest/test_files/distinct_on.slt
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,18 @@ query TT
EXPLAIN SELECT DISTINCT ON (c1) c3, c2 FROM aggregate_test_100 ORDER BY c1, c3;
----
logical_plan
01)Projection: FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c3, FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c2
01)Projection: first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c3, first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c2
02)--Sort: aggregate_test_100.c1 ASC NULLS LAST
03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]]
03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]]
04)------TableScan: aggregate_test_100 projection=[c1, c2, c3]
physical_plan
01)ProjectionExec: expr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@1 as c3, FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@2 as c2]
01)ProjectionExec: expr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@1 as c3, first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@2 as c2]
02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST]
03)----SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true]
04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
05)--------CoalesceBatchesExec: target_batch_size=8192
06)----------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4
07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true

Expand Down
Loading

0 comments on commit ed41ea9

Please sign in to comment.