Remove redundant upper case aliases for median, first_value and `…

…last_value` (apache#10696) * change udaf name to lowercase * enhance test for duplicate function name * fix test * fix tests * remove redundant alias field
findepi · Jul 16, 2024 · ed41ea9 · ed41ea9
1 parent b9b86c3
commit ed41ea9
Show file tree

Hide file tree

Showing 10 changed files with 124 additions and 130 deletions.
diff --git a/datafusion/functions-aggregate/src/first_last.rs b/datafusion/functions-aggregate/src/first_last.rs
@@ -49,7 +49,6 @@ make_udaf_expr_and_func!(
 
 pub struct FirstValue {
     signature: Signature,
-    aliases: Vec<String>,
     requirement_satisfied: bool,
 }
 
@@ -72,7 +71,6 @@ impl Default for FirstValue {
 impl FirstValue {
     pub fn new() -> Self {
         Self {
-            aliases: vec![String::from("first_value")],
             signature: Signature::one_of(
                 vec![
                     // TODO: we can introduce more strict signature that only numeric of array types are allowed
@@ -97,7 +95,7 @@ impl AggregateUDFImpl for FirstValue {
     }
 
     fn name(&self) -> &str {
-        "FIRST_VALUE"
+        "first_value"
     }
 
     fn signature(&self) -> &Signature {
@@ -144,7 +142,7 @@ impl AggregateUDFImpl for FirstValue {
     }
 
     fn aliases(&self) -> &[String] {
-        &self.aliases
+        &[]
     }
 
     fn with_beneficial_ordering(
@@ -349,7 +347,6 @@ make_udaf_expr_and_func!(
 
 pub struct LastValue {
     signature: Signature,
-    aliases: Vec<String>,
     requirement_satisfied: bool,
 }
 
@@ -372,7 +369,6 @@ impl Default for LastValue {
 impl LastValue {
     pub fn new() -> Self {
         Self {
-            aliases: vec![String::from("last_value")],
             signature: Signature::one_of(
                 vec![
                     // TODO: we can introduce more strict signature that only numeric of array types are allowed
@@ -397,7 +393,7 @@ impl AggregateUDFImpl for LastValue {
     }
 
     fn name(&self) -> &str {
-        "LAST_VALUE"
+        "last_value"
     }
 
     fn signature(&self) -> &Signature {
@@ -449,7 +445,7 @@ impl AggregateUDFImpl for LastValue {
     }
 
     fn aliases(&self) -> &[String] {
-        &self.aliases
+        &[]
     }
 
     fn with_beneficial_ordering(

diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs
@@ -109,13 +109,13 @@ mod tests {
         let mut names = HashSet::new();
         for func in all_default_aggregate_functions() {
             assert!(
-                names.insert(func.name().to_string()),
+                names.insert(func.name().to_string().to_lowercase()),
                 "duplicate function name: {}",
                 func.name()
             );
             for alias in func.aliases() {
                 assert!(
-                    names.insert(alias.to_string()),
+                    names.insert(alias.to_string().to_lowercase()),
                     "duplicate function name: {}",
                     alias
                 );

diff --git a/datafusion/functions-aggregate/src/median.rs b/datafusion/functions-aggregate/src/median.rs
@@ -58,7 +58,6 @@ make_udaf_expr_and_func!(
 /// result, but if cardinality is low then memory usage will also be lower.
 pub struct Median {
     signature: Signature,
-    aliases: Vec<String>,
 }
 
 impl Debug for Median {
@@ -79,7 +78,6 @@ impl Default for Median {
 impl Median {
     pub fn new() -> Self {
         Self {
-            aliases: vec!["median".to_string()],
             signature: Signature::numeric(1, Volatility::Immutable),
         }
     }
@@ -91,7 +89,7 @@ impl AggregateUDFImpl for Median {
     }
 
     fn name(&self) -> &str {
-        "MEDIAN"
+        "median"
     }
 
     fn signature(&self) -> &Signature {
@@ -152,7 +150,7 @@ impl AggregateUDFImpl for Median {
     }
 
     fn aliases(&self) -> &[String] {
-        &self.aliases
+        &[]
     }
 }
 

diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs
@@ -168,13 +168,13 @@ mod tests {
         let mut names = HashSet::new();
         for func in all_default_array_functions() {
             assert!(
-                names.insert(func.name().to_string()),
+                names.insert(func.name().to_string().to_lowercase()),
                 "duplicate function name: {}",
                 func.name()
             );
             for alias in func.aliases() {
                 assert!(
-                    names.insert(alias.to_string()),
+                    names.insert(alias.to_string().to_lowercase()),
                     "duplicate function name: {}",
                     alias
                 );

diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs
@@ -191,13 +191,13 @@ mod tests {
         let mut names = HashSet::new();
         for func in all_default_functions() {
             assert!(
-                names.insert(func.name().to_string()),
+                names.insert(func.name().to_string().to_lowercase()),
                 "duplicate function name: {}",
                 func.name()
             );
             for alias in func.aliases() {
                 assert!(
-                    names.insert(alias.to_string()),
+                    names.insert(alias.to_string().to_lowercase()),
                     "duplicate function name: {}",
                     alias
                 );

diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs
@@ -201,9 +201,9 @@ mod tests {
             )?
             .build()?;
 
-        let expected = "Projection: FIRST_VALUE(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST] AS b\
+        let expected = "Projection: first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST] AS b\
         \n  Sort: test.a DESC NULLS FIRST\
-        \n    Aggregate: groupBy=[[test.a]], aggr=[[FIRST_VALUE(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST]]]\
+        \n    Aggregate: groupBy=[[test.a]], aggr=[[first_value(test.b) ORDER BY [test.a DESC NULLS FIRST, test.c ASC NULLS LAST]]]\
         \n      TableScan: test";
 
         assert_optimized_plan_eq(

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -879,15 +879,15 @@ query TT
 explain select median(distinct c) from t;
 ----
 logical_plan
-01)Projection: MEDIAN(alias1) AS MEDIAN(DISTINCT t.c)
-02)--Aggregate: groupBy=[[]], aggr=[[MEDIAN(alias1)]]
+01)Projection: median(alias1) AS median(DISTINCT t.c)
+02)--Aggregate: groupBy=[[]], aggr=[[median(alias1)]]
 03)----Aggregate: groupBy=[[t.c AS alias1]], aggr=[[]]
 04)------TableScan: t projection=[c]
 physical_plan
-01)ProjectionExec: expr=[MEDIAN(alias1)@0 as MEDIAN(DISTINCT t.c)]
-02)--AggregateExec: mode=Final, gby=[], aggr=[MEDIAN(alias1)]
+01)ProjectionExec: expr=[median(alias1)@0 as median(DISTINCT t.c)]
+02)--AggregateExec: mode=Final, gby=[], aggr=[median(alias1)]
 03)----CoalescePartitionsExec
-04)------AggregateExec: mode=Partial, gby=[], aggr=[MEDIAN(alias1)]
+04)------AggregateExec: mode=Partial, gby=[], aggr=[median(alias1)]
 05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
 07)------------RepartitionExec: partitioning=Hash([alias1@0], 4), input_partitions=4
@@ -5024,12 +5024,12 @@ query TT
 explain select first_value(c1 order by c3 desc) from convert_first_last_table;
 ----
 logical_plan
-01)Aggregate: groupBy=[[]], aggr=[[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]]
+01)Aggregate: groupBy=[[]], aggr=[[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]]
 02)--TableScan: convert_first_last_table projection=[c1, c3]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]
+01)AggregateExec: mode=Final, gby=[], aggr=[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 DESC NULLS FIRST]]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 ASC NULLS LAST]]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c3 ASC NULLS LAST]]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/convert_first_last.csv]]}, projection=[c1, c3], output_orderings=[[c1@0 ASC NULLS LAST], [c3@1 ASC NULLS LAST]], has_header=true
 
@@ -5038,11 +5038,11 @@ query TT
 explain select last_value(c1 order by c2 asc) from convert_first_last_table;
 ----
 logical_plan
-01)Aggregate: groupBy=[[]], aggr=[[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]]
+01)Aggregate: groupBy=[[]], aggr=[[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]]
 02)--TableScan: convert_first_last_table projection=[c1, c2]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[LAST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]
+01)AggregateExec: mode=Final, gby=[], aggr=[last_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 ASC NULLS LAST]]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[FIRST_VALUE(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 DESC NULLS FIRST]]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[first_value(convert_first_last_table.c1) ORDER BY [convert_first_last_table.c2 DESC NULLS FIRST]]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/convert_first_last.csv]]}, projection=[c1, c2], output_orderings=[[c1@0 ASC NULLS LAST], [c2@1 DESC]], has_header=true
diff --git a/datafusion/sqllogictest/test_files/distinct_on.slt b/datafusion/sqllogictest/test_files/distinct_on.slt
@@ -89,18 +89,18 @@ query TT
 EXPLAIN SELECT DISTINCT ON (c1) c3, c2 FROM aggregate_test_100 ORDER BY c1, c3;
 ----
 logical_plan
-01)Projection: FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c3, FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c2
+01)Projection: first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c3, first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST] AS c2
 02)--Sort: aggregate_test_100.c1 ASC NULLS LAST
-03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]]
+03)----Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]]
 04)------TableScan: aggregate_test_100 projection=[c1, c2, c3]
 physical_plan
-01)ProjectionExec: expr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@1 as c3, FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@2 as c2]
+01)ProjectionExec: expr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@1 as c3, first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]@2 as c2]
 02)--SortPreservingMergeExec: [c1@0 ASC NULLS LAST]
 03)----SortExec: expr=[c1@0 ASC NULLS LAST], preserve_partitioning=[true]
-04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
+04)------AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
 05)--------CoalesceBatchesExec: target_batch_size=8192
 06)----------RepartitionExec: partitioning=Hash([c1@0], 4), input_partitions=4
-07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[FIRST_VALUE(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], FIRST_VALUE(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
+07)------------AggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[first_value(aggregate_test_100.c3) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST], first_value(aggregate_test_100.c2) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST]]
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true