Skip to content

Commit

Permalink
Minor: Convert Count's name to lowercase (apache#11028)
Browse files Browse the repository at this point in the history
* push down non-unnest only

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add doc

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* to lowercase

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* fix tpch

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* Update test

* fix test

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

---------

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
2 people authored and findepi committed Jul 16, 2024
1 parent 208e185 commit 2f53144
Show file tree
Hide file tree
Showing 41 changed files with 359 additions and 361 deletions.
4 changes: 2 additions & 2 deletions datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2018,7 +2018,7 @@ mod tests {

assert_batches_sorted_eq!(
["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |",
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
"+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
"| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
"| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
Expand Down Expand Up @@ -3171,7 +3171,7 @@ mod tests {

let sql = r#"
SELECT
COUNT(1)
count(1)
FROM
test
GROUP BY
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/execution/context/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ mod tests {
)
.await?;
let results =
plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), COUNT(*) FROM test").await?;
plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), count(*) FROM test").await?;

assert_eq!(results.len(), 1);
let expected = [
"+--------------+--------------+----------+",
"| sum(test.c1) | sum(test.c2) | COUNT(*) |",
"| sum(test.c1) | sum(test.c2) | count(*) |",
"+--------------+--------------+----------+",
"| 10 | 110 | 20 |",
"+--------------+--------------+----------+",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ fn take_optimizable_column_and_table_count(
) -> Option<(ScalarValue, String)> {
let col_stats = &stats.column_statistics;
if let Some(agg_expr) = agg_expr.as_any().downcast_ref::<AggregateFunctionExpr>() {
if agg_expr.fun().name() == "COUNT" && !agg_expr.is_distinct() {
if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() {
if let Precision::Exact(num_rows) = stats.num_rows {
let exprs = agg_expr.expressions();
if exprs.len() == 1 {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/custom_sources_cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ async fn optimizers_catch_all_statistics() {

let expected = RecordBatch::try_new(
Arc::new(Schema::new(vec![
Field::new("COUNT(*)", DataType::Int64, false),
Field::new("count(*)", DataType::Int64, false),
Field::new("MIN(test.c1)", DataType::Int32, false),
Field::new("MAX(test.c1)", DataType::Int32, false),
])),
Expand Down
10 changes: 5 additions & 5 deletions datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ async fn test_count_wildcard_on_window() -> Result<()> {
let ctx = create_join_context()?;

let sql_results = ctx
.sql("select COUNT(*) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
.sql("select count(*) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
.await?
.explain(false, false)?
.collect()
Expand Down Expand Up @@ -211,7 +211,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> {
let sql_results = ctx
.sql("select count(*) from t1")
.await?
.select(vec![col("COUNT(*)")])?
.select(vec![col("count(*)")])?
.explain(false, false)?
.collect()
.await?;
Expand Down Expand Up @@ -604,7 +604,7 @@ async fn test_grouping_sets() -> Result<()> {

let expected = vec![
"+-----------+-----+---------------+",
"| a | b | COUNT(test.a) |",
"| a | b | count(test.a) |",
"+-----------+-----+---------------+",
"| | 100 | 1 |",
"| | 10 | 2 |",
Expand Down Expand Up @@ -645,7 +645,7 @@ async fn test_grouping_sets_count() -> Result<()> {

let expected = vec![
"+----+----+-----------------+",
"| c1 | c2 | COUNT(Int32(1)) |",
"| c1 | c2 | count(Int32(1)) |",
"+----+----+-----------------+",
"| | 5 | 14 |",
"| | 4 | 23 |",
Expand Down Expand Up @@ -1233,7 +1233,7 @@ async fn unnest_aggregate_columns() -> Result<()> {
.await?;
let expected = [
r#"+-------------+"#,
r#"| COUNT(tags) |"#,
r#"| count(tags) |"#,
r#"+-------------+"#,
r#"| 9 |"#,
r#"+-------------+"#,
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/path_partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async fn parquet_distinct_partition_col() -> Result<()> {
//3. limit is not contained within a single partition
//The id column is included to ensure that the parquet file is actually scanned.
let results = ctx
.sql("SELECT COUNT(*) as num_rows_per_month, month, MAX(id) from t group by month order by num_rows_per_month desc")
.sql("SELECT count(*) as num_rows_per_month, month, MAX(id) from t group by month order by num_rows_per_month desc")
.await?
.collect()
.await?;
Expand Down Expand Up @@ -339,7 +339,7 @@ async fn csv_grouping_by_partition() -> Result<()> {

let expected = [
"+------------+----------+----------------------+",
"| date | COUNT(*) | COUNT(DISTINCT t.c1) |",
"| date | count(*) | count(DISTINCT t.c1) |",
"+------------+----------+----------------------+",
"| 2021-10-26 | 100 | 5 |",
"| 2021-10-27 | 100 | 5 |",
Expand Down
36 changes: 18 additions & 18 deletions datafusion/core/tests/sql/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ async fn csv_query_array_agg_distinct() -> Result<()> {
#[tokio::test]
async fn count_partitioned() -> Result<()> {
let results =
execute_with_partition("SELECT COUNT(c1), COUNT(c2) FROM test", 4).await?;
execute_with_partition("SELECT count(c1), count(c2) FROM test", 4).await?;
assert_eq!(results.len(), 1);

let expected = [
"+----------------+----------------+",
"| COUNT(test.c1) | COUNT(test.c2) |",
"| count(test.c1) | count(test.c2) |",
"+----------------+----------------+",
"| 40 | 40 |",
"+----------------+----------------+",
Expand All @@ -86,11 +86,11 @@ async fn count_partitioned() -> Result<()> {
#[tokio::test]
async fn count_aggregated() -> Result<()> {
let results =
execute_with_partition("SELECT c1, COUNT(c2) FROM test GROUP BY c1", 4).await?;
execute_with_partition("SELECT c1, count(c2) FROM test GROUP BY c1", 4).await?;

let expected = [
"+----+----------------+",
"| c1 | COUNT(test.c2) |",
"| c1 | count(test.c2) |",
"+----+----------------+",
"| 0 | 10 |",
"| 1 | 10 |",
Expand All @@ -105,14 +105,14 @@ async fn count_aggregated() -> Result<()> {
#[tokio::test]
async fn count_aggregated_cube() -> Result<()> {
let results = execute_with_partition(
"SELECT c1, c2, COUNT(c3) FROM test GROUP BY CUBE (c1, c2) ORDER BY c1, c2",
"SELECT c1, c2, count(c3) FROM test GROUP BY CUBE (c1, c2) ORDER BY c1, c2",
4,
)
.await?;

let expected = vec![
"+----+----+----------------+",
"| c1 | c2 | COUNT(test.c3) |",
"| c1 | c2 | count(test.c3) |",
"+----+----+----------------+",
"| | | 40 |",
"| | 1 | 4 |",
Expand Down Expand Up @@ -222,15 +222,15 @@ async fn run_count_distinct_integers_aggregated_scenario(
"
SELECT
c_group,
COUNT(c_uint64),
COUNT(DISTINCT c_int8),
COUNT(DISTINCT c_int16),
COUNT(DISTINCT c_int32),
COUNT(DISTINCT c_int64),
COUNT(DISTINCT c_uint8),
COUNT(DISTINCT c_uint16),
COUNT(DISTINCT c_uint32),
COUNT(DISTINCT c_uint64)
count(c_uint64),
count(DISTINCT c_int8),
count(DISTINCT c_int16),
count(DISTINCT c_int32),
count(DISTINCT c_int64),
count(DISTINCT c_uint8),
count(DISTINCT c_uint16),
count(DISTINCT c_uint32),
count(DISTINCT c_uint64)
FROM test
GROUP BY c_group
",
Expand Down Expand Up @@ -260,7 +260,7 @@ async fn count_distinct_integers_aggregated_single_partition() -> Result<()> {
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;

let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
"| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
"| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |",
"+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
"| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
"| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
Expand All @@ -284,7 +284,7 @@ async fn count_distinct_integers_aggregated_multiple_partitions() -> Result<()>
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;

let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
"| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
"| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |",
"+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
"| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |",
"| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |",
Expand All @@ -301,7 +301,7 @@ async fn test_accumulator_row_accumulator() -> Result<()> {
let ctx = SessionContext::new_with_config(config);
register_aggregate_csv(&ctx).await?;

let sql = "SELECT c1, c2, MIN(c13) as min1, MIN(c9) as min2, MAX(c13) as max1, MAX(c9) as max2, AVG(c9) as avg1, MIN(c13) as min3, COUNT(C9) as cnt1, 0.5*SUM(c9-c8) as sum1
let sql = "SELECT c1, c2, MIN(c13) as min1, MIN(c9) as min2, MAX(c13) as max1, MAX(c9) as max2, AVG(c9) as avg1, MIN(c13) as min3, count(C9) as cnt1, 0.5*SUM(c9-c8) as sum1
FROM aggregate_test_100
GROUP BY c1, c2
ORDER BY c1, c2
Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ async fn explain_analyze_baseline_metrics() {
);
assert_metrics!(
&formatted,
"ProjectionExec: expr=[COUNT(*)",
"ProjectionExec: expr=[count(*)",
"metrics=[output_rows=1, elapsed_compute="
);
assert_metrics!(
Expand Down Expand Up @@ -700,7 +700,7 @@ async fn csv_explain_analyze() {
// Only test basic plumbing and try to avoid having to change too
// many things. explain_analyze_baseline_metrics covers the values
// in greater depth
let needle = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[COUNT(*)], metrics=[output_rows=5";
let needle = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[count(*)], metrics=[output_rows=5";
assert_contains!(&formatted, needle);

let verbose_needle = "Output Rows";
Expand Down Expand Up @@ -793,7 +793,7 @@ async fn explain_logical_plan_only() {
let expected = vec![
vec![
"logical_plan",
"Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)) AS COUNT(*)]]\
"Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]]\
\n SubqueryAlias: t\
\n Projection: \
\n Values: (Utf8(\"a\"), Int64(1), Int64(100)), (Utf8(\"a\"), Int64(2), Int64(150))"
Expand All @@ -812,7 +812,7 @@ async fn explain_physical_plan_only() {

let expected = vec![vec![
"physical_plan",
"ProjectionExec: expr=[2 as COUNT(*)]\
"ProjectionExec: expr=[2 as count(*)]\
\n PlaceholderRowExec\
\n",
]];
Expand Down
6 changes: 2 additions & 4 deletions datafusion/functions-aggregate/src/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ pub fn count_distinct(expr: Expr) -> datafusion_expr::Expr {

pub struct Count {
signature: Signature,
aliases: Vec<String>,
}

impl Debug for Count {
Expand All @@ -98,7 +97,6 @@ impl Default for Count {
impl Count {
pub fn new() -> Self {
Self {
aliases: vec!["count".to_string()],
signature: Signature::variadic_any(Volatility::Immutable),
}
}
Expand All @@ -110,7 +108,7 @@ impl AggregateUDFImpl for Count {
}

fn name(&self) -> &str {
"COUNT"
"count"
}

fn signature(&self) -> &Signature {
Expand Down Expand Up @@ -249,7 +247,7 @@ impl AggregateUDFImpl for Count {
}

fn aliases(&self) -> &[String] {
&self.aliases
&[]
}

fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
Expand Down
Loading

0 comments on commit 2f53144

Please sign in to comment.