apache · findepi · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs
@@ -2744,23 +2744,20 @@ async fn test_count_wildcard_on_where_exist() -> Result<()> {
 
     assert_snapshot!(
         pretty_format_batches(&sql_results).unwrap(),
-        @r###"
-    +---------------+---------------------------------------------------------+
-    | plan_type     | plan                                                    |
-    +---------------+---------------------------------------------------------+
-    | logical_plan  | LeftSemi Join:                                          |
-    |               |   TableScan: t1 projection=[a, b]                       |
-    |               |   SubqueryAlias: __correlated_sq_1                      |
-    |               |     Projection:                                         |
-    |               |       Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] |
-    |               |         TableScan: t2 projection=[]                     |
-    | physical_plan | NestedLoopJoinExec: join_type=RightSemi                 |
-    |               |   ProjectionExec: expr=[]                               |
-    |               |     PlaceholderRowExec                                  |
-    |               |   DataSourceExec: partitions=1, partition_sizes=[1]     |
-    |               |                                                         |
-    +---------------+---------------------------------------------------------+
-    "###
+        @r"
+    +---------------+-----------------------------------------------------+
+    | plan_type     | plan                                                |
+    +---------------+-----------------------------------------------------+
+    | logical_plan  | LeftSemi Join:                                      |
+    |               |   TableScan: t1 projection=[a, b]                   |
+    |               |   SubqueryAlias: __correlated_sq_1                  |
+    |               |     EmptyRelation                                   |
+    | physical_plan | NestedLoopJoinExec: join_type=RightSemi             |
+    |               |   PlaceholderRowExec                                |
+    |               |   DataSourceExec: partitions=1, partition_sizes=[1] |
+    |               |                                                     |
+    +---------------+-----------------------------------------------------+
+    "
     );
 
     let df_results = ctx
@@ -2783,23 +2780,20 @@ async fn test_count_wildcard_on_where_exist() -> Result<()> {
 
     assert_snapshot!(
         pretty_format_batches(&df_results).unwrap(),
-        @r###"
-    +---------------+---------------------------------------------------------------------+
-    | plan_type     | plan                                                                |
-    +---------------+---------------------------------------------------------------------+
-    | logical_plan  | LeftSemi Join:                                                      |
-    |               |   TableScan: t1 projection=[a, b]                                   |
-    |               |   SubqueryAlias: __correlated_sq_1                                  |
-    |               |     Projection:                                                     |
-    |               |       Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]] |
-    |               |         TableScan: t2 projection=[]                                 |
-    | physical_plan | NestedLoopJoinExec: join_type=RightSemi                             |
-    |               |   ProjectionExec: expr=[]                                           |
-    |               |     PlaceholderRowExec                                              |
-    |               |   DataSourceExec: partitions=1, partition_sizes=[1]                 |
-    |               |                                                                     |
-    +---------------+---------------------------------------------------------------------+
-    "###
+        @r"
+    +---------------+-----------------------------------------------------+
+    | plan_type     | plan                                                |
+    +---------------+-----------------------------------------------------+
+    | logical_plan  | LeftSemi Join:                                      |
+    |               |   TableScan: t1 projection=[a, b]                   |
+    |               |   SubqueryAlias: __correlated_sq_1                  |
+    |               |     EmptyRelation                                   |
+    | physical_plan | NestedLoopJoinExec: join_type=RightSemi             |
+    |               |   PlaceholderRowExec                                |
+    |               |   DataSourceExec: partitions=1, partition_sizes=[1] |
+    |               |                                                     |
+    +---------------+-----------------------------------------------------+
+    "
     );
 
     Ok(())

diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
@@ -3522,7 +3522,10 @@ impl Aggregate {
     ) -> Result<Self> {
         if group_expr.is_empty() && aggr_expr.is_empty() {
             return plan_err!(
-                "Aggregate requires at least one grouping or aggregate expression"
+                "Aggregate requires at least one grouping or aggregate expression. \
+                Aggregate without grouping expressions nor aggregate expressions is \
+                logically equivalent to, but less efficient than, VALUES producing \
+                single row. Please use VALUES instead."
             );
         }
         let group_expr_count = grouping_set_expr_count(&group_expr)?;

diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -26,12 +26,12 @@ use std::sync::Arc;
 
 use datafusion_common::{
     get_required_group_by_exprs_indices, internal_datafusion_err, internal_err, Column,
-    HashMap, JoinType, Result,
+    DFSchema, HashMap, JoinType, Result,
 };
 use datafusion_expr::expr::Alias;
 use datafusion_expr::{
-    logical_plan::LogicalPlan, Aggregate, Distinct, Expr, Projection, TableScan, Unnest,
-    Window,
+    logical_plan::LogicalPlan, Aggregate, Distinct, EmptyRelation, Expr, Projection,
+    TableScan, Unnest, Window,
 };
 
 use crate::optimize_projections::required_indices::RequiredIndices;
@@ -153,23 +153,16 @@ fn optimize_projections(
 
             // Only use the absolutely necessary aggregate expressions required
             // by the parent:
-            let mut new_aggr_expr = aggregate_reqs.get_at_indices(&aggregate.aggr_expr);
-
-            // Aggregations always need at least one aggregate expression.
-            // With a nested count, we don't require any column as input, but
-            // still need to create a correct aggregate, which may be optimized
-            // out later. As an example, consider the following query:
-            //
-            // SELECT count(*) FROM (SELECT count(*) FROM [...])
-            //
-            // which always returns 1.
-            if new_aggr_expr.is_empty()
-                && new_group_bys.is_empty()
-                && !aggregate.aggr_expr.is_empty()
-            {
-                // take the old, first aggregate expression
-                new_aggr_expr = aggregate.aggr_expr;
-                new_aggr_expr.resize_with(1, || unreachable!());
+            let new_aggr_expr = aggregate_reqs.get_at_indices(&aggregate.aggr_expr);
+
+            if new_group_bys.is_empty() && new_aggr_expr.is_empty() {
+                // Global aggregation with no aggregate functions always produces 1 row and no columns.
+                return Ok(Transformed::yes(LogicalPlan::EmptyRelation(
+                    EmptyRelation {
+                        produce_one_row: true,
+                        schema: Arc::new(DFSchema::empty()),
+                    },
+                )));
             }
 
             let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter());
@@ -1146,9 +1139,7 @@ mod tests {
             plan,
             @r"
         Aggregate: groupBy=[[]], aggr=[[count(Int32(1))]]
-          Projection:
-            Aggregate: groupBy=[[]], aggr=[[count(Int32(1))]]
-              TableScan: ?table? projection=[]
+          EmptyRelation
         "
         )
     }

diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt
@@ -429,14 +429,11 @@ logical_plan
 01)LeftSemi Join: 
 02)--TableScan: t1 projection=[a]
 03)--SubqueryAlias: __correlated_sq_1
-04)----Projection:
-05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
-06)--------TableScan: t2 projection=[]
+04)----EmptyRelation
 physical_plan
 01)NestedLoopJoinExec: join_type=LeftSemi
 02)--DataSourceExec: partitions=1, partition_sizes=[0]
-03)--ProjectionExec: expr=[]
-04)----PlaceholderRowExec
+03)--PlaceholderRowExec
 
 statement ok
 drop table t1;

diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -1263,14 +1263,11 @@ physical_plan
 04)│    join_type: LeftSemi    │              │
 05)└─────────────┬─────────────┘              │
 06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-07)│       DataSourceExec      ││       ProjectionExec      │
+07)│       DataSourceExec      ││     PlaceholderRowExec    │
 08)│    --------------------   ││                           │
 09)│          files: 1         ││                           │
 10)│        format: csv        ││                           │
-11)└───────────────────────────┘└─────────────┬─────────────┘
-12)-----------------------------┌─────────────┴─────────────┐
-13)-----------------------------│     PlaceholderRowExec    │
-14)-----------------------------└───────────────────────────┘
+11)└───────────────────────────┘└───────────────────────────┘
 
 # Query with cross join.
 query TT

diff --git a/datafusion/sqllogictest/test_files/expr/date_part.slt b/datafusion/sqllogictest/test_files/expr/date_part.slt
@@ -1089,4 +1089,4 @@ SELECT EXTRACT("isodow" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
 query I
 SELECT EXTRACT('isodow' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
 ----
-1
\ No newline at end of file
+1
diff --git a/datafusion/sqllogictest/test_files/issue_17138.slt b/datafusion/sqllogictest/test_files/issue_17138.slt
@@ -0,0 +1,36 @@
+statement ok
+CREATE TABLE tab1(col0 INTEGER, col1 INTEGER, col2 INTEGER)
+
+statement ok
+INSERT INTO tab1 VALUES(51,14,96)
+
+query R
+SELECT NULL * AVG(DISTINCT 4) + SUM(col1) AS col0 FROM tab1
+----
+NULL
+
+query TT
+EXPLAIN SELECT NULL * AVG(DISTINCT 4) + SUM(col1) AS col0 FROM tab1
+----
+logical_plan
+01)Projection: Float64(NULL) AS col0
+02)--EmptyRelation
+physical_plan
+01)ProjectionExec: expr=[NULL as col0]
+02)--PlaceholderRowExec
+
+# Similar, with a few more arithmetic operations
+query R
+SELECT + CAST ( NULL AS INTEGER ) * + + AVG ( DISTINCT 4 ) + - SUM ( ALL + col1 ) AS col0 FROM tab1
+----
+NULL
+
+query TT
+EXPLAIN SELECT + CAST ( NULL AS INTEGER ) * + + AVG ( DISTINCT 4 ) + - SUM ( ALL + col1 ) AS col0 FROM tab1
+----
+logical_plan
+01)Projection: Float64(NULL) AS col0
+02)--EmptyRelation
+physical_plan
+01)ProjectionExec: expr=[NULL as col0]
+02)--PlaceholderRowExec
diff --git a/datafusion/sqllogictest/test_files/spark/bitwise/getbit.slt b/datafusion/sqllogictest/test_files/spark/bitwise/getbit.slt
@@ -73,4 +73,3 @@ query I
 SELECT getbit(11, NULL);
 ----
 NULL
-
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
@@ -1453,9 +1453,7 @@ logical_plan
 01)LeftSemi Join: 
 02)--TableScan: t1 projection=[a]
 03)--SubqueryAlias: __correlated_sq_1
-04)----Projection:
-05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
-06)--------TableScan: t2 projection=[]
+04)----EmptyRelation
 
 statement count 0
 drop table t1;
-Original file line number
+Diff line change
@@ Expand Up @@
     query I
     SELECT EXTRACT('isodow' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
     ----
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -73,4 +73,3 @@ query I
		SELECT getbit(11, NULL);
		----
		NULL