Skip to content

Commit bcf7849

Browse files
wangyumcloud-fan
authored andcommitted
[SPARK-38489][SQL] Aggregate.groupOnly support foldable expressions
### What changes were proposed in this pull request? This pr makes `Aggregate.groupOnly` support foldable expressions. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #35795 from wangyum/SPARK-38489. Authored-by: Yuming Wang <yumwang@ebay.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 3ab2455 commit bcf7849

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ case class Aggregate(
10031003
groupingExpressions.nonEmpty && aggregateExpressions.map {
10041004
case Alias(child, _) => child
10051005
case e => e
1006-
}.forall(a => groupingExpressions.exists(g => a.semanticEquals(g)))
1006+
}.forall(a => a.foldable || groupingExpressions.exists(g => a.semanticEquals(g)))
10071007
}
10081008
}
10091009

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.AnalysisTest
2121
import org.apache.spark.sql.catalyst.dsl.expressions._
2222
import org.apache.spark.sql.catalyst.dsl.plans._
2323
import org.apache.spark.sql.catalyst.expressions.Literal
24+
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
2425
import org.apache.spark.sql.catalyst.plans.{LeftOuter, RightOuter}
2526
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct, LocalRelation, LogicalPlan}
2627
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -122,8 +123,7 @@ class AggregateOptimizeSuite extends AnalysisTest {
122123
Optimize.execute(
123124
x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr))
124125
.groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze),
125-
x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr))
126-
.groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze)
126+
x.groupBy("x.a".attr)("x.a".attr, Literal(1)).analyze)
127127
}
128128

129129
test("SPARK-37292: Removes outer join if it only has DISTINCT on streamed side with alias") {
@@ -148,4 +148,17 @@ class AggregateOptimizeSuite extends AnalysisTest {
148148
x.select("x.b".attr.as("newAlias1"), "x.b".attr.as("newAlias2"))
149149
.groupBy("newAlias1".attr, "newAlias2".attr)("newAlias1".attr, "newAlias2".attr).analyze)
150150
}
151+
152+
test("SPARK-38489: Aggregate.groupOnly support foldable expressions") {
153+
val x = testRelation.subquery('x)
154+
val y = testRelation.subquery('y)
155+
comparePlans(
156+
Optimize.execute(
157+
Distinct(x.join(y, LeftOuter, Some("x.a".attr === "y.a".attr))
158+
.select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias")))
159+
.analyze),
160+
x.select("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias"))
161+
.groupBy("x.b".attr)("x.b".attr, TrueLiteral, FalseLiteral.as("newAlias"))
162+
.analyze)
163+
}
151164
}

0 commit comments

Comments
 (0)