Skip to content

Commit aa89883

Browse files
mihailoale-db authored and cloud-fan committed
[SPARK-52046][SQL] Prettify OuterReference on AggregateExpression names when using toPrettySQL
### What changes were proposed in this pull request? In this PR I propose that we remove backticks and qualifiers from the name generated for an `OuterReference` over an `AggregateExpression`. In other words, the name for an aggregate over `column` that is an outer reference would be `outer(min(column))` instead of `outer('min(column)')`. We also introduce a flag (`spark.sql.prettyAliasNameForCorrelatedAggFunc.enabled`) to guard the new behavior, because it changes the schema of some specific `LATERAL JOIN` queries. ### Why are the changes needed? To ease development of the single-pass analyzer and to improve names in the fixed-point implementation. ### Does this PR introduce _any_ user-facing change? Yes. The schema changes so that the name of an `OuterReference` over an `AggregateExpression` no longer contains backticks or qualifiers. ### How was this patch tested? Existing tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50836 from mihailoale-db/prettifyouterrefname. Authored-by: mihailoale-db <mihailo.aleksic@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 10c7614 commit aa89883

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ import org.apache.spark.internal.Logging
2727
import org.apache.spark.sql.catalyst.expressions._
2828
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
2929
import org.apache.spark.sql.connector.catalog.MetadataColumn
30+
import org.apache.spark.sql.internal.SQLConf
3031
import org.apache.spark.sql.types.{MetadataBuilder, NumericType, StringType, StructType}
3132
import org.apache.spark.unsafe.types.UTF8String
3233
import org.apache.spark.util.{SparkErrorUtils, Utils}
3334

34-
package object util extends Logging {
35+
package object util extends Logging with SQLConfHelper {
3536

3637
/** Silences output to stderr or stdout for the duration of f */
3738
def quietly[A](f: => A): A = {
@@ -103,13 +104,27 @@ package object util extends Logging {
103104
private def usePrettyExpression(e: Expression, stripOuterReference: Boolean = true): Expression =
104105
e transform {
105106
case aggregateExpression: AggregateExpression
106-
if stripOuterReference && SubExprUtils.containsOuter(aggregateExpression) =>
107+
if stripOuterReference && conf.getConf(
108+
SQLConf.PRETTY_ALIAS_NAME_FOR_CORRELATED_AGGREGATE_FUNCTION
109+
) && SubExprUtils.containsOuter(aggregateExpression) =>
107110
val strippedAggregateExpression = SubExprUtils.stripOuterReference(aggregateExpression)
108111
OuterReference(
109112
new PrettyAttribute(
110113
Alias(
111114
strippedAggregateExpression,
112-
toPrettySQL(strippedAggregateExpression)
115+
toPrettySQL(strippedAggregateExpression, stripOuterReference)
116+
)().toAttribute
117+
)
118+
)
119+
case _ @ OuterReference(aggregateExpression: AggregateExpression)
120+
if conf.getConf(
121+
SQLConf.PRETTY_ALIAS_NAME_FOR_CORRELATED_AGGREGATE_FUNCTION
122+
) =>
123+
OuterReference(
124+
new PrettyAttribute(
125+
Alias(
126+
aggregateExpression,
127+
toPrettySQL(aggregateExpression, stripOuterReference)
113128
)().toAttribute
114129
)
115130
)
@@ -119,16 +134,24 @@ package object util extends Logging {
119134
case Literal(null, dataType) => PrettyAttribute("NULL", dataType)
120135
case e: GetStructField =>
121136
val name = e.name.getOrElse(e.childSchema(e.ordinal).name)
122-
PrettyAttribute(usePrettyExpression(e.child).sql + "." + name, e.dataType)
137+
PrettyAttribute(
138+
usePrettyExpression(e.child, stripOuterReference).sql + "." + name,
139+
e.dataType
140+
)
123141
case e: GetArrayStructFields =>
124-
PrettyAttribute(s"${usePrettyExpression(e.child)}.${e.field.name}", e.dataType)
142+
PrettyAttribute(
143+
s"${usePrettyExpression(e.child, stripOuterReference)}.${e.field.name}",
144+
e.dataType
145+
)
125146
case r: InheritAnalysisRules =>
126147
PrettyAttribute(
127-
r.makeSQLString(r.parameters.map(parameter => toPrettySQL(parameter))),
148+
r.makeSQLString(
149+
r.parameters.map(parameter => toPrettySQL(parameter, stripOuterReference))
150+
),
128151
r.dataType
129152
)
130153
case c: Cast if c.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty =>
131-
PrettyAttribute(usePrettyExpression(c.child).sql, c.dataType)
154+
PrettyAttribute(usePrettyExpression(c.child, stripOuterReference).sql, c.dataType)
132155
case p: PythonFuncExpression => PrettyPythonUDF(p.name, p.dataType, p.children)
133156
}
134157

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5818,6 +5818,14 @@ object SQLConf {
58185818
.booleanConf
58195819
.createWithDefault(true)
58205820

5821+
val PRETTY_ALIAS_NAME_FOR_CORRELATED_AGGREGATE_FUNCTION =
5822+
buildConf("spark.sql.prettyAliasNameForCorrelatedAggFunc.enabled")
5823+
.internal()
5824+
.doc("When true, use prettified name for correlated aggregate functions.")
5825+
.version("4.1.0")
5826+
.booleanConf
5827+
.createWithDefault(true)
5828+
58215829
/**
58225830
* Holds information about keys that have been deprecated.
58235831
*

0 commit comments

Comments
 (0)