Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1655,7 +1655,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
val resolvedBasic = p.projectList.map(resolveExpressionByPlanChildren(_, p))
// Lateral column alias has higher priority than outer reference.
val resolvedWithLCA = resolveLateralColumnAlias(resolvedBasic)
val resolvedFinal = resolvedWithLCA.map(resolveColsLastResort)
val resolvedFinal = resolveProjectListLastResort(resolvedWithLCA)
p.copy(projectList = resolvedFinal.map(_.asInstanceOf[NamedExpression]))

case o: OverwriteByExpression if o.table.resolved =>
Expand Down Expand Up @@ -1846,6 +1846,40 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
}
}

/**
 * Applies [[resolveColsLastResort]] to the expressions of a project list.
 *
 * If `spark.sql.analyzer.delayLastResortColumnResolution` is set to true, delay resolving a
 * column with [[resolveColsLastResort]] until all [[UnresolvedAlias]]es that come before that
 * column are resolved. This is necessary in order to allow that column to be resolved as a
 * lateral column alias reference to an [[UnresolvedAlias]], if possible. For example, for a
 * query like:
 *
 * {{{
 *   DECLARE a = 'aa';
 *   SELECT 'a', a;
 *   -- without the delay, the result is ('a', 'aa')
 * }}}
 *
 * Without delaying [[resolveColsLastResort]], the second 'a' column would be resolved as a
 * variable instead of being resolved as a lateral column alias reference to the
 * [[UnresolvedAlias]] of literal 'a'. The intended behavior should be the same as for the
 * following query without variable declaration:
 *
 * {{{ SELECT 'a', a; -- result is ('a', 'a') }}}
 *
 * If the flag is false, [[resolveColsLastResort]] is applied to every expression.
 *
 * @param projectList the project list expressions, in their original order
 * @return the project list with [[resolveColsLastResort]] applied to every expression up to
 *         and including the first [[UnresolvedAlias]] (or to all expressions when there is no
 *         [[UnresolvedAlias]] or the delay is disabled); later expressions are returned as is
 */
private def resolveProjectListLastResort(projectList: Seq[Expression]) = {
  if (conf.getConf(SQLConf.DELAY_LAST_RESORT_COLUMN_RESOLUTION)) {
    val firstAliasIndex = projectList.indexWhere(_.isInstanceOf[UnresolvedAlias])
    if (firstAliasIndex < 0) {
      // No unresolved alias precedes any column, so nothing has to be delayed.
      projectList.map(resolveColsLastResort)
    } else {
      // The first unresolved alias itself is still resolved; only the expressions that come
      // after it are left untouched for now.
      val (eligible, deferred) = projectList.splitAt(firstAliasIndex + 1)
      eligible.map(resolveColsLastResort) ++ deferred
    }
  } else {
    projectList.map(resolveColsLastResort)
  }
}

private def resolveMergeExprOrFail(e: Expression, p: LogicalPlan): Expression = {
val resolved = resolveExprInAssignment(e, p)
checkResolvedMergeExpr(resolved, p)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,27 @@ object SQLConf {
}
}

// Internal boolean flag, enabled by default, registered under
// `spark.sql.analyzer.delayLastResortColumnResolution` (version 4.1.0).
val DELAY_LAST_RESORT_COLUMN_RESOLUTION = {
  val description =
    "When true, delay resolution of a column to a local/session variable or outer " +
      "reference until all unresolved aliases that come before the column in the project " +
      "list are resolved. " +
      "This is needed in order to prevent incorrectly resolving a column to a variable or " +
      "outer reference instead of a lateral column reference (see SPARK-53733)."
  buildConf("spark.sql.analyzer.delayLastResortColumnResolution")
    .internal()
    .doc(description)
    .version("4.1.0")
    .booleanConf
    .createWithDefault(true)
}

// Internal boolean flag, enabled by default, registered under
// `spark.sql.analyzer.preferColumnOverLcaInArrayIndex` (version 4.1.0).
val PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX = {
  val description =
    "When true, prefer the column from the underlying relation over the lateral " +
      "column alias reference with the same name (see SPARK-53734)."
  buildConf("spark.sql.analyzer.preferColumnOverLcaInArrayIndex")
    .internal()
    .doc(description)
    .version("4.1.0")
    .booleanConf
    .createWithDefault(true)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
-- Automatically generated by SQLQueryTestSuite
-- !query
DECLARE a = "aa"
-- !query analysis
CreateVariable defaultvalueexpression(aa, "aa"), false
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a


-- !query
SELECT 'a', a
-- !query analysis
Project [a AS a#x, variablereference(system.session.a='aa') AS a#x]
+- OneRowRelation


-- !query
SELECT 'a' AS a, a
-- !query analysis
Project [a#x, a#x]
+- Project [a AS a#x]
+- OneRowRelation


-- !query
SELECT 'a', a FROM VALUES(1) AS t(a)
-- !query analysis
Project [a AS a#x, a#x]
+- SubqueryAlias t
+- LocalRelation [a#x]


-- !query
SELECT 'a' AS a, a FROM VALUES(1) AS t(a)
-- !query analysis
Project [a AS a#x, a#x]
+- SubqueryAlias t
+- LocalRelation [a#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x [col1#x]
: +- Project [col1 AS col1#x, outer(col1#x)]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x [col1#x]
: +- Project [col1#x, col1#x]
: +- Project [outer(col1#x) AS col1#x]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1#x, col1#x]
: +- Project [col1 AS col1#x]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1#x AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1)
-- !query analysis
Filter exists#x [col1#x]
: +- Project [col1 AS col1#x, outer(col1#x)]
: +- OneRowRelation
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1)
-- !query analysis
Filter exists#x [col1#x]
: +- Project [col1#x, col1#x]
: +- Project [outer(col1#x) AS col1#x]
: +- OneRowRelation
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1)
-- !query analysis
Filter exists#x []
: +- Project [col1#x, col1#x]
: +- Project [col1 AS col1#x]
: +- OneRowRelation
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1 FROM VALUES(1))
-- !query analysis
Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
-- !query analysis
Filter exists#x []
: +- Project [col1#x AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
-- !query analysis
Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- Aggregate [col1#x], [col1#x]
+- LocalRelation [col1#x]
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
-- Automatically generated by SQLQueryTestSuite
-- !query
DECLARE a = "aa"
-- !query analysis
CreateVariable defaultvalueexpression(aa, "aa"), false
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a


-- !query
SELECT 'a', a
-- !query analysis
Project [a#x, a#x]
+- Project [a AS a#x]
+- OneRowRelation


-- !query
SELECT 'a' AS a, a
-- !query analysis
Project [a#x, a#x]
+- Project [a AS a#x]
+- OneRowRelation


-- !query
SELECT 'a', a FROM VALUES(1) AS t(a)
-- !query analysis
Project [a AS a#x, a#x]
+- SubqueryAlias t
+- LocalRelation [a#x]


-- !query
SELECT 'a' AS a, a FROM VALUES(1) AS t(a)
-- !query analysis
Project [a AS a#x, a#x]
+- SubqueryAlias t
+- LocalRelation [a#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1#x, col1#x]
: +- Project [col1 AS col1#x]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x [col1#x]
: +- Project [col1#x, col1#x]
: +- Project [outer(col1#x) AS col1#x]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1)
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1#x, col1#x]
: +- Project [col1 AS col1#x]
: +- OneRowRelation
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1#x AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]


-- !query
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
-- !query analysis
Project [col1#x]
+- Filter exists#x []
: +- Project [col1 AS col1#x, col1#x]
: +- LocalRelation [col1#x]
+- LocalRelation [col1#x]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Legacy-behavior test: runs last resort column resolution without the delay, which has a known correctness issue.
-- For more information, see SPARK-53733.

--IMPORT column-last-resort-resolution-precedence.sql

--SET spark.sql.analyzer.delayLastResortColumnResolution = false
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Session variable named `a`; the queries below reuse the name `a` for literals, aliases and
-- relation columns.
DECLARE a = "aa";

-- No FROM clause: the trailing `a` has no relation column to bind to.
SELECT 'a', a;
SELECT 'a' AS a, a;

-- The relation `t` also exposes a column named `a`.
SELECT 'a', a FROM VALUES(1) AS t(a);
SELECT 'a' AS a, a FROM VALUES(1) AS t(a);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please use a view instead of VALUES? The lines would get shorter.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think arguments for CREATE FUNCTION might also be affected - they are resolved after LCAs and before session variables


-- EXISTS subquery without a FROM clause: `col1` exists in the outer query only.
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1);
Copy link
Contributor

@vladimirg-db vladimirg-db Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have the logic for Project, but the logic for Aggregate is either missing or non-obvious.

We need the same tests with GROUP BY.

-- EXISTS subqueries without a FROM clause whose first select item is explicitly aliased as `col1`.
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1);
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1);

-- EXISTS subqueries with their own FROM clause, which also exposes a column named `col1`.
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1));
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1));
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1));

-- The same subqueries without a FROM clause, placed in HAVING of a GROUP BY query.
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1);
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1);
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1);

-- The same subqueries with their own FROM clause, placed in HAVING of a GROUP BY query.
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1 FROM VALUES(1));
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1));
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1));
Loading