Skip to content

Commit ff85dcd

Browse files
committed
fix
1 parent 46ac78e commit ff85dcd

File tree

8 files changed

+562
-1
lines changed

8 files changed

+562
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1655,7 +1655,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
16551655
val resolvedBasic = p.projectList.map(resolveExpressionByPlanChildren(_, p))
16561656
// Lateral column alias has higher priority than outer reference.
16571657
val resolvedWithLCA = resolveLateralColumnAlias(resolvedBasic)
1658-
val resolvedFinal = resolvedWithLCA.map(resolveColsLastResort)
1658+
val resolvedFinal = resolveProjectListLastResort(resolvedWithLCA)
16591659
p.copy(projectList = resolvedFinal.map(_.asInstanceOf[NamedExpression]))
16601660

16611661
case o: OverwriteByExpression if o.table.resolved =>
@@ -1846,6 +1846,40 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
18461846
}
18471847
}
18481848

1849+
/**
1850+
* If `spark.sql.analyzer.delayLastResortColumnResolution` is set to true, delay resolving a
1851+
* column with [[resolveColsLastResort]] until all [[UnresolvedAlias]]es that precede that column
1852+
* are resolved. This is necessary in order to allow that column to be resolved as a lateral
1853+
* column alias reference to an [[UnresolvedAlias]], if possible. For example, for a query like:
1854+
*
1855+
* {{{
1856+
* DECLARE a = 'aa';
1857+
* SELECT 'a', a;
1858+
* -- result without the delay is ('a', 'aa')
1859+
* }}}
1860+
*
1861+
* Without delaying [[resolveColsLastResort]], second 'a' column would be resolved as a
1862+
* variable instead of being resolved as a lateral column alias reference to
1863+
* [[UnresolvedAlias]] of literal 'a'. The intended behavior should be the same as for the
1864+
* following query without variable declaration:
1865+
*
1866+
* {{{ SELECT 'a', a; -- result is ('a', 'a') }}}
1867+
*/
1868+
private def resolveProjectListLastResort(projectList: Seq[Expression]) = {
  // Index of the first [[UnresolvedAlias]] in the project list, or -1 when delaying is disabled
  // or the list contains no [[UnresolvedAlias]] at all.
  val firstUnresolvedAliasIndex =
    if (conf.getConf(SQLConf.DELAY_LAST_RESORT_COLUMN_RESOLUTION)) {
      projectList.indexWhere {
        case _: UnresolvedAlias => true
        case _ => false
      }
    } else {
      -1
    }

  if (firstUnresolvedAliasIndex < 0) {
    // Nothing to wait for: every entry goes through last-resort resolution right away.
    projectList.map(resolveColsLastResort)
  } else {
    // Entries up to and including the first [[UnresolvedAlias]] are resolved now. Everything
    // after it is returned untouched, so that it can still become a lateral column alias
    // reference once that alias has been resolved.
    val (resolveNow, delayed) = projectList.splitAt(firstUnresolvedAliasIndex + 1)
    resolveNow.map(resolveColsLastResort) ++ delayed
  }
}
1882+
18491883
private def resolveMergeExprOrFail(e: Expression, p: LogicalPlan): Expression = {
18501884
val resolved = resolveExprInAssignment(e, p)
18511885
checkResolvedMergeExpr(resolved, p)

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,27 @@ object SQLConf {
241241
}
242242
}
243243

244+
// Internal boolean flag, enabled by default. Analyzer.resolveProjectListLastResort reads it:
// when true, last-resort resolution of a project list stops after the first UnresolvedAlias and
// the remaining entries are left as they are; when false, every entry of the project list goes
// through last-resort resolution immediately. The legacy SQL test for SPARK-53733 sets it to
// false to keep the old behavior covered.
val DELAY_LAST_RESORT_COLUMN_RESOLUTION =
245+
buildConf("spark.sql.analyzer.delayLastResortColumnResolution")
246+
.internal()
247+
.doc(
248+
"When true, delay resolution of a column to a local/session variable or outer reference " +
249+
"until all unresolved aliases that come before the column in the project list are " +
250+
"resolved. This is needed in order to prevent incorrectly resolving a column to a " +
251+
"variable or outer reference instead of a lateral column reference (see SPARK-53733)."
252+
)
253+
.version("4.1.0")
254+
.booleanConf
255+
.createWithDefault(true)
256+
244257
// Internal boolean flag, enabled by default, tagged as introduced in 4.1.0. Per its doc string
// it makes a column of the underlying relation win over a lateral column alias reference with
// the same name (SPARK-53734).
// NOTE(review): the rule that reads this flag is not visible here; presumably it only applies to
// array index expressions, as the name suggests - confirm against the use site.
val PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX =
245258
buildConf("spark.sql.analyzer.preferColumnOverLcaInArrayIndex")
246259
.internal()
247260
.doc(
248261
"When true, prefer the column from the underlying relation over the lateral column alias " +
249262
"reference with the same name (see SPARK-53734)."
250263
)
264+
.version("4.1.0")
251265
.booleanConf
252266
.createWithDefault(true)
253267

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
DECLARE a = "aa"
4+
-- !query analysis
5+
CreateVariable defaultvalueexpression(aa, "aa"), false
6+
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a
7+
8+
9+
-- !query
10+
SELECT 'a', a
11+
-- !query analysis
12+
Project [a AS a#x, variablereference(system.session.a='aa') AS a#x]
13+
+- OneRowRelation
14+
15+
16+
-- !query
17+
SELECT 'a' AS a, a
18+
-- !query analysis
19+
Project [a#x, a#x]
20+
+- Project [a AS a#x]
21+
+- OneRowRelation
22+
23+
24+
-- !query
25+
SELECT 'a', a FROM VALUES(1) AS t(a)
26+
-- !query analysis
27+
Project [a AS a#x, a#x]
28+
+- SubqueryAlias t
29+
+- LocalRelation [a#x]
30+
31+
32+
-- !query
33+
SELECT 'a' AS a, a FROM VALUES(1) AS t(a)
34+
-- !query analysis
35+
Project [a AS a#x, a#x]
36+
+- SubqueryAlias t
37+
+- LocalRelation [a#x]
38+
39+
40+
-- !query
41+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1)
42+
-- !query analysis
43+
Project [col1#x]
44+
+- Filter exists#x [col1#x]
45+
: +- Project [col1 AS col1#x, outer(col1#x)]
46+
: +- OneRowRelation
47+
+- LocalRelation [col1#x]
48+
49+
50+
-- !query
51+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1)
52+
-- !query analysis
53+
Project [col1#x]
54+
+- Filter exists#x [col1#x]
55+
: +- Project [col1#x, col1#x]
56+
: +- Project [outer(col1#x) AS col1#x]
57+
: +- OneRowRelation
58+
+- LocalRelation [col1#x]
59+
60+
61+
-- !query
62+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1)
63+
-- !query analysis
64+
Project [col1#x]
65+
+- Filter exists#x []
66+
: +- Project [col1#x, col1#x]
67+
: +- Project [col1 AS col1#x]
68+
: +- OneRowRelation
69+
+- LocalRelation [col1#x]
70+
71+
72+
-- !query
73+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1))
74+
-- !query analysis
75+
Project [col1#x]
76+
+- Filter exists#x []
77+
: +- Project [col1 AS col1#x, col1#x]
78+
: +- LocalRelation [col1#x]
79+
+- LocalRelation [col1#x]
80+
81+
82+
-- !query
83+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
84+
-- !query analysis
85+
Project [col1#x]
86+
+- Filter exists#x []
87+
: +- Project [col1#x AS col1#x, col1#x]
88+
: +- LocalRelation [col1#x]
89+
+- LocalRelation [col1#x]
90+
91+
92+
-- !query
93+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
94+
-- !query analysis
95+
Project [col1#x]
96+
+- Filter exists#x []
97+
: +- Project [col1 AS col1#x, col1#x]
98+
: +- LocalRelation [col1#x]
99+
+- LocalRelation [col1#x]
100+
101+
102+
-- !query
103+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1)
104+
-- !query analysis
105+
Filter exists#x [col1#x]
106+
: +- Project [col1 AS col1#x, outer(col1#x)]
107+
: +- OneRowRelation
108+
+- Aggregate [col1#x], [col1#x]
109+
+- LocalRelation [col1#x]
110+
111+
112+
-- !query
113+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1)
114+
-- !query analysis
115+
Filter exists#x [col1#x]
116+
: +- Project [col1#x, col1#x]
117+
: +- Project [outer(col1#x) AS col1#x]
118+
: +- OneRowRelation
119+
+- Aggregate [col1#x], [col1#x]
120+
+- LocalRelation [col1#x]
121+
122+
123+
-- !query
124+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1)
125+
-- !query analysis
126+
Filter exists#x []
127+
: +- Project [col1#x, col1#x]
128+
: +- Project [col1 AS col1#x]
129+
: +- OneRowRelation
130+
+- Aggregate [col1#x], [col1#x]
131+
+- LocalRelation [col1#x]
132+
133+
134+
-- !query
135+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1 FROM VALUES(1))
136+
-- !query analysis
137+
Filter exists#x []
138+
: +- Project [col1 AS col1#x, col1#x]
139+
: +- LocalRelation [col1#x]
140+
+- Aggregate [col1#x], [col1#x]
141+
+- LocalRelation [col1#x]
142+
143+
144+
-- !query
145+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
146+
-- !query analysis
147+
Filter exists#x []
148+
: +- Project [col1#x AS col1#x, col1#x]
149+
: +- LocalRelation [col1#x]
150+
+- Aggregate [col1#x], [col1#x]
151+
+- LocalRelation [col1#x]
152+
153+
154+
-- !query
155+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
156+
-- !query analysis
157+
Filter exists#x []
158+
: +- Project [col1 AS col1#x, col1#x]
159+
: +- LocalRelation [col1#x]
160+
+- Aggregate [col1#x], [col1#x]
161+
+- LocalRelation [col1#x]
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
DECLARE a = "aa"
4+
-- !query analysis
5+
CreateVariable defaultvalueexpression(aa, "aa"), false
6+
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a
7+
8+
9+
-- !query
10+
SELECT 'a', a
11+
-- !query analysis
12+
Project [a#x, a#x]
13+
+- Project [a AS a#x]
14+
+- OneRowRelation
15+
16+
17+
-- !query
18+
SELECT 'a' AS a, a
19+
-- !query analysis
20+
Project [a#x, a#x]
21+
+- Project [a AS a#x]
22+
+- OneRowRelation
23+
24+
25+
-- !query
26+
SELECT 'a', a FROM VALUES(1) AS t(a)
27+
-- !query analysis
28+
Project [a AS a#x, a#x]
29+
+- SubqueryAlias t
30+
+- LocalRelation [a#x]
31+
32+
33+
-- !query
34+
SELECT 'a' AS a, a FROM VALUES(1) AS t(a)
35+
-- !query analysis
36+
Project [a AS a#x, a#x]
37+
+- SubqueryAlias t
38+
+- LocalRelation [a#x]
39+
40+
41+
-- !query
42+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1)
43+
-- !query analysis
44+
Project [col1#x]
45+
+- Filter exists#x []
46+
: +- Project [col1#x, col1#x]
47+
: +- Project [col1 AS col1#x]
48+
: +- OneRowRelation
49+
+- LocalRelation [col1#x]
50+
51+
52+
-- !query
53+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1)
54+
-- !query analysis
55+
Project [col1#x]
56+
+- Filter exists#x [col1#x]
57+
: +- Project [col1#x, col1#x]
58+
: +- Project [outer(col1#x) AS col1#x]
59+
: +- OneRowRelation
60+
+- LocalRelation [col1#x]
61+
62+
63+
-- !query
64+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1)
65+
-- !query analysis
66+
Project [col1#x]
67+
+- Filter exists#x []
68+
: +- Project [col1#x, col1#x]
69+
: +- Project [col1 AS col1#x]
70+
: +- OneRowRelation
71+
+- LocalRelation [col1#x]
72+
73+
74+
-- !query
75+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1))
76+
-- !query analysis
77+
Project [col1#x]
78+
+- Filter exists#x []
79+
: +- Project [col1 AS col1#x, col1#x]
80+
: +- LocalRelation [col1#x]
81+
+- LocalRelation [col1#x]
82+
83+
84+
-- !query
85+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1))
86+
-- !query analysis
87+
Project [col1#x]
88+
+- Filter exists#x []
89+
: +- Project [col1#x AS col1#x, col1#x]
90+
: +- LocalRelation [col1#x]
91+
+- LocalRelation [col1#x]
92+
93+
94+
-- !query
95+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1))
96+
-- !query analysis
97+
Project [col1#x]
98+
+- Filter exists#x []
99+
: +- Project [col1 AS col1#x, col1#x]
100+
: +- LocalRelation [col1#x]
101+
+- LocalRelation [col1#x]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- This is a legacy test for last resort column resolution that has a correctness issue.
2+
-- For more information, see SPARK-53733.
3+
4+
--IMPORT column-last-resort-resolution-precedence.sql
5+
6+
--SET spark.sql.analyzer.delayLastResortColumnResolution = false
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
DECLARE a = "aa";
2+
3+
SELECT 'a', a;
4+
SELECT 'a' AS a, a;
5+
6+
SELECT 'a', a FROM VALUES(1) AS t(a);
7+
SELECT 'a' AS a, a FROM VALUES(1) AS t(a);
8+
9+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1);
10+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1);
11+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1);
12+
13+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1', col1 FROM VALUES(1));
14+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1));
15+
SELECT col1 FROM VALUES(1) WHERE EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1));
16+
17+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1);
18+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1);
19+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1);
20+
21+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1', col1 FROM VALUES(1));
22+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT col1 AS col1, col1 FROM VALUES(1));
23+
SELECT col1 FROM VALUES(1) GROUP BY col1 HAVING EXISTS (SELECT 'col1' AS col1, col1 FROM VALUES(1));

0 commit comments

Comments
 (0)