Skip to content

Commit 55a1daa

Browse files
committed
[SPARK-35070][SQL] TRANSFORM does not support alias in inputs
1 parent b5241c9 commit 55a1daa

File tree

4 files changed, with 118 additions and 59 deletions.

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -524,9 +524,9 @@ querySpecification
524524
;
525525

526526
transformClause
527-
: (SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
528-
| kind=MAP namedExpressionSeq
529-
| kind=REDUCE namedExpressionSeq)
527+
: (SELECT kind=TRANSFORM '(' expressionSeq ')'
528+
| kind=MAP expressionSeq
529+
| kind=REDUCE expressionSeq)
530530
inRowFormat=rowFormat?
531531
(RECORDWRITER recordWriter=STRING)?
532532
USING script=STRING
@@ -774,6 +774,10 @@ expression
774774
: booleanExpression
775775
;
776776

777+
expressionSeq
778+
: expression (',' expression)*
779+
;
780+
777781
booleanExpression
778782
: NOT booleanExpression #logicalNot
779783
| EXISTS '(' query ')' #exists

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
627627
.map(typedVisit[Expression])
628628
}
629629

630+
override def visitExpressionSeq(
631+
ctx: ExpressionSeqContext): Seq[Expression] = {
632+
Option(ctx).toSeq
633+
.flatMap(_.expression.asScala)
634+
.map(typedVisit[Expression])
635+
}
636+
630637
/**
631638
* Create a logical plan using a having clause.
632639
*/
@@ -677,8 +684,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
677684

678685
val plan = visitCommonSelectQueryClausePlan(
679686
relation,
687+
visitExpressionSeq(transformClause.expressionSeq),
680688
lateralView,
681-
transformClause.namedExpressionSeq,
682689
whereClause,
683690
aggregationClause,
684691
havingClause,
@@ -723,8 +730,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
723730

724731
val plan = visitCommonSelectQueryClausePlan(
725732
relation,
733+
visitNamedExpressionSeq(selectClause.namedExpressionSeq),
726734
lateralView,
727-
selectClause.namedExpressionSeq,
728735
whereClause,
729736
aggregationClause,
730737
havingClause,
@@ -737,8 +744,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
737744

738745
def visitCommonSelectQueryClausePlan(
739746
relation: LogicalPlan,
747+
expressions: Seq[Expression],
740748
lateralView: java.util.List[LateralViewContext],
741-
namedExpressionSeq: NamedExpressionSeqContext,
742749
whereClause: WhereClauseContext,
743750
aggregationClause: AggregationClauseContext,
744751
havingClause: HavingClauseContext,
@@ -750,8 +757,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
750757
// Add where.
751758
val withFilter = withLateralView.optionalMap(whereClause)(withWhereClause)
752759

753-
val expressions = visitNamedExpressionSeq(namedExpressionSeq)
754-
755760
// Add aggregation or a project.
756761
val namedExpressions = expressions.map {
757762
case e: NamedExpression => e

sql/core/src/test/resources/sql-tests/inputs/transform.sql

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ FROM script_trans
206206
LIMIT 1;
207207

208208
SELECT TRANSFORM(
209-
b AS d5, a,
209+
b, a,
210210
CASE
211211
WHEN c > 100 THEN 1
212212
WHEN c < 100 THEN 2
@@ -225,45 +225,45 @@ SELECT TRANSFORM(*)
225225
FROM script_trans
226226
WHERE a <= 4;
227227

228-
SELECT TRANSFORM(b AS d, MAX(a) as max_a, CAST(SUM(c) AS STRING))
228+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
229229
USING 'cat' AS (a, b, c)
230230
FROM script_trans
231231
WHERE a <= 4
232232
GROUP BY b;
233233

234-
SELECT TRANSFORM(b AS d, MAX(a) FILTER (WHERE a > 3) AS max_a, CAST(SUM(c) AS STRING))
234+
SELECT TRANSFORM(b, MAX(a) FILTER (WHERE a > 3), CAST(SUM(c) AS STRING))
235235
USING 'cat' AS (a,b,c)
236236
FROM script_trans
237237
WHERE a <= 4
238238
GROUP BY b;
239239

240-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(sum(c) AS STRING))
240+
SELECT TRANSFORM(b, MAX(a), CAST(sum(c) AS STRING))
241241
USING 'cat' AS (a, b, c)
242242
FROM script_trans
243243
WHERE a <= 2
244244
GROUP BY b;
245245

246-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING))
246+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
247247
USING 'cat' AS (a, b, c)
248248
FROM script_trans
249249
WHERE a <= 4
250250
GROUP BY b
251-
HAVING max_a > 0;
251+
HAVING MAX(a) > 0;
252252

253-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING))
253+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
254254
USING 'cat' AS (a, b, c)
255255
FROM script_trans
256256
WHERE a <= 4
257257
GROUP BY b
258-
HAVING max(a) > 1;
258+
HAVING MAX(a) > 1;
259259

260-
SELECT TRANSFORM(b, MAX(a) OVER w as max_a, CAST(SUM(c) OVER w AS STRING))
260+
SELECT TRANSFORM(b, MAX(a) OVER w, CAST(SUM(c) OVER w AS STRING))
261261
USING 'cat' AS (a, b, c)
262262
FROM script_trans
263263
WHERE a <= 4
264264
WINDOW w AS (PARTITION BY b ORDER BY a);
265265

266-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING), myCol, myCol2)
266+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING), myCol, myCol2)
267267
USING 'cat' AS (a, b, c, d, e)
268268
FROM script_trans
269269
LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol
@@ -280,7 +280,7 @@ FROM(
280280
SELECT a + 1;
281281

282282
FROM(
283-
SELECT TRANSFORM(a, SUM(b) b)
283+
SELECT TRANSFORM(a, SUM(b))
284284
USING 'cat' AS (`a` INT, b STRING)
285285
FROM script_trans
286286
GROUP BY a
@@ -308,16 +308,27 @@ HAVING true;
308308

309309
SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
310310

311-
SET spark.sql.parser.quotedRegexColumnNames=true;
312-
313-
SELECT TRANSFORM(`(a|b)?+.+`)
314-
USING 'cat' AS (c)
315-
FROM script_trans;
316-
317-
SET spark.sql.parser.quotedRegexColumnNames=false;
318-
319311
-- SPARK-34634: self join using CTE contains transform
320312
WITH temp AS (
321313
SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t
322314
)
323-
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b
315+
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b;
316+
317+
-- SPARK-35070: TRANSFORM does not support alias in inputs
318+
SELECT TRANSFORM(b AS b_1, MAX(a), CAST(sum(c) AS STRING))
319+
USING 'cat' AS (a, b, c)
320+
FROM script_trans
321+
WHERE a <= 2
322+
GROUP BY b;
323+
324+
SELECT TRANSFORM(b b_1, MAX(a), CAST(sum(c) AS STRING))
325+
USING 'cat' AS (a, b, c)
326+
FROM script_trans
327+
WHERE a <= 2
328+
GROUP BY b;
329+
330+
SELECT TRANSFORM(b, MAX(a) AS max_a, CAST(sum(c) AS STRING))
331+
USING 'cat' AS (a, b, c)
332+
FROM script_trans
333+
WHERE a <= 2
334+
GROUP BY b;

sql/core/src/test/resources/sql-tests/results/transform.sql.out

Lines changed: 70 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ struct<a:int,b:int>
376376

377377
-- !query
378378
SELECT TRANSFORM(
379-
b AS d5, a,
379+
b, a,
380380
CASE
381381
WHEN c > 100 THEN 1
382382
WHEN c < 100 THEN 2
@@ -416,7 +416,7 @@ struct<a:string,b:string,c:string>
416416

417417

418418
-- !query
419-
SELECT TRANSFORM(b AS d, MAX(a) as max_a, CAST(SUM(c) AS STRING))
419+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
420420
USING 'cat' AS (a, b, c)
421421
FROM script_trans
422422
WHERE a <= 4
@@ -429,7 +429,7 @@ struct<a:string,b:string,c:string>
429429

430430

431431
-- !query
432-
SELECT TRANSFORM(b AS d, MAX(a) FILTER (WHERE a > 3) AS max_a, CAST(SUM(c) AS STRING))
432+
SELECT TRANSFORM(b, MAX(a) FILTER (WHERE a > 3), CAST(SUM(c) AS STRING))
433433
USING 'cat' AS (a,b,c)
434434
FROM script_trans
435435
WHERE a <= 4
@@ -442,7 +442,7 @@ struct<a:string,b:string,c:string>
442442

443443

444444
-- !query
445-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(sum(c) AS STRING))
445+
SELECT TRANSFORM(b, MAX(a), CAST(sum(c) AS STRING))
446446
USING 'cat' AS (a, b, c)
447447
FROM script_trans
448448
WHERE a <= 2
@@ -454,12 +454,12 @@ struct<a:string,b:string,c:string>
454454

455455

456456
-- !query
457-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING))
457+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
458458
USING 'cat' AS (a, b, c)
459459
FROM script_trans
460460
WHERE a <= 4
461461
GROUP BY b
462-
HAVING max_a > 0
462+
HAVING MAX(a) > 0
463463
-- !query schema
464464
struct<a:string,b:string,c:string>
465465
-- !query output
@@ -468,20 +468,20 @@ struct<a:string,b:string,c:string>
468468

469469

470470
-- !query
471-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING))
471+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING))
472472
USING 'cat' AS (a, b, c)
473473
FROM script_trans
474474
WHERE a <= 4
475475
GROUP BY b
476-
HAVING max(a) > 1
476+
HAVING MAX(a) > 1
477477
-- !query schema
478478
struct<a:string,b:string,c:string>
479479
-- !query output
480480
5 4 6
481481

482482

483483
-- !query
484-
SELECT TRANSFORM(b, MAX(a) OVER w as max_a, CAST(SUM(c) OVER w AS STRING))
484+
SELECT TRANSFORM(b, MAX(a) OVER w, CAST(SUM(c) OVER w AS STRING))
485485
USING 'cat' AS (a, b, c)
486486
FROM script_trans
487487
WHERE a <= 4
@@ -494,7 +494,7 @@ struct<a:string,b:string,c:string>
494494

495495

496496
-- !query
497-
SELECT TRANSFORM(b, MAX(a) as max_a, CAST(SUM(c) AS STRING), myCol, myCol2)
497+
SELECT TRANSFORM(b, MAX(a), CAST(SUM(c) AS STRING), myCol, myCol2)
498498
USING 'cat' AS (a, b, c, d, e)
499499
FROM script_trans
500500
LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol
@@ -527,7 +527,7 @@ struct<(a + 1):int>
527527

528528
-- !query
529529
FROM(
530-
SELECT TRANSFORM(a, SUM(b) b)
530+
SELECT TRANSFORM(a, SUM(b))
531531
USING 'cat' AS (`a` INT, b STRING)
532532
FROM script_trans
533533
GROUP BY a
@@ -601,40 +601,79 @@ spark.sql.legacy.parser.havingWithoutGroupByAsWhere false
601601

602602

603603
-- !query
604-
SET spark.sql.parser.quotedRegexColumnNames=true
604+
WITH temp AS (
605+
SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t
606+
)
607+
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b
605608
-- !query schema
606-
struct<key:string,value:string>
609+
struct<b:string>
607610
-- !query output
608-
spark.sql.parser.quotedRegexColumnNames true
611+
1
612+
2
613+
3
609614

610615

611616
-- !query
612-
SELECT TRANSFORM(`(a|b)?+.+`)
613-
USING 'cat' AS (c)
617+
SELECT TRANSFORM(b AS b_1, MAX(a), CAST(sum(c) AS STRING))
618+
USING 'cat' AS (a, b, c)
614619
FROM script_trans
620+
WHERE a <= 2
621+
GROUP BY b
615622
-- !query schema
616-
struct<c:string>
623+
struct<>
617624
-- !query output
618-
3
619-
6
620-
9
625+
org.apache.spark.sql.catalyst.parser.ParseException
626+
627+
no viable alternative at input 'SELECT TRANSFORM(b AS'(line 1, pos 19)
628+
629+
== SQL ==
630+
SELECT TRANSFORM(b AS b_1, MAX(a), CAST(sum(c) AS STRING))
631+
-------------------^^^
632+
USING 'cat' AS (a, b, c)
633+
FROM script_trans
634+
WHERE a <= 2
635+
GROUP BY b
621636

622637

623638
-- !query
624-
SET spark.sql.parser.quotedRegexColumnNames=false
639+
SELECT TRANSFORM(b b_1, MAX(a), CAST(sum(c) AS STRING))
640+
USING 'cat' AS (a, b, c)
641+
FROM script_trans
642+
WHERE a <= 2
643+
GROUP BY b
625644
-- !query schema
626-
struct<key:string,value:string>
645+
struct<>
627646
-- !query output
628-
spark.sql.parser.quotedRegexColumnNames false
647+
org.apache.spark.sql.catalyst.parser.ParseException
648+
649+
no viable alternative at input 'SELECT TRANSFORM(b b_1'(line 1, pos 19)
650+
651+
== SQL ==
652+
SELECT TRANSFORM(b b_1, MAX(a), CAST(sum(c) AS STRING))
653+
-------------------^^^
654+
USING 'cat' AS (a, b, c)
655+
FROM script_trans
656+
WHERE a <= 2
657+
GROUP BY b
658+
629659

630660
-- !query
631-
WITH temp AS (
632-
SELECT TRANSFORM(a) USING 'cat' AS (b string) FROM t
633-
)
634-
SELECT t1.b FROM temp t1 JOIN temp t2 ON t1.b = t2.b
661+
SELECT TRANSFORM(b, MAX(a) AS max_a, CAST(sum(c) AS STRING))
662+
USING 'cat' AS (a, b, c)
663+
FROM script_trans
664+
WHERE a <= 2
665+
GROUP BY b
635666
-- !query schema
636-
struct<b:string>
667+
struct<>
637668
-- !query output
638-
1
639-
2
640-
3
669+
org.apache.spark.sql.catalyst.parser.ParseException
670+
671+
no viable alternative at input 'SELECT TRANSFORM(b, MAX(a) AS'(line 1, pos 27)
672+
673+
== SQL ==
674+
SELECT TRANSFORM(b, MAX(a) AS max_a, CAST(sum(c) AS STRING))
675+
---------------------------^^^
676+
USING 'cat' AS (a, b, c)
677+
FROM script_trans
678+
WHERE a <= 2
679+
GROUP BY b

0 commit comments

Comments
 (0)