Skip to content

Commit 847d6d4

Browse files
javierivanov
authored and maropu committed
[SPARK-31102][SQL][3.0] Spark-sql fails to parse when contains comment
This PR introduces a change to false for the insideComment flag on a newline. Fixing the issue introduced by SPARK-30049. Backport to 3.0 from #27920 Previously on SPARK-30049 a comment containing an unclosed quote produced the following issue: ``` spark-sql> SELECT 1 -- someone's comment here > ; Error in query: extraneous input ';' expecting <EOF>(line 2, pos 0) == SQL == SELECT 1 -- someone's comment here ; ^^^ ``` This was caused because there was no flag for comment sections inside the splitSemiColon method to ignore quotes. SPARK-30049 added that flag and fixed the issue, but introduced the following problem: ``` spark-sql> select > 1, > -- two > 2; Error in query: mismatched input '<EOF>' expecting {'(', 'ADD', 'AFTER', 'ALL', 'ALTER', ...}(line 3, pos 2) == SQL == select 1, --^^^ ``` This issue is generated by a missing turn-off for the insideComment flag with a newline. No - For previous tests using line-continuity(`\`) it was added a line-continuity rule in the SqlBase.g4 file to add the functionality to the SQL context. - A new test for inline comments was added. Closes #27920 from javierivanov/SPARK-31102. Authored-by: Javier Fuentes <j.fuentes.m@icloud.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> ### What changes were proposed in this pull request? ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #28565 from javierivanov/SPARK-3.0-31102. Authored-by: Javier Fuentes <j.fuentes.m@icloud.com> Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
1 parent f6053b9 commit 847d6d4

File tree

4 files changed

+28
-16
lines changed

4 files changed

+28
-16
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1784,7 +1784,7 @@ fragment LETTER
17841784
;
17851785

17861786
SIMPLE_COMMENT
1787-
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
1787+
: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
17881788
;
17891789

17901790
BRACKETED_EMPTY_COMMENT

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,16 @@ class PlanParserSuite extends AnalysisTest {
5555
With(plan, ctes)
5656
}
5757

58+
test("single comment case one") {
59+
val plan = table("a").select(star())
60+
assertEqual("-- single comment\nSELECT * FROM a", plan)
61+
}
62+
63+
test("single comment case two") {
64+
val plan = table("a").select(star())
65+
assertEqual("-- single comment\\\nwith line continuity\nSELECT * FROM a", plan)
66+
}
67+
5868
test("case insensitive") {
5969
val plan = table("a").select(star())
6070
assertEqual("sELEct * FroM a", plan)

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
524524
var insideComment = false
525525
var escape = false
526526
var beginIndex = 0
527-
var endIndex = line.length
528527
val ret = new JArrayList[String]
529528

530529
for (index <- 0 until line.length) {
@@ -552,8 +551,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
552551
} else if (hasNext && line.charAt(index + 1) == '-') {
553552
// ignore quotes and ;
554553
insideComment = true
555-
// ignore eol
556-
endIndex = index
557554
}
558555
} else if (line.charAt(index) == ';') {
559556
if (insideSingleQuote || insideDoubleQuote || insideComment) {
@@ -563,8 +560,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
563560
ret.add(line.substring(beginIndex, index))
564561
beginIndex = index + 1
565562
}
566-
} else {
567-
// nothing to do
563+
} else if (line.charAt(index) == '\n') {
564+
// with a new line the inline comment should end.
565+
if (!escape) {
566+
insideComment = false
567+
}
568568
}
569569
// set the escape
570570
if (escape) {
@@ -573,7 +573,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
573573
escape = true
574574
}
575575
}
576-
ret.add(line.substring(beginIndex, endIndex))
576+
ret.add(line.substring(beginIndex))
577577
ret
578578
}
579579
}

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -508,18 +508,20 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
508508
)
509509
}
510510

511-
test("SPARK-30049 Should not complain for quotes in commented with multi-lines") {
511+
test("SPARK-31102 spark-sql fails to parse when contains comment") {
512512
runCliWithin(1.minute)(
513-
"""SELECT concat('test', 'comment') -- someone's comment here \\
514-
| comment continues here with single ' quote \\
515-
| extra ' \\
516-
|;""".stripMargin -> "testcomment"
513+
"""SELECT concat('test', 'comment'),
514+
| -- someone's comment here
515+
| 2;""".stripMargin -> "testcomment"
517516
)
517+
}
518+
519+
test("SPARK-30049 Should not complain for quotes in commented with multi-lines") {
518520
runCliWithin(1.minute)(
519-
"""SELECT concat('test', 'comment') -- someone's comment here \\
520-
| comment continues here with single ' quote \\
521-
| extra ' \\
522-
| ;""".stripMargin -> "testcomment"
521+
"""SELECT concat('test', 'comment') -- someone's comment here \
522+
| comment continues here with single ' quote \
523+
| extra ' \
524+
|;""".stripMargin -> "testcomment"
523525
)
524526
}
525527

0 commit comments

Comments
 (0)