Skip to content

Commit 9cc925c

Browse files
dilipbiswal authored and cloud-fan committed
[SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
## What changes were proposed in this pull request? Currently in the grammar file the rule `query` is responsible for parsing both select and insert statements. As a result, we need to have more semantic checks in the code to guard against invalid insert constructs in a query. A couple of examples are in the `visitCreateView` and `visitAlterView` functions. One other issue is that we don't catch the `invalid insert constructs` in all the places until checkAnalysis (the errors we raise can be confusing as well). Here are a couple of examples: ```SQL select * from (insert into bar values (2)); ``` ``` Error in query: unresolved operator 'Project [*]; 'Project [*] +- SubqueryAlias `__auto_generated_subquery_name` +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1] +- Project [cast(col1#18 as int) AS c1#20] +- LocalRelation [col1#18] ``` ```SQL select * from foo where c1 in (insert into bar values (2)) ``` ``` Error in query: cannot resolve '(default.foo.`c1` IN (listquery()))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. #columns in left hand side: 1. #columns in right hand side: 0. Left side columns: [default.foo.`c1`]. Right side columns: [].;; 'Project [*] +- 'Filter c1#6 IN (list#5 []) : +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1] : +- Project [cast(col1#7 as int) AS c1#9] : +- LocalRelation [col1#7] +- SubqueryAlias `default`.`foo` +- HiveTableRelation `default`.`foo`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#6] ``` For both the cases above, we should reject the syntax at the parser level. In this PR, we create two top-level parser rules to parse `SELECT` and `INSERT` respectively. I will create a small PR to allow CTEs in DESCRIBE QUERY after this PR is in. ## How was this patch tested? 
Added tests to PlanParserSuite and removed the semantic check tests from SparkSqlParserSuite. Closes #24150 from dilipbiswal/split-query-insert. Authored-by: Dilip Biswal <dbiswal@us.ibm.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 300ec1a commit 9cc925c

File tree

5 files changed

+124
-64
lines changed

5 files changed

+124
-64
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ singleTableSchema
8181

8282
statement
8383
: query #statementDefault
84+
| insertStatement #insertStatementDefault
85+
| multiSelectStatement #multiSelectStatementDefault
8486
| USE db=identifier #use
8587
| CREATE database (IF NOT EXISTS)? identifier
8688
(COMMENT comment=STRING)? locationSpec?
@@ -358,9 +360,14 @@ resource
358360
: identifier STRING
359361
;
360362

363+
insertStatement
364+
: (ctes)? insertInto queryTerm queryOrganization #singleInsertQuery
365+
| (ctes)? fromClause multiInsertQueryBody+ #multiInsertQuery
366+
;
367+
361368
queryNoWith
362-
: insertInto? queryTerm queryOrganization #singleInsertQuery
363-
| fromClause multiInsertQueryBody+ #multiInsertQuery
369+
: queryTerm queryOrganization #noWithQuery
370+
| fromClause selectStatement #queryWithFrom
364371
;
365372

366373
queryOrganization
@@ -373,9 +380,15 @@ queryOrganization
373380
;
374381

375382
multiInsertQueryBody
376-
: insertInto?
377-
querySpecification
378-
queryOrganization
383+
: insertInto selectStatement
384+
;
385+
386+
multiSelectStatement
387+
: (ctes)? fromClause selectStatement+ #multiSelect
388+
;
389+
390+
selectStatement
391+
: querySpecification queryOrganization
379392
;
380393

381394
queryTerm

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -112,21 +112,36 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
112112
val query = plan(ctx.queryNoWith)
113113

114114
// Apply CTEs
115-
query.optional(ctx.ctes) {
116-
val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
117-
val namedQuery = visitNamedQuery(nCtx)
118-
(namedQuery.alias, namedQuery)
119-
}
120-
// Check for duplicate names.
121-
checkDuplicateKeys(ctes, ctx)
122-
With(query, ctes)
115+
query.optionalMap(ctx.ctes)(withCTE)
116+
}
117+
118+
private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = {
119+
val ctes = ctx.namedQuery.asScala.map { nCtx =>
120+
val namedQuery = visitNamedQuery(nCtx)
121+
(namedQuery.alias, namedQuery)
123122
}
123+
// Check for duplicate names.
124+
checkDuplicateKeys(ctes, ctx)
125+
With(plan, ctes)
124126
}
125127

126128
override def visitQueryToDesc(ctx: QueryToDescContext): LogicalPlan = withOrigin(ctx) {
127129
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
128130
}
129131

132+
override def visitQueryWithFrom(ctx: QueryWithFromContext): LogicalPlan = withOrigin(ctx) {
133+
val from = visitFromClause(ctx.fromClause)
134+
validate(ctx.selectStatement.querySpecification.fromClause == null,
135+
"Individual select statement can not have FROM cause as its already specified in the" +
136+
" outer query block", ctx)
137+
withQuerySpecification(ctx.selectStatement.querySpecification, from).
138+
optionalMap(ctx.selectStatement.queryOrganization)(withQueryResultClauses)
139+
}
140+
141+
override def visitNoWithQuery(ctx: NoWithQueryContext): LogicalPlan = withOrigin(ctx) {
142+
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
143+
}
144+
130145
/**
131146
* Create a named logical plan.
132147
*
@@ -156,36 +171,60 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
156171
val from = visitFromClause(ctx.fromClause)
157172

158173
// Build the insert clauses.
159-
val inserts = ctx.multiInsertQueryBody.asScala.map {
174+
val inserts = ctx.multiInsertQueryBody().asScala.map {
160175
body =>
161-
validate(body.querySpecification.fromClause == null,
176+
validate(body.selectStatement.querySpecification.fromClause == null,
162177
"Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
163178
body)
164179

180+
withInsertInto(body.insertInto,
181+
withQuerySpecification(body.selectStatement.querySpecification, from).
182+
// Add organization statements.
183+
optionalMap(body.selectStatement.queryOrganization)(withQueryResultClauses))
184+
}
185+
186+
// If there are multiple INSERTS just UNION them together into one query.
187+
val insertPlan = inserts match {
188+
case Seq(query) => query
189+
case queries => Union(queries)
190+
}
191+
// Apply CTEs
192+
insertPlan.optionalMap(ctx.ctes)(withCTE)
193+
}
194+
195+
override def visitMultiSelect(ctx: MultiSelectContext): LogicalPlan = withOrigin(ctx) {
196+
val from = visitFromClause(ctx.fromClause)
197+
198+
// Build the insert clauses.
199+
val selects = ctx.selectStatement.asScala.map {
200+
body =>
201+
validate(body.querySpecification.fromClause == null,
202+
"Multi-select queries cannot have a FROM clause in their individual SELECT statements",
203+
body)
204+
165205
withQuerySpecification(body.querySpecification, from).
166206
// Add organization statements.
167-
optionalMap(body.queryOrganization)(withQueryResultClauses).
168-
// Add insert.
169-
optionalMap(body.insertInto())(withInsertInto)
207+
optionalMap(body.queryOrganization)(withQueryResultClauses)
170208
}
171209

172210
// If there are multiple INSERTS just UNION them together into one query.
173-
inserts match {
211+
val selectUnionPlan = selects match {
174212
case Seq(query) => query
175213
case queries => Union(queries)
176214
}
215+
// Apply CTEs
216+
selectUnionPlan.optionalMap(ctx.ctes)(withCTE)
177217
}
178218

179219
/**
180220
* Create a logical plan for a regular (single-insert) query.
181221
*/
182222
override def visitSingleInsertQuery(
183223
ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
184-
plan(ctx.queryTerm).
185-
// Add organization statements.
186-
optionalMap(ctx.queryOrganization)(withQueryResultClauses).
187-
// Add insert.
188-
optionalMap(ctx.insertInto())(withInsertInto)
224+
val insertPlan = withInsertInto(ctx.insertInto(),
225+
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses))
226+
// Apply CTEs
227+
insertPlan.optionalMap(ctx.ctes)(withCTE)
189228
}
190229

191230
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,11 @@ class PlanParserSuite extends AnalysisTest {
132132
table("a").select(star()).union(table("a").where('s < 10).select(star())))
133133
intercept(
134134
"from a select * select * from x where a.s < 10",
135-
"Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
135+
"Multi-select queries cannot have a FROM clause in their individual SELECT statements")
136+
intercept(
137+
"from a select * from b",
138+
"Individual select statement can not have FROM cause as its already specified in " +
139+
"the outer query block")
136140
assertEqual(
137141
"from a insert into tbl1 select * insert into tbl2 select * where s < 10",
138142
table("a").select(star()).insertInto("tbl1").union(
@@ -754,4 +758,47 @@ class PlanParserSuite extends AnalysisTest {
754758
assertEqual(query2, Distinct(a.union(b)).except(c.intersect(d, isAll = true), isAll = true))
755759
}
756760
}
761+
762+
test("create/alter view as insert into table") {
763+
val m1 = intercept[ParseException] {
764+
parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)")
765+
}.getMessage
766+
assert(m1.contains("mismatched input 'INSERT' expecting"))
767+
// Multi insert query
768+
val m2 = intercept[ParseException] {
769+
parsePlan(
770+
"""
771+
|CREATE VIEW testView AS FROM jt
772+
|INSERT INTO tbl1 SELECT * WHERE jt.id < 5
773+
|INSERT INTO tbl2 SELECT * WHERE jt.id > 4
774+
""".stripMargin)
775+
}.getMessage
776+
assert(m2.contains("mismatched input 'INSERT' expecting"))
777+
val m3 = intercept[ParseException] {
778+
parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)")
779+
}.getMessage
780+
assert(m3.contains("mismatched input 'INSERT' expecting"))
781+
// Multi insert query
782+
val m4 = intercept[ParseException] {
783+
parsePlan(
784+
"""
785+
|ALTER VIEW testView AS FROM jt
786+
|INSERT INTO tbl1 SELECT * WHERE jt.id < 5
787+
|INSERT INTO tbl2 SELECT * WHERE jt.id > 4
788+
""".stripMargin
789+
)
790+
}.getMessage
791+
assert(m4.contains("mismatched input 'INSERT' expecting"))
792+
}
793+
794+
test("Invalid insert constructs in the query") {
795+
val m1 = intercept[ParseException] {
796+
parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))")
797+
}.getMessage
798+
assert(m1.contains("mismatched input 'FROM' expecting"))
799+
val m2 = intercept[ParseException] {
800+
parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))")
801+
}.getMessage
802+
assert(m2.contains("mismatched input 'FROM' expecting"))
803+
}
757804
}

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,15 +1257,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
12571257
if (ctx.identifierList != null) {
12581258
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
12591259
} else {
1260-
// CREATE VIEW ... AS INSERT INTO is not allowed.
1261-
ctx.query.queryNoWith match {
1262-
case s: SingleInsertQueryContext if s.insertInto != null =>
1263-
operationNotAllowed("CREATE VIEW ... AS INSERT INTO", ctx)
1264-
case _: MultiInsertQueryContext =>
1265-
operationNotAllowed("CREATE VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
1266-
case _ => // OK
1267-
}
1268-
12691260
val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
12701261
icl.identifierComment.asScala.map { ic =>
12711262
ic.identifier.getText -> Option(ic.STRING).map(string)
@@ -1302,14 +1293,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
13021293
* }}}
13031294
*/
13041295
override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) {
1305-
// ALTER VIEW ... AS INSERT INTO is not allowed.
1306-
ctx.query.queryNoWith match {
1307-
case s: SingleInsertQueryContext if s.insertInto != null =>
1308-
operationNotAllowed("ALTER VIEW ... AS INSERT INTO", ctx)
1309-
case _: MultiInsertQueryContext =>
1310-
operationNotAllowed("ALTER VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
1311-
case _ => // OK
1312-
}
13131296
AlterViewAsCommand(
13141297
name = visitTableIdentifier(ctx.tableIdentifier),
13151298
originalText = source(ctx.query),

sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -215,17 +215,6 @@ class SparkSqlParserSuite extends AnalysisTest {
215215
"no viable alternative at input")
216216
}
217217

218-
test("create view as insert into table") {
219-
// Single insert query
220-
intercept("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)",
221-
"Operation not allowed: CREATE VIEW ... AS INSERT INTO")
222-
223-
// Multi insert query
224-
intercept("CREATE VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
225-
"INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
226-
"Operation not allowed: CREATE VIEW ... AS FROM ... [INSERT INTO ...]+")
227-
}
228-
229218
test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") {
230219
assertEqual("describe t",
231220
DescribeTableCommand(TableIdentifier("t"), Map.empty, isExtended = false))
@@ -369,17 +358,6 @@ class SparkSqlParserSuite extends AnalysisTest {
369358
Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
370359
}
371360

372-
test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
373-
// Single insert query
374-
intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
375-
"Operation not allowed: ALTER VIEW ... AS INSERT INTO")
376-
377-
// Multi insert query
378-
intercept("ALTER VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
379-
"INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
380-
"Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
381-
}
382-
383361
test("database and schema tokens are interchangeable") {
384362
assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
385363
assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))

0 commit comments

Comments
 (0)