
Commit 1472cac

gatorsmile authored and cloud-fan committed
[SPARK-19830][SQL] Add parseTableSchema API to ParserInterface
### What changes were proposed in this pull request?

Specifying the table schema in DDL formats is needed for different scenarios. For example:

- [specifying the schema in the SQL function `from_json` using DDL formats](https://issues.apache.org/jira/browse/SPARK-19637), as suggested by marmbrus,
- [specifying customized JDBC data types](apache#16209).

These two PRs currently require users to specify the table schema in the JSON format, which is not user friendly. This PR adds a `parseTableSchema` API to `ParserInterface`.

### How was this patch tested?

Added a test suite `TableSchemaParserSuite`.

Author: Xiao Li <gatorsmile@gmail.com>

Closes apache#17171 from gatorsmile/parseDDLStmt.
1 parent 21f333c commit 1472cac
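For context, a minimal usage sketch of the new API, using `CatalystSqlParser` (the concrete parser exercised by the new test suite); the comments describe the expected result based on the assertions in that suite rather than captured output:

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.StructType

// Parse a DDL-formatted, comma-separated column list into a Catalyst schema.
val schema: StructType = CatalystSqlParser.parseTableSchema("a INT, b STRING, c DECIMAL(10, 2)")
// Equivalent to: new StructType().add("a", "int").add("b", "string").add("c", "decimal(10,2)")

// Malformed input, e.g. a trailing comma, throws ParseException:
// CatalystSqlParser.parseTableSchema("a INT, b STRING,")
```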

3 files changed: +104 −1 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala

Lines changed: 9 additions & 1 deletion
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.types.{DataType, StructType}
 
 /**
  * Base SQL parsing infrastructure.
@@ -49,6 +49,14 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
     astBuilder.visitSingleTableIdentifier(parser.singleTableIdentifier())
   }
 
+  /**
+   * Creates StructType for a given SQL string, which is a comma separated list of field
+   * definitions which will preserve the correct Hive metadata.
+   */
+  override def parseTableSchema(sqlText: String): StructType = parse(sqlText) { parser =>
+    StructType(astBuilder.visitColTypeList(parser.colTypeList()))
+  }
+
   /** Creates LogicalPlan for a given SQL string. */
   override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
     astBuilder.visitSingleStatement(parser.singleStatement()) match {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala

Lines changed: 7 additions & 0 deletions
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.parser
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.types.StructType
 
 /**
  * Interface for a parser.
@@ -33,4 +34,10 @@ trait ParserInterface {
 
   /** Creates TableIdentifier for a given SQL string. */
   def parseTableIdentifier(sqlText: String): TableIdentifier
+
+  /**
+   * Creates StructType for a given SQL string, which is a comma separated list of field
+   * definitions which will preserve the correct Hive metadata.
+   */
+  def parseTableSchema(sqlText: String): StructType
 }
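Because `parseTableSchema` is added as an abstract method, external `ParserInterface` implementations now have to provide it. A minimal sketch of a delegating implementation (the class name is hypothetical, and it assumes the trait's only other methods at this revision are `parsePlan`, `parseExpression`, and `parseTableIdentifier`, as the surrounding diff context suggests):

```scala
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types.StructType

// Hypothetical custom parser that forwards everything to the built-in Catalyst parser,
// including the newly required parseTableSchema.
class DelegatingParser extends ParserInterface {
  private val delegate = CatalystSqlParser

  override def parsePlan(sqlText: String): LogicalPlan = delegate.parsePlan(sqlText)
  override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText)
  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)
  override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText)
}
```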
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types._
+
+class TableSchemaParserSuite extends SparkFunSuite {
+
+  def parse(sql: String): StructType = CatalystSqlParser.parseTableSchema(sql)
+
+  def checkTableSchema(tableSchemaString: String, expectedDataType: DataType): Unit = {
+    test(s"parse $tableSchemaString") {
+      assert(parse(tableSchemaString) === expectedDataType)
+    }
+  }
+
+  def assertError(sql: String): Unit =
+    intercept[ParseException](CatalystSqlParser.parseTableSchema(sql))
+
+  checkTableSchema("a int", new StructType().add("a", "int"))
+  checkTableSchema("A int", new StructType().add("A", "int"))
+  checkTableSchema("a INT", new StructType().add("a", "int"))
+  checkTableSchema("`!@#$%.^&*()` string", new StructType().add("!@#$%.^&*()", "string"))
+  checkTableSchema("a int, b long", new StructType().add("a", "int").add("b", "long"))
+  checkTableSchema("a STRUCT<intType: int, ts:timestamp>",
+    StructType(
+      StructField("a", StructType(
+        StructField("intType", IntegerType) ::
+        StructField("ts", TimestampType) :: Nil)) :: Nil))
+  checkTableSchema(
+    "a int comment 'test'",
+    new StructType().add("a", "int", nullable = true, "test"))
+
+  test("complex hive type") {
+    val tableSchemaString =
+      """
+        |complexStructCol struct<
+        |struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
+        |MAP:Map<timestamp, varchar(10)>,
+        |arrAy:Array<double>,
+        |anotherArray:Array<char(9)>>
+      """.stripMargin.replace("\n", "")
+
+    val builder = new MetadataBuilder
+    builder.putString(HIVE_TYPE_STRING,
+      "struct<struct:struct<deciMal:decimal(10,0),anotherDecimal:decimal(5,2)>," +
+      "MAP:map<timestamp,varchar(10)>,arrAy:array<double>,anotherArray:array<char(9)>>")
+
+    val expectedDataType =
+      StructType(
+        StructField("complexStructCol", StructType(
+          StructField("struct",
+            StructType(
+              StructField("deciMal", DecimalType.USER_DEFAULT) ::
+              StructField("anotherDecimal", DecimalType(5, 2)) :: Nil)) ::
+          StructField("MAP", MapType(TimestampType, StringType)) ::
+          StructField("arrAy", ArrayType(DoubleType)) ::
+          StructField("anotherArray", ArrayType(StringType)) :: Nil),
+          nullable = true,
+          builder.build()) :: Nil)
+
+    assert(parse(tableSchemaString) === expectedDataType)
+  }
+
+  // Negative cases
+  assertError("")
+  assertError("a")
+  assertError("a INT b long")
+  assertError("a INT,, b long")
+  assertError("a INT, b long,,")
+  assertError("a INT, b long, c int,")
+}
