@@ -19,10 +19,9 @@ package org.apache.spark.sql.sources
 
 import scala.language.implicitConversions
 import scala.util.parsing.combinator.syntactical.StandardTokenParsers
-import scala.util.parsing.combinator.{RegexParsers, PackratParsers}
+import scala.util.parsing.combinator.PackratParsers
 
 import org.apache.spark.Logging
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.execution.RunnableCommand
@@ -44,18 +43,43 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
     }
   }
 
+  def parseType(input: String): DataType = {
+    phrase(dataType)(new lexical.Scanner(input)) match {
+      case Success(r, x) => r
+      case x =>
+        sys.error(s"Unsupported dataType: $x")
+    }
+  }
+
   protected case class Keyword(str: String)
 
   protected implicit def asParser(k: Keyword): Parser[String] =
     lexical.allCaseVersions(k.str).map(x => x: Parser[String]).reduce(_ | _)
 
   protected val CREATE = Keyword("CREATE")
-  protected val DECIMAL = Keyword("DECIMAL")
   protected val TEMPORARY = Keyword("TEMPORARY")
   protected val TABLE = Keyword("TABLE")
   protected val USING = Keyword("USING")
   protected val OPTIONS = Keyword("OPTIONS")
 
+  // Data types.
+  protected val STRING = Keyword("STRING")
+  protected val FLOAT = Keyword("FLOAT")
+  protected val INT = Keyword("INT")
+  protected val TINYINT = Keyword("TINYINT")
+  protected val SMALLINT = Keyword("SMALLINT")
+  protected val DOUBLE = Keyword("DOUBLE")
+  protected val BIGINT = Keyword("BIGINT")
+  protected val BINARY = Keyword("BINARY")
+  protected val BOOLEAN = Keyword("BOOLEAN")
+  protected val DECIMAL = Keyword("DECIMAL")
+  protected val DATE = Keyword("DATE")
+  protected val TIMESTAMP = Keyword("TIMESTAMP")
+  protected val VARCHAR = Keyword("VARCHAR")
+  protected val ARRAY = Keyword("ARRAY")
+  protected val MAP = Keyword("MAP")
+  protected val STRUCT = Keyword("STRUCT")
+
   // Use reflection to find the reserved words defined in this class.
   protected val reservedWords =
     this.getClass
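
The new parseType entry point exposes the type grammar on its own, so it can be unit-tested without wrapping every type in a full CREATE TEMPORARY TABLE statement. A rough usage sketch (illustrative only: DDLParser is private[sql], so this compiles only inside the org.apache.spark.sql package, and the expected results assume the catalyst types imported above):

    val parser = new DDLParser
    parser.parseType("int")                          // IntegerType
    parser.parseType("ARRAY<string>")                // ArrayType(StringType); keywords are case-insensitive
    parser.parseType("struct<name:string,age:int>")  // StructType with two fields
    parser.parseType("decimal(10, 2)")               // DecimalType(10, 2) via fixedDecimalType
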
@@ -77,20 +101,15 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
    * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")`
    */
   protected lazy val createTable: Parser[LogicalPlan] =
-    ( CREATE ~ TEMPORARY ~ TABLE ~> ident ~ (USING ~> className) ~ (OPTIONS ~> options) ^^ {
-      case tableName ~ provider ~ opts =>
-        CreateTableUsing(tableName, Seq.empty, provider, opts)
-    }
-    |
+    (
     CREATE ~ TEMPORARY ~ TABLE ~> ident
-      ~ tableCols ~ (USING ~> className) ~ (OPTIONS ~> options) ^^ {
-      case tableName ~ tableColumns ~ provider ~ opts =>
-        CreateTableUsing(tableName, tableColumns, provider, opts)
+      ~ (tableCols).? ~ (USING ~> className) ~ (OPTIONS ~> options) ^^ {
+      case tableName ~ columns ~ provider ~ opts =>
+        val tblColumns = if (columns.isEmpty) Seq.empty else columns.get
+        CreateTableUsing(tableName, tblColumns, provider, opts)
     }
   )
 
-  protected lazy val metastoreTypes = new MetastoreTypes
-
   protected lazy val tableCols: Parser[Seq[StructField]] = "(" ~> repsep(column, ",") <~ ")"
 
   protected lazy val options: Parser[Map[String, String]] =
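
Making tableCols optional collapses the two near-duplicate createTable alternatives into one, so both DDL shapes now flow through the same branch. A sketch of the two accepted forms (the schema-less statement echoes the ScalaDoc example above; the column list in the second is invented):

    // Sketch: both statement shapes parse through the single createTable rule.
    val withoutSchema =
      """CREATE TEMPORARY TABLE avroTable
        |USING org.apache.spark.sql.avro
        |OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")""".stripMargin

    val withSchema =
      """CREATE TEMPORARY TABLE avroTable (name STRING, episode INT)
        |USING org.apache.spark.sql.avro
        |OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")""".stripMargin

Since (tableCols).? yields an Option[Seq[StructField]], the isEmpty/get pair in the action is equivalent to columns.getOrElse(Seq.empty).
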
@@ -101,96 +120,62 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi
   protected lazy val pair: Parser[(String, String)] = ident ~ stringLit ^^ { case k ~ v => (k,v) }
 
   protected lazy val column: Parser[StructField] =
-    ( ident ~ ident ^^ { case name ~ typ =>
-      StructField(name, metastoreTypes.toDataType(typ))
+    ident ~ dataType ^^ { case columnName ~ typ =>
+      StructField(cleanIdentifier(columnName), typ)
     }
-    |
-    ident ~ (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
-      case name ~ precision ~ scale =>
-        StructField(name, DecimalType(precision.toInt, scale.toInt))
-      }
-    )
-}
 
-/**
- * :: DeveloperApi ::
- * Provides a parser for data types.
- */
-@DeveloperApi
-private[sql] class MetastoreTypes extends RegexParsers {
   protected lazy val primitiveType: Parser[DataType] =
-    "string" ^^^ StringType |
-    "float" ^^^ FloatType |
-    "int" ^^^ IntegerType |
-    "tinyint" ^^^ ByteType |
-    "smallint" ^^^ ShortType |
-    "double" ^^^ DoubleType |
-    "bigint" ^^^ LongType |
-    "binary" ^^^ BinaryType |
-    "boolean" ^^^ BooleanType |
-    fixedDecimalType |                     // decimal with precision/scale
-    "decimal" ^^^ DecimalType.Unlimited |  // decimal with no precision/scale
-    "date" ^^^ DateType |
-    "timestamp" ^^^ TimestampType |
-    "varchar\\((\\d+)\\)".r ^^^ StringType
+    STRING ^^^ StringType |
+    BINARY ^^^ BinaryType |
+    BOOLEAN ^^^ BooleanType |
+    TINYINT ^^^ ByteType |
+    SMALLINT ^^^ ShortType |
+    INT ^^^ IntegerType |
+    BIGINT ^^^ LongType |
+    FLOAT ^^^ FloatType |
+    DOUBLE ^^^ DoubleType |
+    fixedDecimalType |                   // decimal with precision/scale
+    DECIMAL ^^^ DecimalType.Unlimited |  // decimal with no precision/scale
+    DATE ^^^ DateType |
+    TIMESTAMP ^^^ TimestampType |
+    VARCHAR ~ "(" ~ numericLit ~ ")" ^^^ StringType
 
   protected lazy val fixedDecimalType: Parser[DataType] =
-    ("decimal" ~> "(" ~> "\\d+".r) ~ ("," ~> "\\d+".r <~ ")") ^^ {
-      case precision ~ scale =>
-        DecimalType(precision.toInt, scale.toInt)
+    (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ {
+      case precision ~ scale => DecimalType(precision.toInt, scale.toInt)
     }
 
   protected lazy val arrayType: Parser[DataType] =
-    "array" ~> "<" ~> dataType <~ ">" ^^ {
+    ARRAY ~> "<" ~> dataType <~ ">" ^^ {
       case tpe => ArrayType(tpe)
     }
 
   protected lazy val mapType: Parser[DataType] =
-    "map" ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
+    MAP ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ {
       case t1 ~ _ ~ t2 => MapType(t1, t2)
     }
 
   protected lazy val structField: Parser[StructField] =
-    "[a-zA-Z0-9_]*".r ~ ":" ~ dataType ^^ {
-      case name ~ _ ~ tpe => StructField(name, tpe, nullable = true)
+    ident ~ ":" ~ dataType ^^ {
+      case fieldName ~ _ ~ tpe => StructField(cleanIdentifier(fieldName), tpe, nullable = true)
     }
 
   protected lazy val structType: Parser[DataType] =
-    "struct" ~> "<" ~> repsep(structField,",") <~ ">" ^^ {
+    STRUCT ~> "<" ~> repsep(structField, ",") <~ ">" ^^ {
       case fields => new StructType(fields)
     }
 
   private[sql] lazy val dataType: Parser[DataType] =
     arrayType |
-      mapType |
-      structType |
-      primitiveType
-
-  def toDataType(metastoreType: String): DataType = parseAll(dataType, metastoreType) match {
-    case Success(result, _) => result
-    case failure: NoSuccess => sys.error(s"Unsupported dataType: $metastoreType")
-  }
-
-  def toMetastoreType(dt: DataType): String = dt match {
-    case ArrayType(elementType, _) => s"array<${toMetastoreType(elementType)}>"
-    case StructType(fields) =>
-      s"struct<${fields.map(f => s"${f.name}:${toMetastoreType(f.dataType)}").mkString(",")}>"
-    case MapType(keyType, valueType, _) =>
-      s"map<${toMetastoreType(keyType)},${toMetastoreType(valueType)}>"
-    case StringType => "string"
-    case FloatType => "float"
-    case IntegerType => "int"
-    case ByteType => "tinyint"
-    case ShortType => "smallint"
-    case DoubleType => "double"
-    case LongType => "bigint"
-    case BinaryType => "binary"
-    case BooleanType => "boolean"
-    case DateType => "date"
-    case d: DecimalType => "decimal"
-    case TimestampType => "timestamp"
-    case NullType => "void"
-    case udt: UserDefinedType[_] => toMetastoreType(udt.sqlType)
+    mapType |
+    structType |
+    primitiveType
+
+  protected val escapedIdentifier = "`([^`]+)`".r
+  /** Strips backticks from ident if present */
+  protected def cleanIdentifier(ident: String): String = ident match {
+    case escapedIdentifier(i) => i
+    case plainIdent => plainIdent
   }
 }
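
The backtick handling added at the bottom is self-contained, so its behavior is easy to see in isolation. A minimal standalone sketch (same regex and match as the patch; the sample values are invented):

    val escapedIdentifier = "`([^`]+)`".r
    def cleanIdentifier(ident: String): String = ident match {
      case escapedIdentifier(i) => i   // strips the backticks: `key` becomes key
      case plainIdent => plainIdent    // unquoted identifiers pass through unchanged
    }

    cleanIdentifier("`timestamp`")     // "timestamp", letting reserved words serve as column names
    cleanIdentifier("name")            // "name"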