Commit 0522c93

NETFLIX-BUILD: Update CREATE/REPLACE conversions with serde.

1 parent 51593cb commit 0522c93
5 files changed, +109 -41 lines changed

sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java

Lines changed: 10 additions & 0 deletions
@@ -46,6 +46,11 @@ public interface TableCatalog extends CatalogPlugin {
    */
   String PROP_LOCATION = "location";
 
+  /**
+   * A reserved property to specify a table was created with EXTERNAL.
+   */
+  String PROP_EXTERNAL = "external";
+
   /**
    * A reserved property to specify the description of the table.
    */
@@ -61,6 +66,11 @@ public interface TableCatalog extends CatalogPlugin {
    */
   String PROP_OWNER = "owner";
 
+  /**
+   * A prefix used to pass OPTIONS in table properties
+   */
+  String OPTION_PREFIX = "option.";
+
   /**
    * List the tables in a namespace from the catalog.
    * <p>

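Taken together, the two new reserved keys let a v2 catalog tell user-supplied OPTIONS apart from reserved table metadata: every option is stored under the `option.` prefix, and EXTERNAL tables carry `external=true`. A minimal standalone sketch of the prefixing round-trip (the option names here are made up for illustration, not from the commit):

object OptionPrefixSketch {
  // Mirrors TableCatalog.OPTION_PREFIX above; the option keys are hypothetical.
  private val OptionPrefix = "option."

  def main(args: Array[String]): Unit = {
    val options = Map("compression" -> "zstd", "mergeSchema" -> "true")
    // Each OPTIONS entry becomes a namespaced table property, so a catalog
    // can recover the original options by stripping the prefix.
    val asProperties = options.map { case (k, v) => OptionPrefix + k -> v }
    val recovered = asProperties.collect {
      case (k, v) if k.startsWith(OptionPrefix) => k.stripPrefix(OptionPrefix) -> v
    }
    assert(recovered == options)
  }
}
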
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala

Lines changed: 8 additions & 8 deletions
@@ -144,53 +144,53 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
       throw new AnalysisException("Describing columns is not supported for v2 tables.")
 
     case c @ CreateTableStatement(
-        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) =>
+        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) =>
       assertNoCharTypeInSchema(c.tableSchema)
       CreateV2Table(
         catalog.asTableCatalog,
         tbl.asIdentifier,
         c.tableSchema,
         // convert the bucket spec and add it as a transform
         c.partitioning ++ c.bucketSpec.map(_.asTransform),
-        convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider),
+        convertTableProperties(c),
         ignoreIfExists = c.ifNotExists)
 
     case c @ CreateTableAsSelectStatement(
-        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) =>
+        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) =>
       CreateTableAsSelect(
         catalog.asTableCatalog,
         tbl.asIdentifier,
         // convert the bucket spec and add it as a transform
         c.partitioning ++ c.bucketSpec.map(_.asTransform),
         c.asSelect,
-        convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider),
+        convertTableProperties(c),
         writeOptions = c.options,
         ignoreIfExists = c.ifNotExists)
 
     case RefreshTableStatement(NonSessionCatalogAndTable(catalog, tbl)) =>
       RefreshTable(catalog.asTableCatalog, tbl.asIdentifier)
 
     case c @ ReplaceTableStatement(
-        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) =>
+        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) =>
       assertNoCharTypeInSchema(c.tableSchema)
       ReplaceTable(
         catalog.asTableCatalog,
         tbl.asIdentifier,
         c.tableSchema,
         // convert the bucket spec and add it as a transform
         c.partitioning ++ c.bucketSpec.map(_.asTransform),
-        convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider),
+        convertTableProperties(c),
        orCreate = c.orCreate)
 
     case c @ ReplaceTableAsSelectStatement(
-        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) =>
+        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) =>
       ReplaceTableAsSelect(
         catalog.asTableCatalog,
         tbl.asIdentifier,
         // convert the bucket spec and add it as a transform
         c.partitioning ++ c.bucketSpec.map(_.asTransform),
         c.asSelect,
-        convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider),
+        convertTableProperties(c),
         writeOptions = c.options,
         orCreate = c.orCreate)

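The analysis rules above no longer spell out five fields at every call site; each statement type gets its own convertTableProperties overload (defined in CatalogV2Util below), so only the positional wildcard patterns had to grow to match the new statement arity. A rough standalone sketch of why that shape helps, using stand-in types rather than Spark's real statement classes:

// Stand-in statement types (hypothetical, not Spark's classes).
final case class CreateStmt(properties: Map[String, String], external: Boolean)
final case class ReplaceStmt(properties: Map[String, String])

object ConvertSketch {
  // Callers hand over the whole statement; only these overloads know which
  // fields exist, so adding a field (like serde) no longer touches each caller.
  def convert(c: CreateStmt): Map[String, String] =
    convert(c.properties, c.external)

  def convert(r: ReplaceStmt): Map[String, String] =
    convert(r.properties, external = false)

  private def convert(
      properties: Map[String, String],
      external: Boolean): Map[String, String] =
    properties ++ (if (external) Map("external" -> "true") else Map.empty)
}
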
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala

Lines changed: 44 additions & 4 deletions
@@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{NamedRelation, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedV2Relation}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.catalyst.plans.logical.AlterTable
+import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelectStatement, CreateTableStatement, ReplaceTableAsSelectStatement, ReplaceTableStatement, SerdeInfo}
 import org.apache.spark.sql.connector.catalog.TableChange._
 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
 import org.apache.spark.sql.types.{ArrayType, DataType, HIVE_TYPE_STRING, HiveStringType, MapType, StructField, StructType}
@@ -295,18 +295,58 @@ private[sql] object CatalogV2Util {
     catalog.name().equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)
   }
 
-  def convertTableProperties(
+  def convertTableProperties(c: CreateTableStatement): Map[String, String] = {
+    convertTableProperties(
+      c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external)
+  }
+
+  def convertTableProperties(c: CreateTableAsSelectStatement): Map[String, String] = {
+    convertTableProperties(
+      c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external)
+  }
+
+  def convertTableProperties(r: ReplaceTableStatement): Map[String, String] = {
+    convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider)
+  }
+
+  def convertTableProperties(r: ReplaceTableAsSelectStatement): Map[String, String] = {
+    convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider)
+  }
+
+  private def convertTableProperties(
       properties: Map[String, String],
       options: Map[String, String],
+      serdeInfo: Option[SerdeInfo],
       location: Option[String],
       comment: Option[String],
-      provider: Option[String]): Map[String, String] = {
-    properties ++ options ++
+      provider: Option[String],
+      external: Boolean = false): Map[String, String] = {
+    properties ++
+      options.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value } ++
+      convertToProperties(serdeInfo) ++
+      (if (external) Map(TableCatalog.PROP_EXTERNAL -> "true") else Map.empty) ++
       provider.map(TableCatalog.PROP_PROVIDER -> _) ++
       comment.map(TableCatalog.PROP_COMMENT -> _) ++
       location.map(TableCatalog.PROP_LOCATION -> _)
   }
 
+  private def convertToProperties(serdeInfo: Option[SerdeInfo]): Map[String, String] = {
+    serdeInfo match {
+      case Some(s) =>
+        (s.formatClasses match {
+          case Some((inputFormat, outputFormat)) =>
+            Map("hive.input-format" -> inputFormat, "hive.output-format" -> outputFormat)
+          case _ =>
+            Map.empty
+        }) ++
+        s.storedAs.map("hive.stored-as" -> _) ++
+        s.serde.map("hive.serde" -> _) ++
+        s.serdeProperties.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value }
+      case None =>
+        Map.empty
+    }
+  }
+
   def withDefaultOwnership(properties: Map[String, String]): Map[String, String] = {
     properties ++ Map(TableCatalog.PROP_OWNER -> Utils.getCurrentUserName())
   }

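For a sense of what the serde conversion yields, here is a standalone mirror of the convertToProperties logic above. The SerdeInfo shape is inferred from the fields the diff reads (formatClasses, storedAs, serde, serdeProperties); the real class, and the sample serde values below, are assumptions for illustration:

// Inferred shape; the real SerdeInfo in catalyst may differ.
final case class SerdeInfoSketch(
    formatClasses: Option[(String, String)] = None,
    storedAs: Option[String] = None,
    serde: Option[String] = None,
    serdeProperties: Map[String, String] = Map.empty)

object SerdeConversionSketch {
  def main(args: Array[String]): Unit = {
    val info = SerdeInfoSketch(
      storedAs = Some("parquet"),
      serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
      serdeProperties = Map("field.delim" -> "\t"))
    // Same key scheme as the diff: hive.* for the serde clauses,
    // option.* for SERDEPROPERTIES entries.
    val props = Map.empty[String, String] ++
      info.formatClasses.toSeq.flatMap { case (in, out) =>
        Seq("hive.input-format" -> in, "hive.output-format" -> out)
      } ++
      info.storedAs.map("hive.stored-as" -> _) ++
      info.serde.map("hive.serde" -> _) ++
      info.serdeProperties.map { case (k, v) => "option." + k -> v }
    // Prints hive.stored-as, hive.serde, and option.field.delim entries.
    props.foreach(println)
  }
}
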
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala

Lines changed: 39 additions & 21 deletions
@@ -64,6 +64,7 @@ class DDLParserSuite extends AnalysisTest {
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
 
     Seq(createSql, replaceSql).foreach { sql =>
@@ -87,6 +88,7 @@
         Some("parquet"),
         Map.empty[String, String],
         None,
+        None,
         None),
       expectedIfNotExists = true)
   }
@@ -107,6 +109,7 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -161,6 +164,7 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -183,6 +187,7 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -201,7 +206,8 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
-      Some("abc"))
+      Some("abc"),
+      None)
     Seq(createSql, replaceSql).foreach{ sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
     }
@@ -221,6 +227,7 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -239,6 +246,7 @@
       Some("parquet"),
       Map.empty[String, String],
       Some("/tmp/file"),
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -257,6 +265,7 @@
       Some("parquet"),
       Map.empty[String, String],
       None,
+      None,
       None)
     Seq(createSql, replaceSql).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
@@ -318,6 +327,7 @@
         Some("json"),
         Map("a" -> "1", "b" -> "0.1", "c" -> "true"),
         None,
+        None,
         None),
       expectedIfNotExists = false)
   }
@@ -373,7 +383,8 @@
       Some("parquet"),
       Map.empty[String, String],
       Some("/user/external/page_view"),
-      Some("This is the staging page view table"))
+      Some("This is the staging page view table"),
+      None)
     Seq(s1, s2, s3, s4).foreach { sql =>
       testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = true)
     }
@@ -2089,7 +2100,9 @@
       provider: Option[String],
       options: Map[String, String],
       location: Option[String],
-      comment: Option[String])
+      comment: Option[String],
+      serdeInfo: Option[SerdeInfo],
+      external: Boolean = false)
 
   private object TableSpec {
     def apply(plan: LogicalPlan): TableSpec = {
@@ -2104,7 +2117,9 @@
             create.provider,
             create.options,
             create.location,
-            create.comment)
+            create.comment,
+            create.serde,
+            create.external)
         case replace: ReplaceTableStatement =>
           TableSpec(
             replace.tableName,
@@ -2115,7 +2130,8 @@
             replace.provider,
             replace.options,
             replace.location,
-            replace.comment)
+            replace.comment,
+            replace.serde)
         case ctas: CreateTableAsSelectStatement =>
           TableSpec(
             ctas.tableName,
@@ -2126,7 +2142,9 @@
             ctas.provider,
             ctas.options,
             ctas.location,
-            ctas.comment)
+            ctas.comment,
+            ctas.serde,
+            ctas.external)
        case rtas: ReplaceTableAsSelectStatement =>
          TableSpec(
            rtas.tableName,
@@ -2137,7 +2155,8 @@
             rtas.provider,
             rtas.options,
             rtas.location,
-            rtas.comment)
+            rtas.comment,
+            rtas.serde)
         case other =>
           fail(s"Expected to parse Create, CTAS, Replace, or RTAS plan" +
             s" from query, got ${other.getClass.getName}.")
@@ -2164,20 +2183,19 @@
   }
 
   test("create table - without using") {
-    withSQLConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED.key -> "false") {
-      val sql = "CREATE TABLE 1m.2g(a INT)"
-      val expectedTableSpec = TableSpec(
-        Seq("1m", "2g"),
-        Some(new StructType().add("a", IntegerType)),
-        Seq.empty[Transform],
-        None,
-        Map.empty[String, String],
-        None,
-        Map.empty[String, String],
-        None,
-        None)
+    val sql = "CREATE TABLE 1m.2g(a INT)"
+    val expectedTableSpec = TableSpec(
+      Seq("1m", "2g"),
+      Some(new StructType().add("a", IntegerType)),
+      Seq.empty[Transform],
+      None,
+      Map.empty[String, String],
+      None,
+      Map.empty[String, String],
+      None,
+      None,
+      None)
 
-      testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
-    }
+    testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
   }
 }

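With serdeInfo and external on TableSpec, a test can now assert that Hive-specific clauses survive into the parsed plan. A hedged sketch of such an assertion, assuming the parser populates the new fields from EXTERNAL and STORED AS (that parser change sits in the part of the commit not shown in this excerpt; the SQL and assertions are illustrative only):

// Hypothetical test body in DDLParserSuite style.
val sql = "CREATE EXTERNAL TABLE page_view (a INT) STORED AS parquet"
val spec = TableSpec(parsePlan(sql))
assert(spec.external)                                         // EXTERNAL captured
assert(spec.serdeInfo.exists(_.storedAs.contains("parquet"))) // STORED AS captured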