@@ -28,10 +28,11 @@ import org.apache.hadoop.mapred.{FileInputFormat, JobConf}

 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.Resolver
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTablePartition, CatalogTableType, SessionCatalog}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.execution.datasources.PartitioningUtils
+import org.apache.spark.sql.execution.datasources.{CaseInsensitiveMap, PartitioningUtils}
 import org.apache.spark.sql.types._
 import org.apache.spark.util.SerializableConfiguration

@@ -346,10 +347,7 @@ case class AlterTableAddPartitionCommand(
     val catalog = sparkSession.sessionState.catalog
     val table = catalog.getTableMetadata(tableName)
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
-    if (DDLUtils.isDatasourceTable(table)) {
-      throw new AnalysisException(
-        "ALTER TABLE ADD PARTITION is not allowed for tables defined using the datasource API")
-    }
+    DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "ALTER TABLE ADD PARTITION")
     val parts = partitionSpecsAndLocs.map { case (spec, location) =>
       val normalizedSpec = PartitioningUtils.normalizePartitionSpec(
         spec,
@@ -382,11 +380,8 @@ case class AlterTableRenamePartitionCommand(
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
     val table = catalog.getTableMetadata(tableName)
-    if (DDLUtils.isDatasourceTable(table)) {
-      throw new AnalysisException(
-        "ALTER TABLE RENAME PARTITION is not allowed for tables defined using the datasource API")
-    }
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
+    DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "ALTER TABLE RENAME PARTITION")

     val normalizedOldPartition = PartitioningUtils.normalizePartitionSpec(
       oldPartition,
@@ -432,10 +427,7 @@ case class AlterTableDropPartitionCommand(
     val catalog = sparkSession.sessionState.catalog
     val table = catalog.getTableMetadata(tableName)
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
-    if (DDLUtils.isDatasourceTable(table)) {
-      throw new AnalysisException(
-        "ALTER TABLE DROP PARTITIONS is not allowed for tables defined using the datasource API")
-    }
+    DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "ALTER TABLE DROP PARTITION")

     val normalizedSpecs = specs.map { spec =>
       PartitioningUtils.normalizePartitionSpec(
@@ -493,33 +485,39 @@ case class AlterTableRecoverPartitionsCommand(
     }
   }

+  private def getBasePath(table: CatalogTable): Option[String] = {
+    if (table.provider == Some("hive")) {
+      table.storage.locationUri
+    } else {
+      new CaseInsensitiveMap(table.storage.properties).get("path")
+    }
+  }
+
   override def run(spark: SparkSession): Seq[Row] = {
     val catalog = spark.sessionState.catalog
     val table = catalog.getTableMetadata(tableName)
     val tableIdentWithDB = table.identifier.quotedString
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
-    if (DDLUtils.isDatasourceTable(table)) {
-      throw new AnalysisException(
-        s"Operation not allowed: $cmd on datasource tables: $tableIdentWithDB")
-    }
     if (table.partitionColumnNames.isEmpty) {
       throw new AnalysisException(
         s"Operation not allowed: $cmd only works on partitioned tables: $tableIdentWithDB")
     }
-    if (table.storage.locationUri.isEmpty) {
+
+    val tablePath = getBasePath(table)
+    if (tablePath.isEmpty) {
       throw new AnalysisException(s"Operation not allowed: $cmd only works on table with " +
         s"location provided: $tableIdentWithDB")
     }

-    val root = new Path(table.storage.locationUri.get)
+    val root = new Path(tablePath.get)
     logInfo(s"Recover all the partitions in $root")
     val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration)

     val threshold = spark.conf.get("spark.rdd.parallelListingThreshold", "10").toInt
     val hadoopConf = spark.sparkContext.hadoopConfiguration
     val pathFilter = getPathFilter(hadoopConf)
-    val partitionSpecsAndLocs = scanPartitions(
-      spark, fs, pathFilter, root, Map(), table.partitionColumnNames.map(_.toLowerCase), threshold)
+    val partitionSpecsAndLocs = scanPartitions(spark, fs, pathFilter, root, Map(),
+      table.partitionColumnNames, threshold, spark.sessionState.conf.resolver)
     val total = partitionSpecsAndLocs.length
     logInfo(s"Found $total partitions in $root")
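For datasource tables the base path lives in the storage properties rather than in `locationUri`, and the new `getBasePath` helper wraps those properties in `CaseInsensitiveMap` so the lookup works regardless of how the `path` key was capitalized when the table was created. A minimal sketch of that kind of lookup, using a hypothetical property map and a standalone helper rather than the real `CaseInsensitiveMap` class:

```scala
// Hypothetical storage properties as they might appear on a datasource table.
val props = Map("Path" -> "hdfs://nn/warehouse/t", "serialization.format" -> "1")

// Case-insensitive lookup equivalent in spirit to CaseInsensitiveMap:
// "Path", "path" and "PATH" all resolve to the same entry.
def getIgnoreCase(m: Map[String, String], key: String): Option[String] =
  m.collectFirst { case (k, v) if k.equalsIgnoreCase(key) => v }

assert(getIgnoreCase(props, "path") == Some("hdfs://nn/warehouse/t"))
```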
@@ -531,6 +529,11 @@ case class AlterTableRecoverPartitionsCommand(
     logInfo(s"Finished to gather the fast stats for all $total partitions.")

     addPartitions(spark, table, partitionSpecsAndLocs, partitionStats)
+    // Updates the table to indicate that its partition metadata is stored in the Hive metastore.
+    // This is always the case for Hive format tables, but is not true for Datasource tables
+    // created before Spark 2.1 unless they are converted via `msck repair table`.
+    spark.sessionState.catalog.alterTable(table.copy(partitionProviderIsHive = true))
+    catalog.refreshTable(tableName)
     logInfo(s"Recovered all partitions ($total).")
     Seq.empty[Row]
   }
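Once recovery succeeds, the table is flagged with `partitionProviderIsHive = true`, so subsequent partition DDL passes the new check. A hedged sketch of the user-facing flow for converting a pre-2.1 datasource table (the table and partition names here are made up):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().enableHiveSupport().getOrCreate()

// MSCK REPAIR TABLE runs AlterTableRecoverPartitionsCommand: it imports the
// on-disk partitions into the metastore and flips partitionProviderIsHive,
// after which partition-level ALTER TABLE commands are allowed again.
spark.sql("MSCK REPAIR TABLE logs")
spark.sql("ALTER TABLE logs ADD PARTITION (ds='2016-10-01')")
```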
@@ -544,7 +547,8 @@ case class AlterTableRecoverPartitionsCommand(
       path: Path,
       spec: TablePartitionSpec,
       partitionNames: Seq[String],
-      threshold: Int): GenSeq[(TablePartitionSpec, Path)] = {
+      threshold: Int,
+      resolver: Resolver): GenSeq[(TablePartitionSpec, Path)] = {
     if (partitionNames.isEmpty) {
       return Seq(spec -> path)
     }
@@ -563,15 +567,15 @@ case class AlterTableRecoverPartitionsCommand(
       val name = st.getPath.getName
       if (st.isDirectory && name.contains("=")) {
         val ps = name.split("=", 2)
-        val columnName = PartitioningUtils.unescapePathName(ps(0)).toLowerCase
+        val columnName = PartitioningUtils.unescapePathName(ps(0))
         // TODO: Validate the value
         val value = PartitioningUtils.unescapePathName(ps(1))
-        // comparing with case-insensitive, but preserve the case
-        if (columnName == partitionNames.head) {
-          scanPartitions(spark, fs, filter, st.getPath, spec ++ Map(columnName -> value),
-            partitionNames.drop(1), threshold)
+        if (resolver(columnName, partitionNames.head)) {
+          scanPartitions(spark, fs, filter, st.getPath, spec ++ Map(partitionNames.head -> value),
+            partitionNames.drop(1), threshold, resolver)
         } else {
-          logWarning(s"expect partition column ${partitionNames.head}, but got ${ps(0)}, ignore it")
+          logWarning(
+            s"expected partition column ${partitionNames.head}, but got ${ps(0)}, ignoring it")
           Seq()
         }
       } else {
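Instead of lower-casing directory names, the scan now compares them with the session's `Resolver`, which honors `spark.sql.caseSensitive`, and records the declared column name (`partitionNames.head`) in the spec so its original case is preserved. A small sketch of how such a resolver behaves, with the two resolution functions written inline rather than imported from Catalyst:

```scala
// In Catalyst, Resolver is a (String, String) => Boolean; the session picks a
// case-sensitive or case-insensitive implementation based on spark.sql.caseSensitive.
type Resolver = (String, String) => Boolean

val caseInsensitive: Resolver = (a, b) => a.equalsIgnoreCase(b)
val caseSensitive: Resolver = (a, b) => a == b

assert(caseInsensitive("DS", "ds"))  // directory "DS=..." matches column "ds"
assert(!caseSensitive("DS", "ds"))   // but not under case-sensitive resolution
```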
@@ -676,16 +680,11 @@ case class AlterTableSetLocationCommand(
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
     partitionSpec match {
       case Some(spec) =>
+        DDLUtils.verifyPartitionProviderIsHive(
+          sparkSession, table, "ALTER TABLE ... SET LOCATION")
         // Partition spec is specified, so we set the location only for this partition
         val part = catalog.getPartition(table.identifier, spec)
-        val newPart =
-          if (DDLUtils.isDatasourceTable(table)) {
-            throw new AnalysisException(
-              "ALTER TABLE SET LOCATION for partition is not allowed for tables defined " +
-                "using the datasource API")
-          } else {
-            part.copy(storage = part.storage.copy(locationUri = Some(location)))
-          }
+        val newPart = part.copy(storage = part.storage.copy(locationUri = Some(location)))
         catalog.alterPartitions(table.identifier, Seq(newPart))
       case None =>
         // No partition spec is specified, so we set the location for the table itself
@@ -709,6 +708,25 @@ object DDLUtils {
     table.provider.isDefined && table.provider.get != "hive"
   }

+  /**
+   * Throws a standard error for actions that require partitionProvider = hive.
+   */
+  def verifyPartitionProviderIsHive(
+      spark: SparkSession, table: CatalogTable, action: String): Unit = {
+    val tableName = table.identifier.table
+    if (!spark.sqlContext.conf.manageFilesourcePartitions && isDatasourceTable(table)) {
+      throw new AnalysisException(
+        s"$action is not allowed on $tableName since filesource partition management is " +
+          "disabled (spark.sql.hive.manageFilesourcePartitions = false).")
+    }
+    if (!table.partitionProviderIsHive && isDatasourceTable(table)) {
+      throw new AnalysisException(
+        s"$action is not allowed on $tableName since its partition metadata is not stored in " +
+          "the Hive metastore. To import this information into the metastore, run " +
+          s"`msck repair table $tableName`")
+    }
+  }
+
   /**
    * If the command ALTER VIEW is to alter a table or ALTER TABLE is to alter a view,
    * issue an exception [[AnalysisException]].
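With this helper, every partition-related DDL command funnels through the same two checks: the global `spark.sql.hive.manageFilesourcePartitions` flag and the per-table `partitionProviderIsHive` bit. A hedged sketch of how another command would adopt the same pattern; `AlterTableTouchPartitionCommand` is a hypothetical example, not part of this change:

```scala
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}

// Hypothetical command illustrating the intended call pattern for the new helper.
case class AlterTableTouchPartitionCommand(tableName: TableIdentifier) extends RunnableCommand {
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog
    val table = catalog.getTableMetadata(tableName)
    DDLUtils.verifyAlterTableType(catalog, table, isView = false)
    // Fails fast for datasource tables whose partitions are not tracked by the
    // Hive metastore, or when filesource partition management is disabled.
    DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "ALTER TABLE ... TOUCH PARTITION")
    Seq.empty[Row]
  }
}
```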