
Commit 332e252

yaooqinn authored and cloud-fan committed
[SPARK-29425][SQL] The ownership of a database should be respected
### What changes were proposed in this pull request?

Keep the owner of a database when executing ALTER DATABASE commands.

### Why are the changes needed?

Spark will inadvertently delete the owner of a database when executing database DDLs.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Added and modified unit tests.

Closes #26080 from yaooqinn/SPARK-29425.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 0ab922c commit 332e252
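For context, a minimal sketch of the behavior this patch addresses, assuming a Hive-backed SparkSession; the database name db_test and the 'k'='v' property are illustrative, not taken from the commit:

// Sketch only: db_test and the property values are made-up examples.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("SPARK-29425-demo")
  .enableHiveSupport()
  .getOrCreate()

spark.sql("CREATE DATABASE db_test")
// Before this patch, an ALTER DATABASE command rewrote the database in the
// Hive metastore without its owner, silently dropping the ownership that
// the metastore recorded at creation time:
spark.sql("ALTER DATABASE db_test SET DBPROPERTIES ('k' = 'v')")
// With this patch, the owner survives the ALTER, and DESCRIBE DATABASE
// reports it in the new Owner Name / Owner Type rows:
spark.sql("DESCRIBE DATABASE EXTENDED db_test").show(truncate = false)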

File tree: 9 files changed (+191, -37 lines)

sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java

Lines changed: 10 additions & 0 deletions
@@ -53,6 +53,16 @@ public interface SupportsNamespaces extends CatalogPlugin {
    */
   String PROP_COMMENT = "comment";
 
+  /**
+   * A property to specify the owner of the namespace.
+   */
+  String PROP_OWNER_NAME = "ownerName";
+
+  /**
+   * A property to specify the type of the namespace's owner.
+   */
+  String PROP_OWNER_TYPE = "ownerType";
+
   /**
    * The list of reserved namespace properties.
    */
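A catalog client might read these reserved properties back out of a namespace's metadata; a small sketch under that assumption (the ownerOf helper and the namespaceProps map are hypothetical, not part of this commit):

import org.apache.spark.sql.connector.catalog.SupportsNamespaces.{PROP_OWNER_NAME, PROP_OWNER_TYPE}

// Hypothetical helper: pull the owner out of a namespace property map,
// falling back to empty strings when no owner was recorded.
def ownerOf(namespaceProps: Map[String, String]): (String, String) =
  (namespaceProps.getOrElse(PROP_OWNER_NAME, ""),
    namespaceProps.getOrElse(PROP_OWNER_TYPE, ""))

val (name, tpe) = ownerOf(Map(PROP_OWNER_NAME -> "alice", PROP_OWNER_TYPE -> "USER"))
assert(name == "alice" && tpe == "USER")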

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala

Lines changed: 4 additions & 2 deletions
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, N
 import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces.{PROP_OWNER_NAME, PROP_OWNER_TYPE}
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
@@ -143,8 +144,9 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
     // Note: alter properties here because Hive does not support altering other fields
     catalog.alterDatabase(db1.copy(properties = Map("k" -> "v3", "good" -> "true")))
     val newDb1 = catalog.getDatabase("db1")
-    assert(db1.properties.isEmpty)
-    assert(newDb1.properties.size == 2)
+    val reversedProperties = Seq(PROP_OWNER_NAME, PROP_OWNER_TYPE)
+    assert((db1.properties -- reversedProperties).isEmpty)
+    assert((newDb1.properties -- reversedProperties).size == 2)
     assert(newDb1.properties.get("k") == Some("v3"))
     assert(newDb1.properties.get("good") == Some("true"))
   }
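The updated assertions here (and the matching ones in SessionCatalogSuite below) lean on Scala's Map -- operator, which returns a copy of the map with the given keys removed; a quick illustration with made-up values:

// Map -- drops the listed keys, so owner properties injected by the catalog
// can be set aside when comparing the remaining user-set properties.
val props = Map("ownerName" -> "alice", "ownerType" -> "USER", "k" -> "v3")
assert((props -- Seq("ownerName", "ownerType")) == Map("k" -> "v3"))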

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala

Lines changed: 5 additions & 3 deletions
@@ -18,11 +18,12 @@
 package org.apache.spark.sql.catalyst.catalog
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.{AliasIdentifier, FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.{Range, SubqueryAlias, View}
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces.{PROP_OWNER_NAME, PROP_OWNER_TYPE}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
@@ -207,8 +208,9 @@ abstract class SessionCatalogSuite extends AnalysisTest {
     // Note: alter properties here because Hive does not support altering other fields
     catalog.alterDatabase(db1.copy(properties = Map("k" -> "v3", "good" -> "true")))
     val newDb1 = catalog.getDatabaseMetadata("db1")
-    assert(db1.properties.isEmpty)
-    assert(newDb1.properties.size == 2)
+    val reversedProperties = Seq(PROP_OWNER_NAME, PROP_OWNER_TYPE)
+    assert((db1.properties -- reversedProperties).isEmpty)
+    assert((newDb1.properties -- reversedProperties).size == 2)
     assert(newDb1.properties.get("k") == Some("v3"))
     assert(newDb1.properties.get("good") == Some("true"))
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala

Lines changed: 11 additions & 5 deletions
@@ -21,6 +21,7 @@ import java.util.Locale
 import java.util.concurrent.TimeUnit._
 
 import scala.collection.{GenMap, GenSeq}
+import scala.collection.JavaConverters._
 import scala.collection.parallel.ForkJoinTaskSupport
 import scala.collection.parallel.immutable.ParVector
 import scala.util.control.NonFatal
@@ -37,6 +38,7 @@ import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces._
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils}
 import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
 import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter
@@ -172,19 +174,23 @@ case class DescribeDatabaseCommand(
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val dbMetadata: CatalogDatabase =
       sparkSession.sessionState.catalog.getDatabaseMetadata(databaseName)
+    val allDbProperties = dbMetadata.properties
     val result =
       Row("Database Name", dbMetadata.name) ::
         Row("Description", dbMetadata.description) ::
-        Row("Location", CatalogUtils.URIToString(dbMetadata.locationUri)) :: Nil
+        Row("Location", CatalogUtils.URIToString(dbMetadata.locationUri)) ::
+        Row("Owner Name", allDbProperties.getOrElse(PROP_OWNER_NAME, "")) ::
+        Row("Owner Type", allDbProperties.getOrElse(PROP_OWNER_TYPE, "")) :: Nil
 
     if (extended) {
-      val properties =
-        if (dbMetadata.properties.isEmpty) {
+      val properties = allDbProperties -- Seq(PROP_OWNER_NAME, PROP_OWNER_TYPE)
+      val propertiesStr =
+        if (properties.isEmpty) {
           ""
         } else {
-          dbMetadata.properties.toSeq.mkString("(", ", ", ")")
+          properties.toSeq.mkString("(", ", ", ")")
         }
-      result :+ Row("Properties", properties)
+      result :+ Row("Properties", propertiesStr)
     } else {
       result
     }
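With this change, DESCRIBE DATABASE always emits the two owner rows, and EXTENDED appends the remaining properties with the owner keys filtered out. A sketch of the resulting output, reusing the spark session from the sketch above and assuming the database was created by a user alice; the column headers and all values shown are illustrative and depend on the backing metastore:

spark.sql("DESCRIBE DATABASE EXTENDED db_test").show(truncate = false)
// +-------------------------+--------------------------+
// |database_description_item|database_description_value|
// +-------------------------+--------------------------+
// |Database Name            |db_test                   |
// |Description              |                          |
// |Location                 |file:/.../db_test.db      |
// |Owner Name               |alice                     |
// |Owner Type               |USER                      |
// |Properties               |((k,v))                   |
// +-------------------------+--------------------------+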

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala

Lines changed: 12 additions & 7 deletions
@@ -22,7 +22,6 @@ import java.net.URI
 import java.util.Locale
 
 import org.apache.hadoop.fs.Path
-import org.scalatest.BeforeAndAfterEach
 
 import org.apache.spark.internal.config
 import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD
@@ -31,6 +30,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces.{PROP_OWNER_NAME, PROP_OWNER_TYPE}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
@@ -185,6 +185,8 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession {
 
 abstract class DDLSuite extends QueryTest with SQLTestUtils {
 
+  protected val reversedProperties = Seq(PROP_OWNER_NAME, PROP_OWNER_TYPE)
+
   protected def isUsingHiveMetastore: Boolean = {
     spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive"
   }
@@ -328,7 +330,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
     try {
       sql(s"CREATE DATABASE $dbName")
       val db1 = catalog.getDatabaseMetadata(dbName)
-      assert(db1 == CatalogDatabase(
+      assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase(
         dbName,
         "",
         getDBPath(dbName),
@@ -351,7 +353,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       sql(s"CREATE DATABASE $dbName Location '$path'")
       val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
       val expPath = makeQualifiedPath(tmpDir.toString)
-      assert(db1 == CatalogDatabase(
+      assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase(
         dbNameWithoutBackTicks,
         "",
         expPath,
@@ -374,7 +376,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       val dbNameWithoutBackTicks = cleanIdentifier(dbName)
       sql(s"CREATE DATABASE $dbName")
       val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-      assert(db1 == CatalogDatabase(
+      assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase(
        dbNameWithoutBackTicks,
        "",
        getDBPath(dbNameWithoutBackTicks),
@@ -747,7 +749,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       sql(s"CREATE DATABASE $dbName")
 
       checkAnswer(
-        sql(s"DESCRIBE DATABASE EXTENDED $dbName"),
+        sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value")
+          .where("key not like 'Owner%'"), // filter for consistency with in-memory catalog
         Row("Database Name", dbNameWithoutBackTicks) ::
           Row("Description", "") ::
           Row("Location", CatalogUtils.URIToString(location)) ::
@@ -756,7 +759,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       sql(s"ALTER DATABASE $dbName SET DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')")
 
       checkAnswer(
-        sql(s"DESCRIBE DATABASE EXTENDED $dbName"),
+        sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value")
+          .where("key not like 'Owner%'"), // filter for consistency with in-memory catalog
         Row("Database Name", dbNameWithoutBackTicks) ::
           Row("Description", "") ::
           Row("Location", CatalogUtils.URIToString(location)) ::
@@ -765,7 +769,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       sql(s"ALTER DATABASE $dbName SET DBPROPERTIES ('d'='d')")
 
       checkAnswer(
-        sql(s"DESCRIBE DATABASE EXTENDED $dbName"),
+        sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value")
+          .where("key not like 'Owner%'"), // filter for consistency with in-memory catalog
         Row("Database Name", dbNameWithoutBackTicks) ::
          Row("Description", "") ::
          Row("Location", CatalogUtils.URIToString(location)) ::

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala

Lines changed: 27 additions & 16 deletions
@@ -32,8 +32,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.hadoop.hive.metastore.{IMetaStoreClient, TableType => HiveTableType}
-import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, Table => MetaStoreApiTable}
-import org.apache.hadoop.hive.metastore.api.{FieldSchema, Order, SerDeInfo, StorageDescriptor}
+import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, Table => MetaStoreApiTable, _}
 import org.apache.hadoop.hive.ql.Driver
 import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC
@@ -54,6 +53,7 @@ import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces._
 import org.apache.spark.sql.execution.QueryExecutionException
 import org.apache.spark.sql.hive.HiveExternalCatalog
 import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX}
@@ -355,13 +355,8 @@ private[hive] class HiveClientImpl(
   override def createDatabase(
       database: CatalogDatabase,
       ignoreIfExists: Boolean): Unit = withHiveState {
-    client.createDatabase(
-      new HiveDatabase(
-        database.name,
-        database.description,
-        CatalogUtils.URIToString(database.locationUri),
-        Option(database.properties).map(_.asJava).orNull),
-      ignoreIfExists)
+    val hiveDb = toHiveDatabase(database, true)
+    client.createDatabase(hiveDb, ignoreIfExists)
   }
 
   override def dropDatabase(
@@ -379,22 +374,38 @@ private[hive] class HiveClientImpl(
           s"Hive ${version.fullVersion} does not support altering database location")
       }
     }
-    client.alterDatabase(
+    val hiveDb = toHiveDatabase(database, false)
+    client.alterDatabase(database.name, hiveDb)
+  }
+
+  private def toHiveDatabase(database: CatalogDatabase, isCreate: Boolean): HiveDatabase = {
+    val props = database.properties
+    val hiveDb = new HiveDatabase(
       database.name,
-      new HiveDatabase(
-        database.name,
-        database.description,
-        CatalogUtils.URIToString(database.locationUri),
-        Option(database.properties).map(_.asJava).orNull))
+      database.description,
+      CatalogUtils.URIToString(database.locationUri),
+      (props -- Seq(PROP_OWNER_NAME, PROP_OWNER_TYPE)).asJava)
+    props.get(PROP_OWNER_NAME).orElse(if (isCreate) Some(userName) else None).foreach { ownerName =>
+      shim.setDatabaseOwnerName(hiveDb, ownerName)
+    }
+    props.get(PROP_OWNER_TYPE).orElse(if (isCreate) Some(PrincipalType.USER.name) else None)
+      .foreach { ownerType =>
+        shim.setDatabaseOwnerType(hiveDb, ownerType)
+      }
+    hiveDb
   }
 
   override def getDatabase(dbName: String): CatalogDatabase = withHiveState {
     Option(client.getDatabase(dbName)).map { d =>
+      val paras = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++
+        Map(PROP_OWNER_NAME -> shim.getDatabaseOwnerName(d),
+          PROP_OWNER_TYPE -> shim.getDatabaseOwnerType(d))
+
       CatalogDatabase(
         name = d.getName,
         description = Option(d.getDescription).getOrElse(""),
         locationUri = CatalogUtils.stringToURI(d.getLocationUri),
-        properties = Option(d.getParameters).map(_.asScala.toMap).orNull)
+        properties = paras)
     }.getOrElse(throw new NoSuchDatabaseException(dbName))
   }
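The behavioral pivot is the isCreate flag in toHiveDatabase: on CREATE the owner defaults to the client's current user (userName), while on ALTER the owner is only set when the incoming properties already carry one, which leaves the stored owner untouched. A standalone sketch of that rule (resolveOwner is a hypothetical distillation, with the shim calls left out):

// Hypothetical distillation of the owner-resolution rule in toHiveDatabase.
def resolveOwner(
    props: Map[String, String],
    isCreate: Boolean,
    currentUser: String): Option[String] =
  props.get("ownerName").orElse(if (isCreate) Some(currentUser) else None)

assert(resolveOwner(Map.empty, isCreate = true, "alice") == Some("alice"))  // CREATE defaults to the current user
assert(resolveOwner(Map.empty, isCreate = false, "alice") == None)          // ALTER never invents an owner
assert(resolveOwner(Map("ownerName" -> "bob"), isCreate = false, "alice") == Some("bob"))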

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala

Lines changed: 55 additions & 2 deletions
@@ -29,8 +29,7 @@ import scala.util.control.NonFatal
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.IMetaStoreClient
-import org.apache.hadoop.hive.metastore.api.{EnvironmentContext, Function => HiveFunction, FunctionType}
-import org.apache.hadoop.hive.metastore.api.{MetaException, PrincipalType, ResourceType, ResourceUri}
+import org.apache.hadoop.hive.metastore.api.{Database, EnvironmentContext, Function => HiveFunction, FunctionType, MetaException, PrincipalType, ResourceType, ResourceUri}
 import org.apache.hadoop.hive.ql.Driver
 import org.apache.hadoop.hive.ql.io.AcidUtils
 import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition, Table}
@@ -154,6 +153,14 @@ private[client] sealed abstract class Shim {
       deleteData: Boolean,
       purge: Boolean): Unit
 
+  def getDatabaseOwnerName(db: Database): String
+
+  def setDatabaseOwnerName(db: Database, owner: String): Unit
+
+  def getDatabaseOwnerType(db: Database): String
+
+  def setDatabaseOwnerType(db: Database, ownerType: String): Unit
+
   protected def findStaticMethod(klass: Class[_], name: String, args: Class[_]*): Method = {
     val method = findMethod(klass, name, args: _*)
     require(Modifier.isStatic(method.getModifiers()),
@@ -456,6 +463,14 @@ private[client] class Shim_v0_12 extends Shim with Logging {
   def listFunctions(hive: Hive, db: String, pattern: String): Seq[String] = {
     Seq.empty[String]
   }
+
+  override def getDatabaseOwnerName(db: Database): String = ""
+
+  override def setDatabaseOwnerName(db: Database, owner: String): Unit = {}
+
+  override def getDatabaseOwnerType(db: Database): String = ""
+
+  override def setDatabaseOwnerType(db: Database, ownerType: String): Unit = {}
 }
 
 private[client] class Shim_v0_13 extends Shim_v0_12 {
@@ -493,6 +508,28 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       "getResults",
       classOf[JList[Object]])
 
+  private lazy val getDatabaseOwnerNameMethod =
+    findMethod(
+      classOf[Database],
+      "getOwnerName")
+
+  private lazy val setDatabaseOwnerNameMethod =
+    findMethod(
+      classOf[Database],
+      "setOwnerName",
+      classOf[String])
+
+  private lazy val getDatabaseOwnerTypeMethod =
+    findMethod(
+      classOf[Database],
+      "getOwnerType")
+
+  private lazy val setDatabaseOwnerTypeMethod =
+    findMethod(
+      classOf[Database],
+      "setOwnerType",
+      classOf[PrincipalType])
+
   override def setCurrentSessionState(state: SessionState): Unit =
     setCurrentSessionStateMethod.invoke(null, state)
@@ -809,6 +846,22 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
     }
   }
 
+  override def getDatabaseOwnerName(db: Database): String = {
+    Option(getDatabaseOwnerNameMethod.invoke(db)).map(_.asInstanceOf[String]).getOrElse("")
+  }
+
+  override def setDatabaseOwnerName(db: Database, owner: String): Unit = {
+    setDatabaseOwnerNameMethod.invoke(db, owner)
+  }
+
+  override def getDatabaseOwnerType(db: Database): String = {
+    Option(getDatabaseOwnerTypeMethod.invoke(db))
+      .map(_.asInstanceOf[PrincipalType].name()).getOrElse("")
+  }
+
+  override def setDatabaseOwnerType(db: Database, ownerType: String): Unit = {
+    setDatabaseOwnerTypeMethod.invoke(db, PrincipalType.valueOf(ownerType))
+  }
 }
 
 private[client] class Shim_v0_14 extends Shim_v0_13 {
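The new accessors follow the shim pattern used throughout this file: resolve a method by name via reflection, cache it in a lazy val, and invoke it at call sites, so Spark can compile against one Hive version yet run against metastore classes that may or may not expose the method (Hive 0.12's Database has no owner fields, hence the no-op Shim_v0_12 overrides). A self-contained sketch of that shape, using java.lang.String as a stand-in target class rather than Hive's Database:

import java.lang.reflect.Method

// Stand-in shim: the real code targets Hive's Database class; String's
// toUpperCase is used here purely to show the lookup-once, invoke-later shape.
object ReflectiveShimSketch {
  private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method =
    klass.getMethod(name, args: _*)

  private lazy val toUpperCaseMethod =
    findMethod(classOf[String], "toUpperCase")

  def upper(s: String): String =
    toUpperCaseMethod.invoke(s).asInstanceOf[String]

  def main(args: Array[String]): Unit =
    assert(upper("spark") == "SPARK")
}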
