@@ -21,6 +21,8 @@ import java.io.IOException
 import java.lang.reflect.InvocationTargetException
 import java.util
 import java.util.Locale
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.locks.ReentrantReadWriteLock
 
 import scala.collection.mutable
 import scala.util.control.NonFatal
@@ -68,6 +70,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     HiveUtils.newClientForMetadata(conf, hadoopConf)
   }
 
+  private val clientLocks = new ConcurrentHashMap[String, ReentrantReadWriteLock]()
+
   // Exceptions thrown by the hive client that we would like to wrap
   private val clientExceptions = Set(
     classOf[HiveException].getCanonicalName,
@@ -94,8 +98,14 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
    * Run some code involving `client` in a [[synchronized]] block and wrap certain
    * exceptions thrown in the process in [[AnalysisException]].
    */
-  private def withClient[T](body: => T): T = synchronized {
+  private def withClient[T](write: Boolean, db: String)(body: => T): T = {
+    val lock = clientLocks.computeIfAbsent(db, (_: String) => new ReentrantReadWriteLock())
     try {
+      if (write) {
+        lock.writeLock().lock()
+      } else {
+        lock.readLock().lock()
+      }
       body
     } catch {
       case NonFatal(exception) if isClientException(exception) =>
@@ -107,6 +117,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         }
         throw new AnalysisException(
           e.getClass.getCanonicalName + ": " + e.getMessage, cause = Some(e))
+    } finally {
+      if (write) {
+        lock.writeLock().unlock()
+      } else {
+        lock.readLock().unlock()
+      }
     }
   }
 
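The two hunks above are the core of the change: the JVM-wide synchronized block around every metastore call is replaced by one ReentrantReadWriteLock per database, fetched from a ConcurrentHashMap with computeIfAbsent and released in a finally block, so reads on the same database can proceed concurrently while writes stay exclusive. Below is a minimal, self-contained sketch of that pattern, not the patch itself: the names PerDatabaseLocking and withDbLock are invented for illustration, only ConcurrentHashMap.computeIfAbsent and ReentrantReadWriteLock are JDK APIs, and unlike the patch the sketch acquires the lock before entering try.

import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.locks.{Lock, ReentrantReadWriteLock}

// One read-write lock per database name. Readers of a database run concurrently,
// a writer is exclusive for that database, and different databases never contend.
object PerDatabaseLocking {
  private val locks = new ConcurrentHashMap[String, ReentrantReadWriteLock]()

  def withDbLock[T](write: Boolean, db: String)(body: => T): T = {
    val lock = locks.computeIfAbsent(db, (_: String) => new ReentrantReadWriteLock())
    val l: Lock = if (write) lock.writeLock() else lock.readLock()
    l.lock()
    try body finally l.unlock() // released even if body throws
  }

  def main(args: Array[String]): Unit = {
    val listed = withDbLock(write = false, db = "db1") { "read db1 metadata" }
    val altered = withDbLock(write = true, db = "db1") { "altered db1 metadata" }
    println(s"$listed / $altered")
  }
}

Note that, as in the patch, entries are only ever added to the map, so one lock object is retained for every database name that has been touched.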
@@ -186,14 +202,14 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
 
   override def createDatabase(
       dbDefinition: CatalogDatabase,
-      ignoreIfExists: Boolean): Unit = withClient {
+      ignoreIfExists: Boolean): Unit = withClient(true, dbDefinition.name) {
     client.createDatabase(dbDefinition, ignoreIfExists)
   }
 
   override def dropDatabase(
       db: String,
       ignoreIfNotExists: Boolean,
-      cascade: Boolean): Unit = withClient {
+      cascade: Boolean): Unit = withClient(true, db) {
     client.dropDatabase(db, ignoreIfNotExists, cascade)
   }
 
@@ -203,7 +219,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
    *
    * Note: As of now, this only supports altering database properties!
    */
-  override def alterDatabase(dbDefinition: CatalogDatabase): Unit = withClient {
+  override def alterDatabase(dbDefinition: CatalogDatabase): Unit =
+    withClient(true, dbDefinition.name) {
     val existingDb = getDatabase(dbDefinition.name)
     if (existingDb.properties == dbDefinition.properties) {
       logWarning(s"Request to alter database ${dbDefinition.name} is a no-op because " +
@@ -213,23 +230,23 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     client.alterDatabase(dbDefinition)
   }
 
-  override def getDatabase(db: String): CatalogDatabase = withClient {
+  override def getDatabase(db: String): CatalogDatabase = withClient(false, db) {
     client.getDatabase(db)
   }
 
-  override def databaseExists(db: String): Boolean = withClient {
+  override def databaseExists(db: String): Boolean = withClient(false, db) {
     client.databaseExists(db)
   }
 
-  override def listDatabases(): Seq[String] = withClient {
+  override def listDatabases(): Seq[String] = withClient(false, "") {
     client.listDatabases("*")
   }
 
-  override def listDatabases(pattern: String): Seq[String] = withClient {
+  override def listDatabases(pattern: String): Seq[String] = withClient(false, "") {
     client.listDatabases(pattern)
   }
 
-  override def setCurrentDatabase(db: String): Unit = withClient {
+  override def setCurrentDatabase(db: String): Unit = withClient(false, "") {
     client.setCurrentDatabase(db)
   }
 
@@ -239,7 +256,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
 
   override def createTable(
       tableDefinition: CatalogTable,
-      ignoreIfExists: Boolean): Unit = withClient {
+      ignoreIfExists: Boolean): Unit = withClient(true, tableDefinition.database) {
     assert(tableDefinition.identifier.database.isDefined)
     val db = tableDefinition.identifier.database.get
     val table = tableDefinition.identifier.table
@@ -511,15 +528,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       db: String,
       table: String,
       ignoreIfNotExists: Boolean,
-      purge: Boolean): Unit = withClient {
+      purge: Boolean): Unit = withClient(true, db) {
     requireDbExists(db)
     client.dropTable(db, table, ignoreIfNotExists, purge)
   }
 
   override def renameTable(
       db: String,
       oldName: String,
-      newName: String): Unit = withClient {
+      newName: String): Unit = withClient(true, db) {
     val rawTable = getRawTable(db, oldName)
 
     // Note that Hive serde tables don't use path option in storage properties to store the value
@@ -567,7 +584,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
    * Note: As of now, this doesn't support altering table schema, partition column names and bucket
    * specification. We will ignore them even if users do specify different values for these fields.
    */
-  override def alterTable(tableDefinition: CatalogTable): Unit = withClient {
+  override def alterTable(tableDefinition: CatalogTable): Unit =
+    withClient(true, tableDefinition.database) {
     assert(tableDefinition.identifier.database.isDefined)
     val db = tableDefinition.identifier.database.get
     requireTableExists(db, tableDefinition.identifier.table)
@@ -666,7 +684,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def alterTableDataSchema(
       db: String,
       table: String,
-      newDataSchema: StructType): Unit = withClient {
+      newDataSchema: StructType): Unit = withClient(true, db) {
     requireTableExists(db, table)
     val oldTable = getTable(db, table)
     verifyDataSchema(oldTable.identifier, oldTable.tableType, newDataSchema)
@@ -698,7 +716,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def alterTableStats(
       db: String,
       table: String,
-      stats: Option[CatalogStatistics]): Unit = withClient {
+      stats: Option[CatalogStatistics]): Unit = withClient(true, db) {
     requireTableExists(db, table)
     val rawTable = getRawTable(db, table)
 
@@ -715,11 +733,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     client.alterTable(updatedTable)
   }
 
-  override def getTable(db: String, table: String): CatalogTable = withClient {
+  override def getTable(db: String, table: String): CatalogTable = withClient(false, db) {
     restoreTableMetadata(getRawTable(db, table))
   }
 
-  override def getTablesByName(db: String, tables: Seq[String]): Seq[CatalogTable] = withClient {
+  override def getTablesByName(db: String, tables: Seq[String]): Seq[CatalogTable] =
+    withClient(false, db) {
     getRawTablesByNames(db, tables).map(restoreTableMetadata)
   }
 
@@ -847,21 +866,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       properties = table.properties.filterKeys(!HIVE_GENERATED_TABLE_PROPERTIES(_)))
   }
 
-  override def tableExists(db: String, table: String): Boolean = withClient {
+  override def tableExists(db: String, table: String): Boolean = withClient(false, db) {
     client.tableExists(db, table)
   }
 
-  override def listTables(db: String): Seq[String] = withClient {
+  override def listTables(db: String): Seq[String] = withClient(false, db) {
     requireDbExists(db)
     client.listTables(db)
   }
 
-  override def listTables(db: String, pattern: String): Seq[String] = withClient {
+  override def listTables(db: String, pattern: String): Seq[String] = withClient(false, db) {
     requireDbExists(db)
     client.listTables(db, pattern)
   }
 
-  override def listViews(db: String, pattern: String): Seq[String] = withClient {
+  override def listViews(db: String, pattern: String): Seq[String] = withClient(false, db) {
     requireDbExists(db)
     client.listTablesByType(db, pattern, CatalogTableType.VIEW)
   }
@@ -871,7 +890,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       table: String,
       loadPath: String,
       isOverwrite: Boolean,
-      isSrcLocal: Boolean): Unit = withClient {
+      isSrcLocal: Boolean): Unit = withClient(false, db) {
     requireTableExists(db, table)
     client.loadTable(
       loadPath,
@@ -887,7 +906,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       partition: TablePartitionSpec,
       isOverwrite: Boolean,
       inheritTableSpecs: Boolean,
-      isSrcLocal: Boolean): Unit = withClient {
+      isSrcLocal: Boolean): Unit = withClient(false, db) {
     requireTableExists(db, table)
 
     val orderedPartitionSpec = new util.LinkedHashMap[String, String]()
@@ -917,7 +936,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       loadPath: String,
       partition: TablePartitionSpec,
       replace: Boolean,
-      numDP: Int): Unit = withClient {
+      numDP: Int): Unit = withClient(false, db) {
     requireTableExists(db, table)
 
     val orderedPartitionSpec = new util.LinkedHashMap[String, String]()
@@ -982,7 +1001,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       db: String,
       table: String,
       parts: Seq[CatalogTablePartition],
-      ignoreIfExists: Boolean): Unit = withClient {
+      ignoreIfExists: Boolean): Unit = withClient(true, db) {
     requireTableExists(db, table)
 
     val tableMeta = getTable(db, table)
@@ -1008,7 +1027,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       parts: Seq[TablePartitionSpec],
       ignoreIfNotExists: Boolean,
       purge: Boolean,
-      retainData: Boolean): Unit = withClient {
+      retainData: Boolean): Unit = withClient(true, db) {
     requireTableExists(db, table)
     client.dropPartitions(
       db, table, parts.map(lowerCasePartitionSpec), ignoreIfNotExists, purge, retainData)
@@ -1018,7 +1037,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       db: String,
       table: String,
       specs: Seq[TablePartitionSpec],
-      newSpecs: Seq[TablePartitionSpec]): Unit = withClient {
+      newSpecs: Seq[TablePartitionSpec]): Unit = withClient(true, db) {
     client.renamePartitions(
       db, table, specs.map(lowerCasePartitionSpec), newSpecs.map(lowerCasePartitionSpec))
 
@@ -1145,7 +1164,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def alterPartitions(
       db: String,
       table: String,
-      newParts: Seq[CatalogTablePartition]): Unit = withClient {
+      newParts: Seq[CatalogTablePartition]): Unit = withClient(true, db) {
     val lowerCasedParts = newParts.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec)))
 
     val rawTable = getRawTable(db, table)
@@ -1166,7 +1185,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def getPartition(
       db: String,
       table: String,
-      spec: TablePartitionSpec): CatalogTablePartition = withClient {
+      spec: TablePartitionSpec): CatalogTablePartition = withClient(false, db) {
     val part = client.getPartition(db, table, lowerCasePartitionSpec(spec))
     restorePartitionMetadata(part, getTable(db, table))
   }
@@ -1204,7 +1223,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def getPartitionOption(
       db: String,
       table: String,
-      spec: TablePartitionSpec): Option[CatalogTablePartition] = withClient {
+      spec: TablePartitionSpec): Option[CatalogTablePartition] = withClient(false, db) {
     client.getPartitionOption(db, table, lowerCasePartitionSpec(spec)).map { part =>
       restorePartitionMetadata(part, getTable(db, table))
     }
@@ -1216,7 +1235,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def listPartitionNames(
       db: String,
       table: String,
-      partialSpec: Option[TablePartitionSpec] = None): Seq[String] = withClient {
+      partialSpec: Option[TablePartitionSpec] = None): Seq[String] = withClient(false, db) {
     val catalogTable = getTable(db, table)
     val partColNameMap = buildLowerCasePartColNameMap(catalogTable).mapValues(escapePathName)
     val clientPartitionNames =
@@ -1237,7 +1256,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def listPartitions(
       db: String,
       table: String,
-      partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = withClient {
+      partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] =
+    withClient(false, db) {
     val partColNameMap = buildLowerCasePartColNameMap(getTable(db, table))
     val res = client.getPartitions(db, table, partialSpec.map(lowerCasePartitionSpec)).map { part =>
       part.copy(spec = restorePartitionSpec(part.spec, partColNameMap))
@@ -1258,7 +1278,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       db: String,
       table: String,
       predicates: Seq[Expression],
-      defaultTimeZoneId: String): Seq[CatalogTablePartition] = withClient {
+      defaultTimeZoneId: String): Seq[CatalogTablePartition] = withClient(false, db) {
     val rawTable = getRawTable(db, table)
     val catalogTable = restoreTableMetadata(rawTable)
 
@@ -1277,7 +1297,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
 
   override def createFunction(
       db: String,
-      funcDefinition: CatalogFunction): Unit = withClient {
+      funcDefinition: CatalogFunction): Unit = withClient(true, db) {
     requireDbExists(db)
     // Hive's metastore is case insensitive. However, Hive's createFunction does
     // not normalize the function name (unlike the getFunction part). So,
@@ -1288,13 +1308,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     client.createFunction(db, funcDefinition.copy(identifier = functionIdentifier))
   }
 
-  override def dropFunction(db: String, name: String): Unit = withClient {
+  override def dropFunction(db: String, name: String): Unit = withClient(true, db) {
     requireFunctionExists(db, name)
     client.dropFunction(db, name)
   }
 
   override def alterFunction(
-      db: String, funcDefinition: CatalogFunction): Unit = withClient {
+      db: String, funcDefinition: CatalogFunction): Unit = withClient(true, db) {
     requireDbExists(db)
     val functionName = funcDefinition.identifier.funcName.toLowerCase(Locale.ROOT)
     requireFunctionExists(db, functionName)
@@ -1305,23 +1325,23 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   override def renameFunction(
       db: String,
       oldName: String,
-      newName: String): Unit = withClient {
+      newName: String): Unit = withClient(true, db) {
     requireFunctionExists(db, oldName)
     requireFunctionNotExists(db, newName)
     client.renameFunction(db, oldName, newName)
   }
 
-  override def getFunction(db: String, funcName: String): CatalogFunction = withClient {
+  override def getFunction(db: String, funcName: String): CatalogFunction = withClient(false, db) {
     requireFunctionExists(db, funcName)
     client.getFunction(db, funcName)
   }
 
-  override def functionExists(db: String, funcName: String): Boolean = withClient {
+  override def functionExists(db: String, funcName: String): Boolean = withClient(false, db) {
     requireDbExists(db)
     client.functionExists(db, funcName)
   }
 
-  override def listFunctions(db: String, pattern: String): Seq[String] = withClient {
+  override def listFunctions(db: String, pattern: String): Seq[String] = withClient(false, db) {
     requireDbExists(db)
     client.listFunctions(db, pattern)
   }
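Taken together, the call-site changes route metadata reads (getDatabase, getTable, listPartitions, and so on) through the shared read lock and metadata mutations (createTable, alterPartitions, dropFunction, and so on) through the exclusive write lock, keyed by database name; listDatabases and setCurrentDatabase pass an empty string since they are not tied to a single database. As an illustration of the intended concurrency, the hypothetical demo below reuses the PerDatabaseLocking.withDbLock helper from the earlier sketch; it is not part of the patch.

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

// Readers of "sales" may overlap each other, the "sales" writer is exclusive,
// and the "hr" writer is never blocked by anything happening in "sales".
object LockBehaviorDemo {
  import PerDatabaseLocking.withDbLock

  def main(args: Array[String]): Unit = {
    val reads = (1 to 4).map { i =>
      Future(withDbLock(write = false, db = "sales") { s"reader $i listed tables in sales" })
    }
    val write = Future(withDbLock(write = true, db = "sales") { "writer altered a table in sales" })
    val other = Future(withDbLock(write = true, db = "hr") { "writer created a table in hr" })

    (reads :+ write :+ other).foreach(f => println(Await.result(f, 10.seconds)))
  }
}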