[SPARK-32001][SQL]Create JDBC authentication provider developer API #29024

Closed · wants to merge 14 commits
Changes from 3 commits
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.security

import org.apache.spark.annotation.DeveloperApi

/**
* ::DeveloperApi::
* There are cases when the global JVM security configuration must be modified.
* In order to avoid races, the modification must be synchronized on this object.
*/
@DeveloperApi
object SecurityConfigurationLock
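
For illustration, a minimal sketch (hypothetical helper object, not part of this diff) of the locking pattern the secure providers below follow whenever they mutate the JVM-global JAAS configuration:

  import javax.security.auth.login.Configuration

  import org.apache.spark.security.SecurityConfigurationLock

  object JaasConfigExample {
    // Hypothetical helper: swap the JVM-global JAAS configuration while holding
    // the shared lock, so concurrently running connection providers cannot race.
    def setGlobalJaasConfig(config: Configuration): Unit =
      SecurityConfigurationLock.synchronized {
        Configuration.setConfiguration(config)
      }
  }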
@@ -54,7 +54,7 @@ class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite {
JDBCOptions.JDBC_KEYTAB -> keytabFileName,
JDBCOptions.JDBC_PRINCIPAL -> principal
))
new DB2ConnectionProvider(null, options).getAdditionalProperties()
new DB2ConnectionProvider().getAdditionalProperties(options)
}

override def beforeContainerStart(
@@ -0,0 +1,6 @@
org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider
org.apache.spark.sql.execution.datasources.jdbc.connection.DB2ConnectionProvider
org.apache.spark.sql.execution.datasources.jdbc.connection.MariaDBConnectionProvider
org.apache.spark.sql.execution.datasources.jdbc.connection.MSSQLConnectionProvider
org.apache.spark.sql.execution.datasources.jdbc.connection.PostgresConnectionProvider
org.apache.spark.sql.execution.datasources.jdbc.connection.OracleConnectionProvider
@@ -23,12 +23,15 @@ import java.util.{Locale, Properties}
import org.apache.commons.io.FilenameUtils

import org.apache.spark.SparkFiles
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

/**
* ::DeveloperApi::
* Options for the JDBC data source.
*/
@DeveloperApi
Member:
This is an internal class, so could you avoid exposing it? How about using Map[String, String] instead in the provider?

Contributor Author:
Good idea, this way the API is less exposed to changes.

Contributor Author — @gaborgsomogyi, Jul 10, 2020:
I've done the initial steps, but I only see bad and worse tradeoffs. The API is less dependent on JDBCOptions changes, but the provider-side implementation becomes more complicated, with either duplicated code or duplicated instantiation. Let me explain with examples:

Option 1: Re-instantiate JDBCOptions. The duplicated instantiation worries me, plus the general concern below.

  override def canHandle(driver: Driver, options: Map[String, String]): Boolean = {
    val jdbcOptions = new JDBCOptions(options)
    jdbcOptions.keytab == null || jdbcOptions.principal == null
  }

Option 2: Parse the needed parameters on my own. The reinventing-the-wheel feeling worries me, plus the general concern below.

  override def canHandle(driver: Driver, options: Map[String, String]): Boolean = {
    val keytab = {
      val keytabParam = options.getOrElse(JDBC_KEYTAB, null)
      if (keytabParam != null && FilenameUtils.getPath(keytabParam).isEmpty) {
        val result = SparkFiles.get(keytabParam)
        logDebug(s"Keytab path not found, assuming --files, file name used on executor: $result")
        result
      } else {
        logDebug("Keytab path found, assuming manual upload")
        keytabParam
      }
    }
    val principal = options.getOrElse(JDBC_PRINCIPAL, null)
    keytab == null || principal == null
  }

General concern: In both cases Spark itself can be fine, because in the first case a new JDBCOptions instantiation and in the second case copy-pasted parsing moved into a base class can fill the gap. However, considering myself a 3rd-party developer, I don't have many options (since JDBCOptions is not exposed as a developer API):

  • Copy and paste the code from JDBCOptions, which may change over time
  • Implement the parameter parsing on my own, which may contain bugs

Considering these findings I think it's better to keep JDBCOptions. WDYT?

Member — @maropu, Jul 10, 2020:
Option 1 looks better. But do 3rd-party developers need to use JDBCOptions there? Or could we just pass only the two params?

  override def canHandle(driver: Driver, keytab: String, principal: String): Boolean = {

Contributor Author:
We could pass the 2 params, but then we would limit further implementation possibilities, so I would vote for the map.
At the moment no params other than keytab and principal are needed, but later providers may need further things. It's not a strong opinion, I just don't want to close off later possibilities. If we agree on the approach I'll do the changes.

Contributor Author:
The example I've given in the other open question here shows how parameters other than keytab and principal can be used. Not passing the whole map would close off this possibility.
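
For illustration only (hypothetical class and option names, not the example referenced above): a provider whose canHandle decision depends on an option beyond keytab and principal, which is only possible because the whole options object is passed in:

  import java.sql.{Connection, Driver}
  import java.util.Properties

  import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
  import org.apache.spark.sql.execution.datasources.jdbc.connection.JdbcConnectionProvider

  class TokenAuthConnectionProvider extends JdbcConnectionProvider {
    // "customAuthMode" / "customAuthToken" are made-up option names, used only to
    // show that a provider may need options other than keytab/principal.
    override def canHandle(driver: Driver, options: JDBCOptions): Boolean =
      options.parameters.get("customAuthMode").contains("token")

    override def getConnection(driver: Driver, options: JDBCOptions): Connection = {
      val props = new Properties()
      props.put("accessToken", options.parameters("customAuthToken"))
      driver.connect(options.url, props)
    }
  }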

Member:
I agree with @maropu here. JDBCOptions is under the execution package and is meant to be private.

Contributor Author:
@HyukjinKwon thanks for having a look!

I agree that JDBCOptions mustn't be exposed. Let me change the code to show option 1. As said, passing only keytab: String, principal: String is not enough, because some of the providers need further configuration. I've started to work on this change (unless anybody has a better option).

Contributor Author:
I've removed JDBCOptions from the API, but I'm keeping this open in case further discussion is needed.
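
For reference, a rough sketch of what the Map-based trait shape discussed above could look like (illustrative only; not the code in this commit):

  import java.sql.{Connection, Driver}

  trait JdbcConnectionProvider {
    // Same responsibilities as the JDBCOptions-based version, but the caller
    // passes the raw option map instead of an internal Spark class.
    def canHandle(driver: Driver, options: Map[String, String]): Boolean
    def getConnection(driver: Driver, options: Map[String, String]): Connection
  }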

class JDBCOptions(
@transient val parameters: CaseInsensitiveMap[String])
extends Serializable with Logging {
@@ -62,7 +62,7 @@ object JdbcUtils extends Logging {
throw new IllegalStateException(
s"Did not find registered driver with class $driverClass")
}
val connection = ConnectionProvider.create(driver, options).getConnection()
val connection = ConnectionProvider.create(driver, options)
require(connection != null,
s"The driver could not open a JDBC connection. Check the URL: ${options.url}")

@@ -18,18 +18,29 @@
package org.apache.spark.sql.execution.datasources.jdbc.connection

import java.sql.{Connection, Driver}
import java.util.Properties

import scala.collection.JavaConverters._

import org.apache.spark.internal.Logging
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

private[jdbc] class BasicConnectionProvider(driver: Driver, options: JDBCOptions)
extends ConnectionProvider {
def getConnection(): Connection = {
val properties = getAdditionalProperties()
private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with Logging {
/**
* Additional properties for data connection (Data source property takes precedence).
*/
def getAdditionalProperties(options: JDBCOptions): Properties = new Properties()

override def canHandle(driver: Driver, options: JDBCOptions): Boolean = {
options.keytab == null || options.principal == null
}

override def getConnection(driver: Driver, options: JDBCOptions): Connection = {
val properties = getAdditionalProperties(options)
options.asConnectionProperties.entrySet().asScala.foreach { e =>
properties.put(e.getKey(), e.getValue())
}
logDebug(s"JDBC connection initiated with URL: ${options.url} and properties: $properties")
driver.connect(options.url, properties)
}
}
@@ -18,60 +18,41 @@
package org.apache.spark.sql.execution.datasources.jdbc.connection

import java.sql.{Connection, Driver}
import java.util.Properties
import java.util.ServiceLoader

import scala.collection.mutable

import org.apache.spark.internal.Logging
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

/**
* Connection provider which opens connection toward various databases (database specific instance
* needed). If kerberos authentication required then it's the provider's responsibility to set all
* the parameters.
*/
private[jdbc] trait ConnectionProvider {
/**
* Additional properties for data connection (Data source property takes precedence).
*/
def getAdditionalProperties(): Properties = new Properties()

/**
* Opens connection toward the database.
*/
def getConnection(): Connection
}
import org.apache.spark.util.Utils

private[jdbc] object ConnectionProvider extends Logging {
def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
if (options.keytab == null || options.principal == null) {
logDebug("No authentication configuration found, using basic connection provider")
new BasicConnectionProvider(driver, options)
} else {
logDebug("Authentication configuration found, using database specific connection provider")
options.driverClass match {
case PostgresConnectionProvider.driverClass =>
logDebug("Postgres connection provider found")
new PostgresConnectionProvider(driver, options)

case MariaDBConnectionProvider.driverClass =>
logDebug("MariaDB connection provider found")
new MariaDBConnectionProvider(driver, options)

case DB2ConnectionProvider.driverClass =>
logDebug("DB2 connection provider found")
new DB2ConnectionProvider(driver, options)

case MSSQLConnectionProvider.driverClass =>
logDebug("MS SQL connection provider found")
new MSSQLConnectionProvider(driver, options)

case OracleConnectionProvider.driverClass =>
logDebug("Oracle connection provider found")
new OracleConnectionProvider(driver, options)

case _ =>
throw new IllegalArgumentException(s"Driver ${options.driverClass} does not support " +
"Kerberos authentication")
private val providers = loadProviders()

def loadProviders(): Seq[JdbcConnectionProvider] = {
val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
Utils.getContextOrSparkClassLoader)
val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()

val iterator = loader.iterator
while (iterator.hasNext) {
try {
val provider = iterator.next
logDebug(s"Loaded built in provider: $provider")
providers += provider
} catch {
case t: Throwable =>
logError(s"Failed to load built in provider.", t)
Member:
I am getting the following exception on my console constantly while running JDBC tests. Should it really be logged as an error?

14:31:25.070 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider.
java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:232)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.<init>(ConnectionProvider.scala:31)
	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.<clinit>(ConnectionProvider.scala)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$createConnectionFactory$1(JdbcUtils.scala:66)
	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog.withConnection(JDBCTableCatalog.scala:156)
	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog.listTables(JDBCTableCatalog.scala:58)
	at org.apache.spark.sql.execution.datasources.v2.ShowTablesExec.run(ShowTablesExec.scala:42)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:39)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:39)
	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:45)
	at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3675)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3673)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
	at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
	at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:612)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607)
	at org.apache.spark.sql.test.SQLTestUtilsBase.$anonfun$sql$1(SQLTestUtils.scala:231)
	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite.$anonfun$new$2(JDBCTableCatalogSuite.scala:67)
	at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:134)
	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite.$anonfun$new$1(JDBCTableCatalogSuite.scala:67)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
	at org.scalatest.Transformer.apply(Transformer.scala:20)
	at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
	at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176)
	at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61)
	at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
	at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
	at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
	at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
	at scala.collection.immutable.List.foreach(List.scala:392)
	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
	at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
	at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
	at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
	at org.scalatest.Suite.run(Suite.scala:1112)
	at org.scalatest.Suite.run$(Suite.scala:1094)
	at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
	at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
	at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
	at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61)
	at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
	at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
	at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
	at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61)
	at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1320)
	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1314)
	at scala.collection.immutable.List.foreach(List.scala:392)
	at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1314)
	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
	at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1480)
	at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
	at org.scalatest.tools.Runner$.run(Runner.scala:798)
	at org.scalatest.tools.Runner.run(Runner.scala)
	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:40)
	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
Caused by: java.lang.IllegalArgumentException: Intentional Exception
	at org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider.<init>(IntentionallyFaultyConnectionProvider.scala:26)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at java.lang.Class.newInstance(Class.java:442)
	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
	... 82 more

Member:
Maybe log it as a warning?

Contributor Author:
We can decrease it to a warning. The main point is to notify the user.

Member:
Is it okay to ignore the error case where it fails to load built-in providers? DataSource throws an exception if it fails to load built-in datasources:

  case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal
    // NoClassDefFoundError's class name uses "/" rather than "." for packages
    val className = e.getMessage.replaceAll("/", ".")
    if (spark2RemovedClasses.contains(className)) {
      throw new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
        "Please check if your library is compatible with Spark 2.0", e)
    } else {
      throw e
    }

Contributor Author:
What do you mean by ignore? Providers must be loaded independently, so we need to catch and ignore the exception.

Contributor Author:
If the suggestion is to let the exception propagate, then I would say it's a bad idea. If a provider can't be loaded, the rest must still work. We've had a similar issue and expectation in the Hadoop delegation token area.

Member — @maropu, Oct 7, 2020:
Yea, the policy looks okay for secure connections, but how about the basic one, BasicConnectionProvider? At least until Spark v3.0, creating basic JDBC connections does not fail because of the loading failure.

Contributor Author:
At least, until Spark v3.0, creating basic JDBC connections does not fail because of the loading failure.

Here it's the same, since BasicConnectionProvider is built-in and there is no pre-loading inside. The main issue would come when one adds a new provider which can't be loaded. That failure would make all the rest of the workload fail if we don't load them independently.

}
}
providers
}

def create(driver: Driver, options: JDBCOptions): Connection = {
val filteredProviders = providers.filter(_.canHandle(driver, options))
logDebug(s"Filtered providers: $filteredProviders")
require(filteredProviders.size == 1,
"JDBC connection initiated but not exactly one connection provider found which can handle it")
filteredProviders.head.getConnection(driver, options)
}
}
@@ -23,39 +23,39 @@ import java.util.Properties

import org.apache.hadoop.security.UserGroupInformation

import org.apache.spark.security.SecurityConfigurationLock
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

private[sql] class DB2ConnectionProvider(driver: Driver, options: JDBCOptions)
extends SecureConnectionProvider(driver, options) {
override val appEntry: String = "JaasClient"
private[sql] class DB2ConnectionProvider extends SecureConnectionProvider {
override val driverClass = "com.ibm.db2.jcc.DB2Driver"

override def getConnection(): Connection = {
setAuthenticationConfigIfNeeded()
override def appEntry(driver: Driver, options: JDBCOptions): String = "JaasClient"

override def getConnection(driver: Driver, options: JDBCOptions): Connection = {
setAuthenticationConfigIfNeeded(driver, options)
UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs(
new PrivilegedExceptionAction[Connection]() {
override def run(): Connection = {
DB2ConnectionProvider.super.getConnection()
DB2ConnectionProvider.super.getConnection(driver, options)
}
}
)
}

override def getAdditionalProperties(): Properties = {
override def getAdditionalProperties(options: JDBCOptions): Properties = {
val result = new Properties()
// 11 is the integer value for kerberos
result.put("securityMechanism", new String("11"))
result.put("KerberosServerPrincipal", options.principal)
result
}

override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized {
val (parent, configEntry) = getConfigWithAppEntry()
override def setAuthenticationConfigIfNeeded(
driver: Driver,
options: JDBCOptions): Unit = SecurityConfigurationLock.synchronized {
val (parent, configEntry) = getConfigWithAppEntry(driver, options)
if (configEntry == null || configEntry.isEmpty) {
setAuthenticationConfig(parent)
setAuthenticationConfig(parent, driver, options)
}
}
}

private[sql] object DB2ConnectionProvider {
val driverClass = "com.ibm.db2.jcc.DB2Driver"
}
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.datasources.jdbc.connection

import java.sql.{Connection, Driver}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

/**
* ::DeveloperApi::
* Connection provider which opens connections toward various databases (a database-specific
* instance is needed). If any authentication is required then it is the provider's responsibility
* to set all the parameters. If the global JVM security configuration is changed then
* <code>SecurityConfigurationLock</code> must be used as a lock to avoid races.
* Important to mention: connection providers within a JVM are used from multiple threads, so
* adding internal state is not advised. If any state is added then it must be synchronized
* properly.
*/
@DeveloperApi
trait JdbcConnectionProvider {
/**
* Checks if this connection provider instance can handle the connection initiated by the driver.
* There must be exactly one active connection provider which can handle the connection for a
* specific driver. If this requirement isn't met then <code>IllegalArgumentException</code>
* will be thrown by the provider framework.
* @param driver Java driver which initiates the connection
* @param options Driver options with which the connection is initiated
* @return True if the connection provider can handle the driver with the given options.
*/
def canHandle(driver: Driver, options: JDBCOptions): Boolean

/**
* Opens connection toward the database.
* @param driver Java driver which initiates the connection
* @param options Driver options with which the connection is initiated
* @return a <code>Connection</code> object that represents a connection to the URL
*/
def getConnection(driver: Driver, options: JDBCOptions): Connection
}
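
As a usage note, a minimal sketch (hypothetical package, class, and driver names) of how a third-party provider could implement this trait; the ServiceLoader-based discovery shown earlier would pick it up if the jar also ships a META-INF/services/org.apache.spark.sql.execution.datasources.jdbc.connection.JdbcConnectionProvider resource containing the implementation class name:

  package com.example.jdbc

  import java.sql.{Connection, Driver}
  import java.util.Properties

  import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
  import org.apache.spark.sql.execution.datasources.jdbc.connection.JdbcConnectionProvider

  // Hypothetical provider for a fictional "exampledb" JDBC driver.
  class ExampleDBConnectionProvider extends JdbcConnectionProvider {
    // Claim only this driver class so that exactly one provider matches and the
    // framework's "exactly one provider" requirement is satisfied.
    override def canHandle(driver: Driver, options: JDBCOptions): Boolean =
      options.driverClass == "com.example.jdbc.ExampleDriver"

    override def getConnection(driver: Driver, options: JDBCOptions): Connection = {
      // Any JVM-global security configuration change would have to be wrapped in
      // SecurityConfigurationLock.synchronized { ... } as documented above.
      driver.connect(options.url, new Properties())
    }
  }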
@@ -23,14 +23,14 @@ import java.util.Properties

import org.apache.hadoop.security.UserGroupInformation

import org.apache.spark.security.SecurityConfigurationLock
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions

private[sql] class MSSQLConnectionProvider(
driver: Driver,
options: JDBCOptions,
parserMethod: String = "parseAndMergeProperties"
) extends SecureConnectionProvider(driver, options) {
override val appEntry: String = {
private[sql] class MSSQLConnectionProvider extends SecureConnectionProvider {
override val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
val parserMethod: String = "parseAndMergeProperties"

override def appEntry(driver: Driver, options: JDBCOptions): String = {
val configName = "jaasConfigurationName"
val appEntryDefault = "SQLJDBCDriver"

@@ -58,27 +58,29 @@ private[sql] class MSSQLConnectionProvider(
}
}

override def getConnection(): Connection = {
setAuthenticationConfigIfNeeded()
override def getConnection(driver: Driver, options: JDBCOptions): Connection = {
setAuthenticationConfigIfNeeded(driver, options)
UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs(
new PrivilegedExceptionAction[Connection]() {
override def run(): Connection = {
MSSQLConnectionProvider.super.getConnection()
MSSQLConnectionProvider.super.getConnection(driver, options)
}
}
)
}

override def getAdditionalProperties(): Properties = {
override def getAdditionalProperties(options: JDBCOptions): Properties = {
val result = new Properties()
// These props needed to reach internal kerberos authentication in the JDBC driver
result.put("integratedSecurity", "true")
result.put("authenticationScheme", "JavaKerberos")
result
}

override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized {
val (parent, configEntry) = getConfigWithAppEntry()
override def setAuthenticationConfigIfNeeded(
driver: Driver,
options: JDBCOptions): Unit = SecurityConfigurationLock.synchronized {
val (parent, configEntry) = getConfigWithAppEntry(driver, options)
/**
* Couple of things to mention here (v8.2.2 client):
* 1. MS SQL supports JAAS application name configuration
@@ -87,11 +89,7 @@ private[sql] class MSSQLConnectionProvider(
val entryUsesKeytab = configEntry != null &&
configEntry.exists(_.getOptions().get("useKeyTab") == "true")
if (configEntry == null || configEntry.isEmpty || !entryUsesKeytab) {
setAuthenticationConfig(parent)
setAuthenticationConfig(parent, driver, options)
}
}
}

private[sql] object MSSQLConnectionProvider {
val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
}