Skip to content

[SPARK-24590][BUILD] Make Jenkins tests passed with hadoop 3 profile #21588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions dev/deps/spark-deps-hadoop-3.1
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ jackson-core-2.6.7.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.6.7.1.jar
jackson-dataformat-yaml-2.6.7.jar
jackson-jaxrs-base-2.7.8.jar
jackson-jaxrs-json-provider-2.7.8.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-jaxb-annotations-2.6.7.jar
jackson-module-paranamer-2.7.9.jar
Expand Down
4 changes: 4 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,10 @@
<groupId>com.sun.jersey.contribs</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.jaxrs</groupId>
<artifactId>jackson-jaxrs-json-provider</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down
17 changes: 15 additions & 2 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ object SparkBuild extends PomBuild {
// Note ordering of these settings matter.
/* Enable shared settings on all projects */
(allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools))
.foreach(enable(sharedSettings ++ DependencyOverrides.settings ++
.foreach(enable(sharedSettings ++ DependencyOverrides.settings ++ ExcludeDependencies.settings ++
ExcludedDependencies.settings ++ Checkstyle.settings))

/* Enable tests settings for all projects except examples, assembly and tools */
Expand Down Expand Up @@ -471,7 +471,20 @@ object DockerIntegrationTests {
object DependencyOverrides {
  /**
   * Dependency version overrides applied to all projects.
   *
   * The Jackson overrides pin the 2.6.7 line because transitive artifacts (notably via
   * jackson-jaxrs-json-provider) resolve differently between Maven and sbt, so the
   * versions have to be forced manually on the sbt side to match the Maven build.
   */
  lazy val settings = Seq(
    dependencyOverrides += "com.google.guava" % "guava" % "14.0.1",
    dependencyOverrides += "jline" % "jline" % "2.14.6",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.6.7",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-core" % "2.6.7",
    // Fixed groupId: this artifact is published under "com.fasterxml.jackson.module",
    // not "com.fasterxml.jackson.core"; with the wrong groupId the override never matched.
    dependencyOverrides += "com.fasterxml.jackson.module" % "jackson-module-jaxb-annotations" % "2.6.7",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7")
}

/**
 * Exclusions to work around sbt's dependency resolution being different from Maven's.
 */
object ExcludeDependencies {
  // jackson-jaxrs-json-provider is excluded in the Maven build (see the corresponding
  // <exclusion> in pom.xml); sbt resolves it anyway, so exclude it here too.
  // jsr311-api should likewise have been pulled out along with the Jersey exclusions;
  // NOTE(review): presumed to be an sbt-vs-Maven resolution difference — confirm.
  lazy val settings = Seq(
    excludeDependencies += "com.fasterxml.jackson.jaxrs" % "jackson-jaxrs-json-provider",
    excludeDependencies += "javax.ws.rs" % "jsr311-api")
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.net.URI
import java.util.Locale

import org.apache.hadoop.fs.Path
import org.apache.hadoop.util.VersionInfo
import org.scalatest.BeforeAndAfterEach

import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
Expand Down Expand Up @@ -2697,6 +2698,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}

test(s"basic DDL using locale tr - caseSensitive $caseSensitive") {
// There seems a bug about dealing with non-ascii compatible characters in file names
// for the usage of File.toPath() in few specific JDKs, which looks updated in HADOOP-12045
// for local file systems specifically. This affects Hadoop 2.8.0+ per the JIRA.
// See https://stackoverflow.com/questions/37409379/invalidpathexception-for-chinese-filename
assume(VersionInfo.getVersion < "2.8.0")
withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") {
withLocale("tr") {
val dbName = "DaTaBaSe_I"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@ private[hive] class IsolatedClientLoader(

protected def isSharedClass(name: String): Boolean = {
val isHadoopClass =
name.startsWith("org.apache.hadoop.") && !name.startsWith("org.apache.hadoop.hive.")
name.startsWith("org.apache.hadoop.") && !name.startsWith("org.apache.hadoop.hive.") ||
// Also, includes configuration2 as a min fix for Hadoop 3+ for now. This is failed
// during class resolution. It is fine when 'sharesHadoopClasses' is disabled.
name.startsWith("org.apache.commons.configuration2.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To all, BTW, this fix is needed to land Hadoop 3 support into Apache spark.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wangyum, if you face some errors like:

	Suppressed: java.io.IOException: Failed to use org.apache.hadoop.mapred.LocalClientProtocolProvider due to error: 
		at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:148)
		... 72 more
	Caused by: org.apache.commons.configuration2.ex.ConfigurationRuntimeException: java.lang.IllegalArgumentException: Cannot invoke org.apache.commons.configuration2.AbstractConfiguration.setListDelimiterHandler on bean class 'class org.apache.commons.configuration2.PropertiesConfiguration' - argument type mismatch - had objects of type "org.apache.commons.configuration2.convert.DefaultListDelimiterHandler" but expected signature "org.apache.commons.configuration2.convert.ListDelimiterHandler"
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:463)
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:479)
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:492)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.createResultInstance(BasicConfigurationBuilder.java:447)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.createResult(BasicConfigurationBuilder.java:417)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.getConfiguration(BasicConfigurationBuilder.java:285)
		at org.apache.hadoop.metrics2.impl.MetricsConfig.loadFirst(MetricsConfig.java:119)
		at org.apache.hadoop.metrics2.impl.MetricsConfig.create(MetricsConfig.java:98)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.configure(MetricsSystemImpl.java:478)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.start(MetricsSystemImpl.java:188)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.init(MetricsSystemImpl.java:163)
		at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.init(DefaultMetricsSystem.java:62)
		at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.initialize(DefaultMetricsSystem.java:58)
		at org.apache.hadoop.mapred.LocalJobRunnerMetrics.create(LocalJobRunnerMetrics.java:45)
		at org.apache.hadoop.mapred.LocalJobRunner.<init>(LocalJobRunner.java:771)
		at org.apache.hadoop.mapred.LocalJobRunner.<init>(LocalJobRunner.java:764)

later, consider this fix. It's been too old so I forgot the exact context about this but you might need this fix as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @HyukjinKwon


name.startsWith("org.slf4j") ||
name.startsWith("org.apache.log4j") || // log4j1.x
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.io.{BufferedWriter, File, FileWriter}
import scala.util.Properties

import org.apache.hadoop.fs.Path
import org.apache.hadoop.util.VersionInfo
import org.scalatest.{BeforeAndAfterEach, Matchers}

import org.apache.spark._
Expand Down Expand Up @@ -123,6 +124,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-8020: set sql conf in spark conf") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SparkSQLConfTest.getClass.getName.stripSuffix("$"),
Expand Down Expand Up @@ -159,6 +161,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-9757 Persist Parquet relation with decimal column") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SPARK_9757.getClass.getName.stripSuffix("$"),
Expand Down Expand Up @@ -250,6 +253,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-16901: set javax.jdo.option.ConnectionURL") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
// In this test, we set javax.jdo.option.ConnectionURL and set metastore version to
// 0.13. This test will make sure that javax.jdo.option.ConnectionURL will not be
// overridden by hive's default settings when we create a HiveConf object inside
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,7 @@ class HiveClientSuite(version: String)
}
}

override def beforeAll() {
super.beforeAll()
client = init(true)
}
private lazy val client: HiveClient = init(true)

test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") {
val client = init(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.util.VersionInfo
import org.scalactic.source.Position
import org.scalatest.Tag

Expand All @@ -26,7 +27,6 @@ import org.apache.spark.sql.hive.HiveUtils

private[client] abstract class HiveVersionSuite(version: String) extends SparkFunSuite {
override protected val enableAutoThreadAudit = false
protected var client: HiveClient = null
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was only used in HiveClientSuite.scala.


protected def buildClient(
hadoopConf: Configuration,
Expand All @@ -49,6 +49,11 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu

override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
(implicit pos: Position): Unit = {
super.test(s"$version: $testName", testTags: _*)(testFun)
super.test(s"$version: $testName", testTags: _*) {
assume(
VersionInfo.getVersion < "3.0.0" || version >= "2.3",
"Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
testFun
}
}
}
Loading