Skip to content

[SPARK-24590][BUILD] Make Jenkins tests passed with hadoop 3 profile #21588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions dev/deps/spark-deps-hadoop-3.1
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ jackson-core-2.6.7.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.6.7.1.jar
jackson-dataformat-yaml-2.6.7.jar
jackson-jaxrs-base-2.7.8.jar
jackson-jaxrs-json-provider-2.7.8.jar
jackson-mapper-asl-1.9.13.jar
jackson-module-jaxb-annotations-2.6.7.jar
jackson-module-paranamer-2.7.9.jar
Expand Down
4 changes: 4 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,10 @@
<groupId>com.sun.jersey.contribs</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.jaxrs</groupId>
<artifactId>jackson-jaxrs-json-provider</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down
17 changes: 15 additions & 2 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ object SparkBuild extends PomBuild {
// Note ordering of these settings matter.
/* Enable shared settings on all projects */
(allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools))
.foreach(enable(sharedSettings ++ DependencyOverrides.settings ++
.foreach(enable(sharedSettings ++ DependencyOverrides.settings ++ ExcludeDependencies.settings ++
ExcludedDependencies.settings ++ Checkstyle.settings))

/* Enable tests settings for all projects except examples, assembly and tools */
Expand Down Expand Up @@ -471,7 +471,20 @@ object DockerIntegrationTests {
object DependencyOverrides {
  /**
   * Dependency version overrides applied to all projects.
   *
   * The Jackson overrides pin the 2.6.7 line because transitive artifacts (notably via
   * jackson-jaxrs-json-provider) resolve differently between Maven and sbt, so the
   * versions have to be forced manually on the sbt side to match the Maven build.
   */
  lazy val settings = Seq(
    dependencyOverrides += "com.google.guava" % "guava" % "14.0.1",
    dependencyOverrides += "jline" % "jline" % "2.14.6",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.6.7",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-core" % "2.6.7",
    // Fixed groupId: this artifact is published under "com.fasterxml.jackson.module",
    // not "com.fasterxml.jackson.core"; with the wrong groupId the override never matched.
    dependencyOverrides += "com.fasterxml.jackson.module" % "jackson-module-jaxb-annotations" % "2.6.7",
    dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.6.7")
}

/**
 * Exclusions to work around sbt's dependency resolution being different from Maven's.
 */
object ExcludeDependencies {
  // jackson-jaxrs-json-provider is excluded in the Maven build (see the corresponding
  // <exclusion> in pom.xml); sbt resolves it anyway, so exclude it here too.
  // jsr311-api should likewise have been pulled out along with the Jersey exclusions;
  // NOTE(review): presumed to be an sbt-vs-Maven resolution difference — confirm.
  lazy val settings = Seq(
    excludeDependencies += "com.fasterxml.jackson.jaxrs" % "jackson-jaxrs-json-provider",
    excludeDependencies += "javax.ws.rs" % "jsr311-api")
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.net.URI
import java.util.Locale

import org.apache.hadoop.fs.Path
import org.apache.hadoop.util.VersionInfo
import org.scalatest.BeforeAndAfterEach

import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
Expand Down Expand Up @@ -2697,6 +2698,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}

test(s"basic DDL using locale tr - caseSensitive $caseSensitive") {
// There seems a bug about dealing with non-ascii compatible characters in file names
// for the usage of File.toPath() in few specific JDKs, which looks updated in HADOOP-12045
// for local file systems specifically. This affects Hadoop 2.8.0+ per the JIRA.
// See https://stackoverflow.com/questions/37409379/invalidpathexception-for-chinese-filename
assume(VersionInfo.getVersion < "2.8.0")
withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") {
withLocale("tr") {
val dbName = "DaTaBaSe_I"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@ private[hive] class IsolatedClientLoader(

protected def isSharedClass(name: String): Boolean = {
val isHadoopClass =
name.startsWith("org.apache.hadoop.") && !name.startsWith("org.apache.hadoop.hive.")
name.startsWith("org.apache.hadoop.") && !name.startsWith("org.apache.hadoop.hive.") ||
// Also, includes configuration2 as a min fix for Hadoop 3+ for now. This is failed
// during class resolution. It is fine when 'sharesHadoopClasses' is disabled.
name.startsWith("org.apache.commons.configuration2.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To all, BTW, this fix is needed to land Hadoop 3 support into Apache spark.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wangyum, if you face some errors like:

	Suppressed: java.io.IOException: Failed to use org.apache.hadoop.mapred.LocalClientProtocolProvider due to error: 
		at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:148)
		... 72 more
	Caused by: org.apache.commons.configuration2.ex.ConfigurationRuntimeException: java.lang.IllegalArgumentException: Cannot invoke org.apache.commons.configuration2.AbstractConfiguration.setListDelimiterHandler on bean class 'class org.apache.commons.configuration2.PropertiesConfiguration' - argument type mismatch - had objects of type "org.apache.commons.configuration2.convert.DefaultListDelimiterHandler" but expected signature "org.apache.commons.configuration2.convert.ListDelimiterHandler"
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:463)
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:479)
		at org.apache.commons.configuration2.beanutils.BeanHelper.createBean(BeanHelper.java:492)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.createResultInstance(BasicConfigurationBuilder.java:447)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.createResult(BasicConfigurationBuilder.java:417)
		at org.apache.commons.configuration2.builder.BasicConfigurationBuilder.getConfiguration(BasicConfigurationBuilder.java:285)
		at org.apache.hadoop.metrics2.impl.MetricsConfig.loadFirst(MetricsConfig.java:119)
		at org.apache.hadoop.metrics2.impl.MetricsConfig.create(MetricsConfig.java:98)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.configure(MetricsSystemImpl.java:478)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.start(MetricsSystemImpl.java:188)
		at org.apache.hadoop.metrics2.impl.MetricsSystemImpl.init(MetricsSystemImpl.java:163)
		at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.init(DefaultMetricsSystem.java:62)
		at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.initialize(DefaultMetricsSystem.java:58)
		at org.apache.hadoop.mapred.LocalJobRunnerMetrics.create(LocalJobRunnerMetrics.java:45)
		at org.apache.hadoop.mapred.LocalJobRunner.<init>(LocalJobRunner.java:771)
		at org.apache.hadoop.mapred.LocalJobRunner.<init>(LocalJobRunner.java:764)

later, consider this fix. It's been too old so I forgot the exact context about this but you might need this fix as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @HyukjinKwon


name.startsWith("org.slf4j") ||
name.startsWith("org.apache.log4j") || // log4j1.x
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.io.{BufferedWriter, File, FileWriter}
import scala.util.Properties

import org.apache.hadoop.fs.Path
import org.apache.hadoop.util.VersionInfo
import org.scalatest.{BeforeAndAfterEach, Matchers}

import org.apache.spark._
Expand Down Expand Up @@ -123,6 +124,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-8020: set sql conf in spark conf") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SparkSQLConfTest.getClass.getName.stripSuffix("$"),
Expand Down Expand Up @@ -159,6 +161,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-9757 Persist Parquet relation with decimal column") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SPARK_9757.getClass.getName.stripSuffix("$"),
Expand Down Expand Up @@ -250,6 +253,7 @@ class HiveSparkSubmitSuite
}

test("SPARK-16901: set javax.jdo.option.ConnectionURL") {
assume(VersionInfo.getVersion < "3.0.0", "Only Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
// In this test, we set javax.jdo.option.ConnectionURL and set metastore version to
// 0.13. This test will make sure that javax.jdo.option.ConnectionURL will not be
// overridden by hive's default settings when we create a HiveConf object inside
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,7 @@ class HiveClientSuite(version: String)
}
}

override def beforeAll() {
super.beforeAll()
client = init(true)
}
private lazy val client: HiveClient = init(true)

test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") {
val client = init(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.util.VersionInfo
import org.scalactic.source.Position
import org.scalatest.Tag

Expand All @@ -26,7 +27,6 @@ import org.apache.spark.sql.hive.HiveUtils

private[client] abstract class HiveVersionSuite(version: String) extends SparkFunSuite {
override protected val enableAutoThreadAudit = false
protected var client: HiveClient = null
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was only used in HiveClientSuite.scala.


protected def buildClient(
hadoopConf: Configuration,
Expand All @@ -49,6 +49,11 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu

override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
(implicit pos: Position): Unit = {
super.test(s"$version: $testName", testTags: _*)(testFun)
super.test(s"$version: $testName", testTags: _*) {
assume(
VersionInfo.getVersion < "3.0.0" || version >= "2.3",
"Hive 2.3+ supports Hadoop 3+. See HIVE-16081.")
testFun
}
}
}
Loading