diff --git a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala new file mode 100644 index 0000000000000..08291859a32cc --- /dev/null +++ b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala @@ -0,0 +1,658 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.{File, IOException, PrintStream} +import java.net.URI +import java.text.ParseException +import java.util.UUID + +import org.apache.commons.lang3.StringUtils +import org.apache.ivy.Ivy +import org.apache.ivy.core.LogOptions +import org.apache.ivy.core.module.descriptor.{Artifact, DefaultDependencyDescriptor, DefaultExcludeRule, DefaultModuleDescriptor, ExcludeRule} +import org.apache.ivy.core.module.id.{ArtifactId, ModuleId, ModuleRevisionId} +import org.apache.ivy.core.report.{DownloadStatus, ResolveReport} +import org.apache.ivy.core.resolve.ResolveOptions +import org.apache.ivy.core.retrieve.RetrieveOptions +import org.apache.ivy.core.settings.IvySettings +import org.apache.ivy.plugins.matcher.GlobPatternMatcher +import org.apache.ivy.plugins.repository.file.FileRepository +import org.apache.ivy.plugins.resolver.{ChainResolver, FileSystemResolver, IBiblioResolver} + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.ArrayImplicits._ + +/** Provides utility functions to be used inside SparkSubmit. */ +private[spark] object MavenUtils extends Logging { + val JAR_IVY_SETTING_PATH_KEY: String = "spark.jars.ivySettings" + + // Exposed for testing + // var printStream = SparkSubmit.printStream + + // Exposed for testing. + // These components are used to make the default exclusion rules for Spark dependencies. + // We need to specify each component explicitly, otherwise we miss + // spark-streaming utility components. Underscore is there to differentiate between + // spark-streaming_2.1x and spark-streaming-kafka-0-10-assembly_2.1x + val IVY_DEFAULT_EXCLUDES: Seq[String] = Seq( + "catalyst_", + "core_", + "graphx_", + "kvstore_", + "launcher_", + "mllib_", + "mllib-local_", + "network-common_", + "network-shuffle_", + "repl_", + "sketch_", + "sql_", + "streaming_", + "tags_", + "unsafe_") + + /** + * Represents a Maven Coordinate + * + * @param groupId + * the groupId of the coordinate + * @param artifactId + * the artifactId of the coordinate + * @param version + * the version of the coordinate + */ + private[spark] case class MavenCoordinate( + groupId: String, + artifactId: String, + version: String) { + override def toString: String = s"$groupId:$artifactId:$version" + } + + /** + * Extracts maven coordinates from a comma-delimited string. 
Coordinates should be provided in
+   * the format `groupId:artifactId:version` or `groupId/artifactId:version`.
+   *
+   * @param coordinates
+   *   Comma-delimited string of maven coordinates
+   * @return
+   *   Sequence of Maven coordinates
+   */
+  def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = {
+    coordinates.split(",").map { p =>
+      val splits = p.replace("/", ":").split(":")
+      require(
+        splits.length == 3,
+        s"Provided Maven Coordinates must be in the form " +
+          s"'groupId:artifactId:version'. The coordinate provided is: $p")
+      require(
+        splits(0) != null && splits(0).trim.nonEmpty,
+        s"The groupId cannot be null or " +
+          s"whitespace. The groupId provided is: ${splits(0)}")
+      require(
+        splits(1) != null && splits(1).trim.nonEmpty,
+        s"The artifactId cannot be null or " +
+          s"whitespace. The artifactId provided is: ${splits(1)}")
+      require(
+        splits(2) != null && splits(2).trim.nonEmpty,
+        s"The version cannot be null or " +
+          s"whitespace. The version provided is: ${splits(2)}")
+      MavenCoordinate(splits(0), splits(1), splits(2))
+    }.toImmutableArraySeq
+  }
+
+  /** Path of the local Maven cache. */
+  private[util] def m2Path: File = {
+    if (SparkEnvUtils.isTesting) {
+      // test builds delete the maven cache, and this can cause flakiness
+      new File("dummy", ".m2" + File.separator + "repository")
+    } else {
+      new File(System.getProperty("user.home"), ".m2" + File.separator + "repository")
+    }
+  }
+
+  /**
+   * Create a ChainResolver used by Ivy to search for and resolve dependencies.
+   *
+   * @param defaultIvyUserDir
+   *   The default user path for Ivy
+   * @param useLocalM2AsCache
+   *   Whether to use the local maven repo as a cache
+   * @return
+   *   A ChainResolver used by Ivy to search for and resolve dependencies.
+   */
+  private[util] def createRepoResolvers(
+      defaultIvyUserDir: File,
+      useLocalM2AsCache: Boolean = true): ChainResolver = {
+    // We need a chain resolver if we want to check multiple repositories
+    val cr = new ChainResolver
+    cr.setName("spark-list")
+
+    if (useLocalM2AsCache) {
+      val localM2 = new IBiblioResolver
+      localM2.setM2compatible(true)
+      localM2.setRoot(m2Path.toURI.toString)
+      localM2.setUsepoms(true)
+      localM2.setName("local-m2-cache")
+      cr.add(localM2)
+    }
+
+    val localIvy = new FileSystemResolver
+    val localIvyRoot = new File(defaultIvyUserDir, "local")
+    localIvy.setLocal(true)
+    localIvy.setRepository(new FileRepository(localIvyRoot))
+    val ivyPattern = Seq(
+      localIvyRoot.getAbsolutePath,
+      "[organisation]",
+      "[module]",
+      "[revision]",
+      "ivys",
+      "ivy.xml").mkString(File.separator)
+    localIvy.addIvyPattern(ivyPattern)
+    val artifactPattern = Seq(
+      localIvyRoot.getAbsolutePath,
+      "[organisation]",
+      "[module]",
+      "[revision]",
+      "[type]s",
+      "[artifact](-[classifier]).[ext]").mkString(File.separator)
+    localIvy.addArtifactPattern(artifactPattern)
+    localIvy.setName("local-ivy-cache")
+    cr.add(localIvy)
+
+    // the IBiblio resolver resolves POM-declared dependencies
+    val br: IBiblioResolver = new IBiblioResolver
+    br.setM2compatible(true)
+    br.setUsepoms(true)
+    val defaultInternalRepo: Option[String] = sys.env.get("DEFAULT_ARTIFACT_REPOSITORY")
+    br.setRoot(defaultInternalRepo.getOrElse("https://repo1.maven.org/maven2/"))
+    br.setName("central")
+    cr.add(br)
+
+    val sp: IBiblioResolver = new IBiblioResolver
+    sp.setM2compatible(true)
+    sp.setUsepoms(true)
+    sp.setRoot(
+      sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY", "https://repos.spark-packages.org/"))
+    sp.setName("spark-packages")
+    cr.add(sp)
+    cr
+  }
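+  // Illustrative note (a sketch of the chain built above, assuming the defaults and no
+  // DEFAULT_ARTIFACT_REPOSITORY override): a coordinate is looked up in this order.
+  //   val cr = createRepoResolvers(new IvySettings().getDefaultIvyUserDir)
+  //   // 1. local-m2-cache   (~/.m2/repository)
+  //   // 2. local-ivy-cache  (<defaultIvyUserDir>/local)
+  //   // 3. central          (https://repo1.maven.org/maven2/)
+  //   // 4. spark-packages   (https://repos.spark-packages.org/)
+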
+  /**
+   * Output a list of paths for the downloaded jars to be added to the classpath (will append to
+   * jars in SparkSubmit).
+   *
+   * @param artifacts
+   *   Sequence of dependencies that were resolved and retrieved
+   * @param cacheDirectory
+   *   Directory where jars are cached
+   * @return
+   *   List of paths for the dependencies
+   */
+  private[util] def resolveDependencyPaths(
+      artifacts: Array[AnyRef],
+      cacheDirectory: File): Seq[String] = {
+    artifacts
+      .map(_.asInstanceOf[Artifact])
+      .filter { artifactInfo =>
+        if (artifactInfo.getExt == "jar") {
+          true
+        } else {
+          logInfo(s"Skipping non-jar dependency ${artifactInfo.getId}")
+          false
+        }
+      }
+      .map { artifactInfo =>
+        val artifact = artifactInfo.getModuleRevisionId
+        val extraAttrs = artifactInfo.getExtraAttributes
+        val classifier = if (extraAttrs.containsKey("classifier")) {
+          "-" + extraAttrs.get("classifier")
+        } else {
+          ""
+        }
+        cacheDirectory.getAbsolutePath + File.separator +
+          s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}$classifier.jar"
+      }.toImmutableArraySeq
+  }
+
+  /** Adds the given maven coordinates to Ivy's module descriptor. */
+  private[util] def addDependenciesToIvy(
+      md: DefaultModuleDescriptor,
+      artifacts: Seq[MavenCoordinate],
+      ivyConfName: String)(implicit printStream: PrintStream): Unit = {
+    artifacts.foreach { mvn =>
+      val ri = ModuleRevisionId.newInstance(mvn.groupId, mvn.artifactId, mvn.version)
+      val dd = new DefaultDependencyDescriptor(ri, false, false)
+      dd.addDependencyConfiguration(ivyConfName, ivyConfName + "(runtime)")
+      // scalastyle:off println
+      printStream.println(s"${dd.getDependencyId} added as a dependency")
+      // scalastyle:on println
+      md.addDependency(dd)
+    }
+  }
+
+  /** Add exclusion rules for dependencies already included in the spark-assembly */
+  private def addExclusionRules(
+      ivySettings: IvySettings,
+      ivyConfName: String,
+      md: DefaultModuleDescriptor): Unit = {
+    // Add scala exclusion rule
+    md.addExcludeRule(createExclusion("*:scala-library:*", ivySettings, ivyConfName))
+
+    IVY_DEFAULT_EXCLUDES.foreach { comp =>
+      md.addExcludeRule(
+        createExclusion(s"org.apache.spark:spark-$comp*:*", ivySettings, ivyConfName))
+    }
+  }
+
+  /**
+   * Build Ivy Settings using options with default resolvers
+   *
+   * @param remoteRepos
+   *   Comma-delimited string of remote repositories other than maven central
+   * @param ivyPath
+   *   The path to the local ivy repository
+   * @param useLocalM2AsCache
+   *   Whether or not to use the local m2 repo as a cache
+   * @return
+   *   An IvySettings object
+   */
+  def buildIvySettings(
+      remoteRepos: Option[String],
+      ivyPath: Option[String],
+      useLocalM2AsCache: Boolean = true)(implicit printStream: PrintStream): IvySettings = {
+    val ivySettings: IvySettings = new IvySettings
+    processIvyPathArg(ivySettings, ivyPath)
+
+    // create a pattern matcher
+    ivySettings.addMatcher(new GlobPatternMatcher)
+    // create the dependency resolvers
+    val repoResolver = createRepoResolvers(ivySettings.getDefaultIvyUserDir, useLocalM2AsCache)
+    ivySettings.addResolver(repoResolver)
+    ivySettings.setDefaultResolver(repoResolver.getName)
+    processRemoteRepoArg(ivySettings, remoteRepos)
+    // (since Ivy 2.5) Setting the property ivy.maven.lookup.sources to false
+    // disables the lookup of the sources artifact, and setting the property
+    // ivy.maven.lookup.javadoc to false disables the lookup of the javadoc artifact.
+ ivySettings.setVariable("ivy.maven.lookup.sources", "false") + ivySettings.setVariable("ivy.maven.lookup.javadoc", "false") + ivySettings + } + + /** + * Load Ivy settings from a given filename, using supplied resolvers + * + * @param settingsFile + * Path to Ivy settings file + * @param remoteRepos + * Comma-delimited string of remote repositories other than maven central + * @param ivyPath + * The path to the local ivy repository + * @return + * An IvySettings object + */ + def loadIvySettings(settingsFile: String, remoteRepos: Option[String], ivyPath: Option[String])( + implicit printStream: PrintStream): IvySettings = { + val uri = new URI(settingsFile) + val file = Option(uri.getScheme).getOrElse("file") match { + case "file" => new File(uri.getPath) + case scheme => + throw new IllegalArgumentException( + s"Scheme $scheme not supported in " + + JAR_IVY_SETTING_PATH_KEY) + } + require(file.exists(), s"Ivy settings file $file does not exist") + require(file.isFile, s"Ivy settings file $file is not a normal file") + val ivySettings: IvySettings = new IvySettings + try { + ivySettings.load(file) + if (ivySettings.getDefaultIvyUserDir == null && ivySettings.getDefaultCache == null) { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + // `processIvyPathArg` can overwrite these later. + val alternateIvyDir = System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } + } catch { + case e @ (_: IOException | _: ParseException) => + throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e) + } + processIvyPathArg(ivySettings, ivyPath) + processRemoteRepoArg(ivySettings, remoteRepos) + ivySettings + } + + /* Set ivy settings for location of cache, if option is supplied */ + private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit = { + val alternateIvyDir = ivyPath.filterNot(_.trim.isEmpty).getOrElse { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") + } + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } + + /* Add any optional additional remote repositories */ + private def processRemoteRepoArg(ivySettings: IvySettings, remoteRepos: Option[String])(implicit + printStream: PrintStream): Unit = { + remoteRepos.filterNot(_.trim.isEmpty).map(_.split(",")).foreach { repositoryList => + val cr = new ChainResolver + cr.setName("user-list") + + // add current default resolver, if any + Option(ivySettings.getDefaultResolver).foreach(cr.add) + + // add additional repositories, last resolution in chain takes precedence + repositoryList.zipWithIndex.foreach { case (repo, i) => + val brr: IBiblioResolver = new IBiblioResolver + brr.setM2compatible(true) + brr.setUsepoms(true) + brr.setRoot(repo) + brr.setName(s"repo-${i + 1}") + cr.add(brr) + // scalastyle:off println + printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}") + // scalastyle:on println + } + + ivySettings.addResolver(cr) + ivySettings.setDefaultResolver(cr.getName) + } + } + + /** A nice function to use in tests as well. Values are dummy strings. 
*/
+  private[util] def getModuleDescriptor: DefaultModuleDescriptor =
+    DefaultModuleDescriptor.newDefaultInstance(ModuleRevisionId
+      // Include a UUID in the module name, so that multiple clients resolving maven coordinates
+      // at the same time do not modify the same resolution file concurrently.
+      .newInstance("org.apache.spark", s"spark-submit-parent-${UUID.randomUUID.toString}", "1.0"))
+
+  /**
+   * Clear ivy resolution from current launch. The resolution file is usually at
+   * ~/.ivy2/org.apache.spark-spark-submit-parent-$UUID-default.xml,
+   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.xml, and
+   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.properties. Since each launch
+   * will have its own resolution files created, delete them after each resolution to prevent
+   * accumulation of these files in the ivy cache dir.
+   */
+  private def clearIvyResolutionFiles(
+      mdId: ModuleRevisionId,
+      defaultCacheFile: File,
+      ivyConfName: String): Unit = {
+    val currentResolutionFiles = Seq(
+      s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml",
+      s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml",
+      s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties")
+    currentResolutionFiles.foreach { filename =>
+      new File(defaultCacheFile, filename).delete()
+    }
+  }
+
+  /**
+   * Clear invalid cache files in ivy. The cache file is usually at
+   * ~/.ivy2/cache/${groupId}/${artifactId}/ivy-${version}.xml,
+   * ~/.ivy2/cache/${groupId}/${artifactId}/ivy-${version}.xml.original, and
+   * ~/.ivy2/cache/${groupId}/${artifactId}/ivydata-${version}.properties.
+   * When the local m2 repo is used as a cache, invalid cache files may be created.
+   * If they are not deleted here, an error similar to `unknown resolver local-m2-cache`
+   * is raised, which confuses users.
+ */ + private def clearInvalidIvyCacheFiles( + mdId: ModuleRevisionId, + defaultCacheFile: File): Unit = { + val cacheFiles = Seq( + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivy-${mdId.getRevision}.xml", + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivy-${mdId.getRevision}.xml.original", + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivydata-${mdId.getRevision}.properties") + cacheFiles.foreach { filename => + new File(defaultCacheFile, filename).delete() + } + } + + /** + * Resolves any dependencies that were supplied through maven coordinates + * + * @param coordinates + * Comma-delimited string of maven coordinates + * @param ivySettings + * An IvySettings containing resolvers to use + * @param noCacheIvySettings + * An no-cache(local-m2-cache) IvySettings containing resolvers to use + * @param transitive + * Whether resolving transitive dependencies, default is true + * @param exclusions + * Exclusions to apply when resolving transitive dependencies + * @return + * Seq of path to the jars of the given maven artifacts including their transitive + * dependencies + */ + def resolveMavenCoordinates( + coordinates: String, + ivySettings: IvySettings, + noCacheIvySettings: Option[IvySettings] = None, + transitive: Boolean, + exclusions: Seq[String] = Nil, + isTest: Boolean = false)(implicit printStream: PrintStream): Seq[String] = { + if (coordinates == null || coordinates.trim.isEmpty) { + Nil + } else { + val sysOut = System.out + // Default configuration name for ivy + val ivyConfName = "default" + + // A Module descriptor must be specified. Entries are dummy strings + val md = getModuleDescriptor + + md.setDefaultConf(ivyConfName) + try { + // To prevent ivy from logging to system out + System.setOut(printStream) + val artifacts = extractMavenCoordinates(coordinates) + // Directories for caching downloads through ivy and storing the jars when maven coordinates + // are supplied to spark-submit + val packagesDirectory: File = new File(ivySettings.getDefaultIvyUserDir, "jars") + // scalastyle:off println + printStream.println( + s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") + printStream.println(s"The jars for the packages stored in: $packagesDirectory") + // scalastyle:on println + + val ivy = Ivy.newInstance(ivySettings) + ivy.pushContext() + + // Set resolve options to download transitive dependencies as well + val resolveOptions = new ResolveOptions + resolveOptions.setTransitive(transitive) + val retrieveOptions = new RetrieveOptions + // Turn downloading and logging off for testing + if (isTest) { + resolveOptions.setDownload(false) + resolveOptions.setLog(LogOptions.LOG_QUIET) + retrieveOptions.setLog(LogOptions.LOG_QUIET) + } else { + resolveOptions.setDownload(true) + } + // retrieve all resolved dependencies + retrieveOptions.setDestArtifactPattern( + packagesDirectory.getAbsolutePath + File.separator + + "[organization]_[artifact]-[revision](-[classifier]).[ext]") + retrieveOptions.setConfs(Array(ivyConfName)) + + // Add exclusion rules for Spark and Scala Library + addExclusionRules(ivySettings, ivyConfName, md) + // add all supplied maven artifacts as dependencies + addDependenciesToIvy(md, artifacts, ivyConfName) + exclusions.foreach { e => + md.addExcludeRule(createExclusion(e + ":*", ivySettings, ivyConfName)) + } + // resolve dependencies + val rr: ResolveReport = ivy.resolve(md, resolveOptions) + if (rr.hasError) { + // 
+
+  private[util] def createExclusion(
+      coords: String,
+      ivySettings: IvySettings,
+      ivyConfName: String): ExcludeRule = {
+    val c = extractMavenCoordinates(coords).head
+    val id = new ArtifactId(new ModuleId(c.groupId, c.artifactId), "*", "*", "*")
+    val rule = new DefaultExcludeRule(id, ivySettings.getMatcher("glob"), null)
+    rule.addConfiguration(ivyConfName)
+    rule
+  }
+
+  private def isInvalidQueryString(tokens: Array[String]): Boolean = {
+    tokens.length != 2 || StringUtils.isBlank(tokens(0)) || StringUtils.isBlank(tokens(1))
+  }
+
+  /**
+   * Parse the `transitive`, `exclude` and `repos` parameter values from a URI query string.
+   * Unrecognized parameters are ignored with a warning.
+   *
+   * @param uri
+   *   The Ivy URI to be downloaded.
+   * @return
+   *   A tuple of the `transitive`, `exclude` and `repos` parameter values.
+   *
+   *   1. transitive: whether to download the dependency jars of the Ivy URI. The default value
+   *   is true and the value is case-insensitive. This mimics Hive's behaviour for parsing the
+   *   transitive parameter. Invalid values are treated as false. Example: Input:
+   *   exclude=org.mortbay.jetty:jetty&transitive=true Output: true
+   *
+   *   2. exclude: comma-separated exclusions to apply when resolving transitive dependencies,
+   *   consisting of `group:module` pairs separated by commas. Example: Input:
+   *   exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http Output:
+   *   [org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http]
+   *
+   *   3. repos: comma-separated repositories to use when resolving dependencies.
+   */
+  def parseQueryParams(uri: URI): (Boolean, String, String) = {
+    val uriQuery = uri.getQuery
+    if (uriQuery == null) {
+      (true, "", "")
+    } else {
+      val mapTokens = uriQuery.split("&").map(_.split("="))
+      if (mapTokens.exists(MavenUtils.isInvalidQueryString)) {
+        throw new IllegalArgumentException(
+          s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery")
+      }
+      val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1)
+
+      // Parse the transitive parameter (e.g., transitive=true) in an Ivy URI; defaults to true
+      val transitiveParams = groupedParams.get("transitive")
+      if (transitiveParams.map(_.length).getOrElse(0) > 1) {
+        logWarning(
+          "It's best to specify the `transitive` parameter in the Ivy URI query only once." +
+            " If it is specified multiple times, the last occurrence is selected")
+      }
+      val transitive =
+        transitiveParams
+          .flatMap(_.takeRight(1).map(_._2.equalsIgnoreCase("true")).headOption)
+          .getOrElse(true)
+
+      // Parse the exclusion list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http)
+      // in an Ivy URI. When downloading the Ivy URI jar, Spark does not download the transitive
+      // jars that appear in the exclusion list.
+      val exclusionList = groupedParams
+        .get("exclude")
+        .map { params =>
+          params
+            .map(_._2)
+            .flatMap { excludeString =>
+              val excludes = excludeString.split(",")
+              if (excludes.map(_.split(":")).exists(MavenUtils.isInvalidQueryString)) {
+                throw new IllegalArgumentException(
+                  s"Invalid exclude string in Ivy URI ${uri.toString}:" +
+                    " expected 'org:module,org:module,..', found " + excludeString)
+              }
+              excludes
+            }
+            .mkString(",")
+        }
+        .getOrElse("")
+
+      val repos = groupedParams
+        .get("repos")
+        .map { params =>
+          params
+            .map(_._2)
+            .flatMap(_.split(","))
+            .mkString(",")
+        }
+        .getOrElse("")
+
+      val validParams = Set("transitive", "exclude", "repos")
+      val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq
+      if (invalidParams.nonEmpty) {
+        logWarning(
+          s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " +
+            s"in Ivy URI query `$uriQuery`.")
+      }
+
+      (transitive, exclusionList, repos)
+    }
+  }
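+
+  // Illustrative example of the parsing implemented above, assuming an Ivy URI such as
+  //   ivy://org.mortbay.jetty:jetty:6.1.26?exclude=org.eclipse.jetty:jetty-http&transitive=false
+  //   parseQueryParams(new URI(uriString)) == (false, "org.eclipse.jetty:jetty-http", "")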
+ */ + def parseQueryParams(uri: URI): (Boolean, String, String) = { + val uriQuery = uri.getQuery + if (uriQuery == null) { + (true, "", "") + } else { + val mapTokens = uriQuery.split("&").map(_.split("=")) + if (mapTokens.exists(MavenUtils.isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery") + } + val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1) + + // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is true + val transitiveParams = groupedParams.get("transitive") + if (transitiveParams.map(_.length).getOrElse(0) > 1) { + logWarning( + "It's best to specify `transitive` parameter in ivy URI query only once." + + " If there are multiple `transitive` parameter, we will select the last one") + } + val transitive = + transitiveParams + .flatMap(_.takeRight(1).map(_._2.equalsIgnoreCase("true")).headOption) + .getOrElse(true) + + // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http) + // in an Ivy URI. When download Ivy URI jar, Spark won't download transitive jar + // in a excluded list. + val exclusionList = groupedParams + .get("exclude") + .map { params => + params + .map(_._2) + .flatMap { excludeString => + val excludes = excludeString.split(",") + if (excludes.map(_.split(":")).exists(MavenUtils.isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid exclude string in Ivy URI ${uri.toString}:" + + " expected 'org:module,org:module,..', found " + excludeString) + } + excludes + } + .mkString(",") + } + .getOrElse("") + + val repos = groupedParams + .get("repos") + .map { params => + params + .map(_._2) + .flatMap(_.split(",")) + .mkString(",") + } + .getOrElse("") + + val validParams = Set("transitive", "exclude", "repos") + val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq + if (invalidParams.nonEmpty) { + logWarning( + s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " + + s"in Ivy URI query `$uriQuery`.") + } + + (transitive, exclusionList, repos) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index c5e23cae1f847..610a6726dc35c 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2425,10 +2425,10 @@ package object config { .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " + "package files from spark.jars.packages. 
" + "This will override the Ivy property ivy.default.ivy.user.dir " + - "which defaults to ~/.ivy2.") + "which defaults to ~/.ivy2.5.2") .version("1.3.0") .stringConf - .createOptional + .createWithDefault("~/.ivy2.5.2") private[spark] val JAR_IVY_SETTING_PATH = ConfigBuilder("spark.jars.ivySettings") diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala index 0dcdba3dfb86b..50a7d9780c800 100644 --- a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala +++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala @@ -378,7 +378,8 @@ private[deploy] object IvyTestUtils { f(repo.toURI.toString) } finally { // Clean up - if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) { + if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2") || + repo.toString.contains(".ivy2.5.2")) { val groupDir = getBaseGroupDirectory(artifact, useIvyLayout) FileUtils.deleteDirectory(new File(repo, groupDir + File.separator + artifact.artifactId)) deps.foreach { _.foreach { dep => diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 960b9959f0c21..ba208517507bb 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -1,43 +1,44 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar +RoaringBitmap/0.9.35//RoaringBitmap-0.9.35.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar -antlr4-runtime/4.8//antlr4-runtime-4.8.jar +antlr4-runtime/4.9.3//antlr4-runtime-4.9.3.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar aopalliance/1.0//aopalliance-1.0.jar apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar api-util/1.0.0-M20//api-util-1.0.0-M20.jar -arpack/2.2.1//arpack-2.2.1.jar +arpack/3.0.2//arpack-3.0.2.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/2.0.0//arrow-format-2.0.0.jar -arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar -arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar -arrow-vector/2.0.0//arrow-vector-2.0.0.jar +arrow-format/10.0.0//arrow-format-10.0.0.jar +arrow-memory-core/10.0.0//arrow-memory-core-10.0.0.jar +arrow-memory-netty/10.0.0//arrow-memory-netty-10.0.0.jar +arrow-vector/10.0.0//arrow-vector-10.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar -automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.10.2//avro-ipc-1.10.2.jar -avro-mapred/1.10.2//avro-mapred-1.10.2.jar -avro/1.10.2//avro-1.10.2.jar -blas/2.2.1//blas-2.2.1.jar +avro-ipc/1.11.1//avro-ipc-1.11.1.jar +avro-mapred/1.11.1//avro-mapred-1.11.1.jar +avro/1.11.1//avro-1.11.1.jar +azure-storage/2.0.0//azure-storage-2.0.0.jar +blas/3.0.2//blas-3.0.2.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar -breeze_2.12/1.2//breeze_2.12-1.2.jar +breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar +breeze_2.12/2.1.0//breeze_2.12-2.1.0.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar 
commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar -commons-cli/1.2//commons-cli-1.2.jar +commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar -commons-compiler/3.0.16//commons-compiler-3.0.16.jar +commons-collections4/4.4//commons-collections4-4.4.jar +commons-compiler/3.1.7//commons-compiler-3.1.7.jar commons-compress/1.21//commons-compress-1.21.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar @@ -51,8 +52,8 @@ commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.6//commons-text-1.6.jar -compress-lzf/1.0.3//compress-lzf-1.0.3.jar +commons-text/1.10.0//commons-text-1.10.0.jar +compress-lzf/1.1//compress-lzf-1.1.jar core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar curator-framework/2.7.1//curator-framework-2.7.1.jar @@ -62,7 +63,9 @@ datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar +flatbuffers-java/1.12.0//flatbuffers-java-1.12.0.jar +gcs-connector/hadoop2-2.2.7/shaded/gcs-connector-hadoop2-2.2.7-shaded.jar +gmetric4j/1.0.10//gmetric4j-1.0.10.jar generex/1.0.2//generex-1.0.2.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar @@ -91,12 +94,12 @@ hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar hive-metastore/2.3.9//hive-metastore-2.3.9.jar hive-serde/2.3.9//hive-serde-2.3.9.jar -hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar +hive-service-rpc/3.1.3//hive-service-rpc-3.1.3.jar hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar hive-shims/2.3.9//hive-shims-2.3.9.jar -hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar +hive-storage-api/2.7.3//hive-storage-api-2.7.3.jar hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar @@ -105,16 +108,17 @@ htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.12.3//jackson-annotations-2.12.3.jar +ivy/2.5.1//ivy-2.5.1.jar +jackson-annotations/2.14.0//jackson-annotations-2.14.0.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.12.3//jackson-core-2.12.3.jar -jackson-databind/2.12.3//jackson-databind-2.12.3.jar -jackson-dataformat-yaml/2.12.3//jackson-dataformat-yaml-2.12.3.jar -jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar +jackson-core/2.14.0//jackson-core-2.14.0.jar +jackson-databind/2.14.0//jackson-databind-2.14.0.jar +jackson-dataformat-cbor/2.14.0//jackson-dataformat-cbor-2.14.0.jar +jackson-dataformat-yaml/2.14.0//jackson-dataformat-yaml-2.14.0.jar +jackson-datatype-jsr310/2.14.0//jackson-datatype-jsr310-2.14.0.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.12.3//jackson-module-scala_2.12-2.12.3.jar 
+jackson-module-scala_2.12/2.14.0//jackson-module-scala_2.12-2.14.0.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar @@ -122,14 +126,14 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.0.16//janino-3.0.16.jar +janino/3.1.7//janino-3.1.7.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar +jcl-over-slf4j/2.0.3//jcl-over-slf4j-2.0.3.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar @@ -139,9 +143,10 @@ jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar +jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.10//joda-time-2.10.10.jar +joda-time/2.12.0//joda-time-2.12.0.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar @@ -152,76 +157,80 @@ json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar +jul-to-slf4j/2.0.3//jul-to-slf4j-2.0.3.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.4.1//kubernetes-client-5.4.1.jar -kubernetes-model-admissionregistration/5.4.1//kubernetes-model-admissionregistration-5.4.1.jar -kubernetes-model-apiextensions/5.4.1//kubernetes-model-apiextensions-5.4.1.jar -kubernetes-model-apps/5.4.1//kubernetes-model-apps-5.4.1.jar -kubernetes-model-autoscaling/5.4.1//kubernetes-model-autoscaling-5.4.1.jar -kubernetes-model-batch/5.4.1//kubernetes-model-batch-5.4.1.jar -kubernetes-model-certificates/5.4.1//kubernetes-model-certificates-5.4.1.jar -kubernetes-model-common/5.4.1//kubernetes-model-common-5.4.1.jar -kubernetes-model-coordination/5.4.1//kubernetes-model-coordination-5.4.1.jar -kubernetes-model-core/5.4.1//kubernetes-model-core-5.4.1.jar -kubernetes-model-discovery/5.4.1//kubernetes-model-discovery-5.4.1.jar -kubernetes-model-events/5.4.1//kubernetes-model-events-5.4.1.jar -kubernetes-model-extensions/5.4.1//kubernetes-model-extensions-5.4.1.jar -kubernetes-model-flowcontrol/5.4.1//kubernetes-model-flowcontrol-5.4.1.jar -kubernetes-model-metrics/5.4.1//kubernetes-model-metrics-5.4.1.jar -kubernetes-model-networking/5.4.1//kubernetes-model-networking-5.4.1.jar -kubernetes-model-node/5.4.1//kubernetes-model-node-5.4.1.jar -kubernetes-model-policy/5.4.1//kubernetes-model-policy-5.4.1.jar -kubernetes-model-rbac/5.4.1//kubernetes-model-rbac-5.4.1.jar -kubernetes-model-scheduling/5.4.1//kubernetes-model-scheduling-5.4.1.jar -kubernetes-model-storageclass/5.4.1//kubernetes-model-storageclass-5.4.1.jar -lapack/2.2.1//lapack-2.2.1.jar +kubernetes-client-api/6.2.0//kubernetes-client-api-6.2.0.jar +kubernetes-client/6.2.0//kubernetes-client-6.2.0.jar +kubernetes-httpclient-okhttp/6.2.0//kubernetes-httpclient-okhttp-6.2.0.jar 
+kubernetes-model-admissionregistration/6.2.0//kubernetes-model-admissionregistration-6.2.0.jar +kubernetes-model-apiextensions/6.2.0//kubernetes-model-apiextensions-6.2.0.jar +kubernetes-model-apps/6.2.0//kubernetes-model-apps-6.2.0.jar +kubernetes-model-autoscaling/6.2.0//kubernetes-model-autoscaling-6.2.0.jar +kubernetes-model-batch/6.2.0//kubernetes-model-batch-6.2.0.jar +kubernetes-model-certificates/6.2.0//kubernetes-model-certificates-6.2.0.jar +kubernetes-model-common/6.2.0//kubernetes-model-common-6.2.0.jar +kubernetes-model-coordination/6.2.0//kubernetes-model-coordination-6.2.0.jar +kubernetes-model-core/6.2.0//kubernetes-model-core-6.2.0.jar +kubernetes-model-discovery/6.2.0//kubernetes-model-discovery-6.2.0.jar +kubernetes-model-events/6.2.0//kubernetes-model-events-6.2.0.jar +kubernetes-model-extensions/6.2.0//kubernetes-model-extensions-6.2.0.jar +kubernetes-model-flowcontrol/6.2.0//kubernetes-model-flowcontrol-6.2.0.jar +kubernetes-model-gatewayapi/6.2.0//kubernetes-model-gatewayapi-6.2.0.jar +kubernetes-model-metrics/6.2.0//kubernetes-model-metrics-6.2.0.jar +kubernetes-model-networking/6.2.0//kubernetes-model-networking-6.2.0.jar +kubernetes-model-node/6.2.0//kubernetes-model-node-6.2.0.jar +kubernetes-model-policy/6.2.0//kubernetes-model-policy-6.2.0.jar +kubernetes-model-rbac/6.2.0//kubernetes-model-rbac-6.2.0.jar +kubernetes-model-scheduling/6.2.0//kubernetes-model-scheduling-6.2.0.jar +kubernetes-model-storageclass/6.2.0//kubernetes-model-storageclass-6.2.0.jar +lapack/3.0.2//lapack-3.0.2.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.14.1//libthrift-0.12.0.jar -log4j/1.2.17//log4j-1.2.17.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar +log4j-api/2.19.0//log4j-api-2.19.0.jar +log4j-core/2.19.0//log4j-core-2.19.0.jar +log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar -lz4-java/1.7.1//lz4-java-1.7.1.jar -macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar -mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar -metrics-core/4.2.0//metrics-core-4.2.0.jar -metrics-graphite/4.2.0//metrics-graphite-4.2.0.jar -metrics-jmx/4.2.0//metrics-jmx-4.2.0.jar -metrics-json/4.2.0//metrics-json-4.2.0.jar -metrics-jvm/4.2.0//metrics-jvm-4.2.0.jar +lz4-java/1.8.0//lz4-java-1.8.0.jar +mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar +metrics-core/4.2.12//metrics-core-4.2.12.jar +metrics-graphite/4.2.12//metrics-graphite-4.2.12.jar +metrics-jmx/4.2.12//metrics-jmx-4.2.12.jar +metrics-json/4.2.12//metrics-json-4.2.12.jar +metrics-jvm/4.2.12//metrics-jvm-4.2.12.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.68.Final//netty-all-4.1.68.Final.jar objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar -okio/1.14.0//okio-1.14.0.jar +okio/1.15.0//okio-1.15.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.6.14//orc-core-1.6.14.jar -orc-mapreduce/1.6.14//orc-mapreduce-1.6.14.jar -orc-shims/1.6.14//orc-shims-1.6.14.jar +orc-core/1.8.0/shaded-protobuf/orc-core-1.8.0-shaded-protobuf.jar +orc-mapreduce/1.8.0/shaded-protobuf/orc-mapreduce-1.8.0-shaded-protobuf.jar +orc-shims/1.8.0//orc-shims-1.8.0.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.12.2//parquet-column-1.12.2.jar -parquet-common/1.12.2//parquet-common-1.12.2.jar -parquet-encoding/1.12.2//parquet-encoding-1.12.2.jar 
-parquet-format-structures/1.12.2//parquet-format-structures-1.12.2.jar -parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar -parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar +parquet-column/1.12.3//parquet-column-1.12.3.jar +parquet-common/1.12.3//parquet-common-1.12.3.jar +parquet-encoding/1.12.3//parquet-encoding-1.12.3.jar +parquet-format-structures/1.12.3//parquet-format-structures-1.12.3.jar +parquet-hadoop/1.12.3//parquet-hadoop-1.12.3.jar +parquet-jackson/1.12.3//parquet-jackson-1.12.3.jar +pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.5//py4j-0.10.9.5.jar -pyrolite/4.30//pyrolite-4.30.jar -rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar -scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar -scala-compiler/2.12.15//scala-compiler-2.12.15.jar -scala-library/2.12.15//scala-library-2.12.15.jar +py4j/0.10.9.7//py4j-0.10.9.7.jar +remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar +rocksdbjni/7.7.3//rocksdbjni-7.7.3.jar +scala-collection-compat_2.12/2.7.0//scala-collection-compat_2.12-2.7.0.jar +scala-compiler/2.12.17//scala-compiler-2.12.17.jar +scala-library/2.12.17//scala-library-2.12.17.jar scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar -scala-reflect/2.12.15//scala-reflect-2.12.15.jar -scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.9.0//shims-0.9.0.jar -slf4j-api/1.7.30//slf4j-api-1.7.30.jar -slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.27//snakeyaml-1.27.jar +scala-reflect/2.12.17//scala-reflect-2.12.17.jar +scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar +shims/0.9.35//shims-0.9.35.jar +slf4j-api/2.0.3//slf4j-api-2.0.3.jar +snakeyaml/1.33//snakeyaml-1.33.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar @@ -230,12 +239,12 @@ spire_2.12/0.17.0//spire_2.12-0.17.0.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.5.0//threeten-extra-1.5.0.jar -tink/1.6.0//tink-1.6.0.jar +threeten-extra/1.7.1//threeten-extra-1.7.1.jar +tink/1.7.0//tink-1.7.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar -xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar +xbean-asm9-shaded/4.22//xbean-asm9-shaded-4.22.jar xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar @@ -243,4 +252,4 @@ xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar -zstd-jni/1.5.0-4//zstd-jni-1.5.0-4.jar +zstd-jni/1.5.2-5//zstd-jni-1.5.2-5.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index c76702cd0af01..83d98ba60d653 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.45//RoaringBitmap-0.9.45.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.26//aircompressor-0.26.jar +aircompressor/0.25//aircompressor-0.25.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar aliyun-java-sdk-core/4.5.10//aliyun-java-sdk-core-4.5.10.jar aliyun-java-sdk-kms/2.11.0//aliyun-java-sdk-kms-2.11.0.jar @@ -24,7 +24,7 @@ audience-annotations/0.5.0//audience-annotations-0.5.0.jar 
avro-ipc/1.11.2//avro-ipc-1.11.2.jar avro-mapred/1.11.2//avro-mapred-1.11.2.jar avro/1.11.2//avro-1.11.2.jar -aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar +aws-java-sdk-bundle/1.12.367//aws-java-sdk-bundle-1.12.367.jar azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar azure-storage/7.0.1//azure-storage-7.0.1.jar @@ -45,15 +45,15 @@ commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.13.0//commons-io-2.13.0.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.12.0//commons-lang3-3.12.0.jar +commons-lang3/3.13.0//commons-lang3-3.13.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.10.0//commons-text-1.10.0.jar compress-lzf/1.1.2//compress-lzf-1.1.2.jar -curator-client/2.13.0//curator-client-2.13.0.jar -curator-framework/2.13.0//curator-framework-2.13.0.jar -curator-recipes/2.13.0//curator-recipes-2.13.0.jar +curator-client/5.2.0//curator-client-5.2.0.jar +curator-framework/5.2.0//curator-framework-5.2.0.jar +curator-recipes/5.2.0//curator-recipes-5.2.0.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar @@ -62,21 +62,21 @@ datasketches-memory/2.1.0//datasketches-memory-2.1.0.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.12.0//flatbuffers-java-1.12.0.jar -gcs-connector/hadoop3-2.2.14/shaded/gcs-connector-hadoop3-2.2.14-shaded.jar +gcs-connector/hadoop3-2.2.17/shaded/gcs-connector-hadoop3-2.2.17-shaded.jar gmetric4j/1.0.10//gmetric4j-1.0.10.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -hadoop-aliyun/3.3.4//hadoop-aliyun-3.3.4.jar -hadoop-annotations/3.3.4//hadoop-annotations-3.3.4.jar -hadoop-aws/3.3.4//hadoop-aws-3.3.4.jar -hadoop-azure-datalake/3.3.4//hadoop-azure-datalake-3.3.4.jar -hadoop-azure/3.3.4//hadoop-azure-3.3.4.jar -hadoop-client-api/3.3.4//hadoop-client-api-3.3.4.jar -hadoop-client-runtime/3.3.4//hadoop-client-runtime-3.3.4.jar -hadoop-cloud-storage/3.3.4//hadoop-cloud-storage-3.3.4.jar -hadoop-openstack/3.3.4//hadoop-openstack-3.3.4.jar +hadoop-aliyun/3.3.6//hadoop-aliyun-3.3.6.jar +hadoop-annotations/3.3.6//hadoop-annotations-3.3.6.jar +hadoop-aws/3.3.6//hadoop-aws-3.3.6.jar +hadoop-azure-datalake/3.3.6//hadoop-azure-datalake-3.3.6.jar +hadoop-azure/3.3.6//hadoop-azure-3.3.6.jar +hadoop-client-api/3.3.6//hadoop-client-api-3.3.6.jar +hadoop-client-runtime/3.3.6//hadoop-client-runtime-3.3.6.jar +hadoop-cloud-storage/3.3.6//hadoop-cloud-storage-3.3.6.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar -hadoop-yarn-server-web-proxy/3.3.4//hadoop-yarn-server-web-proxy-3.3.4.jar +hadoop-openstack/3.3.6//hadoop-openstack-3.3.6.jar +hadoop-yarn-server-web-proxy/3.3.6//hadoop-yarn-server-web-proxy-3.3.6.jar hive-beeline/2.3.9//hive-beeline-2.3.9.jar hive-cli/2.3.9//hive-cli-2.3.9.jar hive-common/2.3.9//hive-common-2.3.9.jar @@ -98,8 +98,8 @@ httpclient/4.5.14//httpclient-4.5.14.jar httpcore/4.4.16//httpcore-4.4.16.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.1//ivy-2.5.1.jar -jackson-annotations/2.15.2//jackson-annotations-2.15.2.jar +ivy/2.5.2//ivy-2.5.2.jar +jackson-annotations/2.16.1//jackson-annotations-2.16.1.jar 
jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.15.2//jackson-core-2.15.2.jar jackson-databind/2.15.2//jackson-databind-2.15.2.jar @@ -118,7 +118,6 @@ janino/3.1.9//janino-3.1.9.jar javassist/3.29.2-GA//javassist-3.29.2-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/2.0.7//jcl-over-slf4j-2.0.7.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -129,9 +128,9 @@ jersey-container-servlet-core/2.40//jersey-container-servlet-core-2.40.jar jersey-container-servlet/2.40//jersey-container-servlet-2.40.jar jersey-hk2/2.40//jersey-hk2-2.40.jar jersey-server/2.40//jersey-server-2.40.jar -jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.52.v20230823//jetty-util-ajax-9.4.52.v20230823.jar -jetty-util/9.4.52.v20230823//jetty-util-9.4.52.v20230823.jar +jettison/1.5.4//jettison-1.5.4.jar +jetty-util-ajax/9.4.51.v20230217//jetty-util-ajax-9.4.51.v20230217.jar +jetty-util/9.4.51.v20230217//jetty-util-9.4.51.v20230217.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.12.5//joda-time-2.12.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar @@ -145,31 +144,31 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/2.0.7//jul-to-slf4j-2.0.7.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client-api/6.7.2//kubernetes-client-api-6.7.2.jar -kubernetes-client/6.7.2//kubernetes-client-6.7.2.jar -kubernetes-httpclient-okhttp/6.7.2//kubernetes-httpclient-okhttp-6.7.2.jar -kubernetes-model-admissionregistration/6.7.2//kubernetes-model-admissionregistration-6.7.2.jar -kubernetes-model-apiextensions/6.7.2//kubernetes-model-apiextensions-6.7.2.jar -kubernetes-model-apps/6.7.2//kubernetes-model-apps-6.7.2.jar -kubernetes-model-autoscaling/6.7.2//kubernetes-model-autoscaling-6.7.2.jar -kubernetes-model-batch/6.7.2//kubernetes-model-batch-6.7.2.jar -kubernetes-model-certificates/6.7.2//kubernetes-model-certificates-6.7.2.jar -kubernetes-model-common/6.7.2//kubernetes-model-common-6.7.2.jar -kubernetes-model-coordination/6.7.2//kubernetes-model-coordination-6.7.2.jar -kubernetes-model-core/6.7.2//kubernetes-model-core-6.7.2.jar -kubernetes-model-discovery/6.7.2//kubernetes-model-discovery-6.7.2.jar -kubernetes-model-events/6.7.2//kubernetes-model-events-6.7.2.jar -kubernetes-model-extensions/6.7.2//kubernetes-model-extensions-6.7.2.jar -kubernetes-model-flowcontrol/6.7.2//kubernetes-model-flowcontrol-6.7.2.jar -kubernetes-model-gatewayapi/6.7.2//kubernetes-model-gatewayapi-6.7.2.jar -kubernetes-model-metrics/6.7.2//kubernetes-model-metrics-6.7.2.jar -kubernetes-model-networking/6.7.2//kubernetes-model-networking-6.7.2.jar -kubernetes-model-node/6.7.2//kubernetes-model-node-6.7.2.jar -kubernetes-model-policy/6.7.2//kubernetes-model-policy-6.7.2.jar -kubernetes-model-rbac/6.7.2//kubernetes-model-rbac-6.7.2.jar -kubernetes-model-resource/6.7.2//kubernetes-model-resource-6.7.2.jar -kubernetes-model-scheduling/6.7.2//kubernetes-model-scheduling-6.7.2.jar -kubernetes-model-storageclass/6.7.2//kubernetes-model-storageclass-6.7.2.jar +kubernetes-client-api/6.8.1//kubernetes-client-api-6.8.1.jar +kubernetes-client/6.8.1//kubernetes-client-6.8.1.jar +kubernetes-httpclient-okhttp/6.8.1//kubernetes-httpclient-okhttp-6.8.1.jar +kubernetes-model-admissionregistration/6.8.1//kubernetes-model-admissionregistration-6.8.1.jar +kubernetes-model-apiextensions/6.8.1//kubernetes-model-apiextensions-6.8.1.jar +kubernetes-model-apps/6.8.1//kubernetes-model-apps-6.8.1.jar 
+kubernetes-model-autoscaling/6.8.1//kubernetes-model-autoscaling-6.8.1.jar +kubernetes-model-batch/6.8.1//kubernetes-model-batch-6.8.1.jar +kubernetes-model-certificates/6.8.1//kubernetes-model-certificates-6.8.1.jar +kubernetes-model-common/6.8.1//kubernetes-model-common-6.8.1.jar +kubernetes-model-coordination/6.8.1//kubernetes-model-coordination-6.8.1.jar +kubernetes-model-core/6.8.1//kubernetes-model-core-6.8.1.jar +kubernetes-model-discovery/6.8.1//kubernetes-model-discovery-6.8.1.jar +kubernetes-model-events/6.8.1//kubernetes-model-events-6.8.1.jar +kubernetes-model-extensions/6.8.1//kubernetes-model-extensions-6.8.1.jar +kubernetes-model-flowcontrol/6.8.1//kubernetes-model-flowcontrol-6.8.1.jar +kubernetes-model-gatewayapi/6.8.1//kubernetes-model-gatewayapi-6.8.1.jar +kubernetes-model-metrics/6.8.1//kubernetes-model-metrics-6.8.1.jar +kubernetes-model-networking/6.8.1//kubernetes-model-networking-6.8.1.jar +kubernetes-model-node/6.8.1//kubernetes-model-node-6.8.1.jar +kubernetes-model-policy/6.8.1//kubernetes-model-policy-6.8.1.jar +kubernetes-model-rbac/6.8.1//kubernetes-model-rbac-6.8.1.jar +kubernetes-model-resource/6.8.1//kubernetes-model-resource-6.8.1.jar +kubernetes-model-scheduling/6.8.1//kubernetes-model-scheduling-6.8.1.jar +kubernetes-model-storageclass/6.8.1//kubernetes-model-storageclass-6.8.1.jar lapack/3.0.3//lapack-3.0.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar @@ -212,9 +211,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.9.2/shaded-protobuf/orc-core-1.9.2-shaded-protobuf.jar -orc-mapreduce/1.9.2/shaded-protobuf/orc-mapreduce-1.9.2-shaded-protobuf.jar -orc-shims/1.9.2//orc-shims-1.9.2.jar +orc-core/1.9.1/shaded-protobuf/orc-core-1.9.1-shaded-protobuf.jar +orc-mapreduce/1.9.1/shaded-protobuf/orc-mapreduce-1.9.1-shaded-protobuf.jar +orc-shims/1.9.1//orc-shims-1.9.1.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar @@ -233,7 +232,7 @@ scala-compiler/2.12.18//scala-compiler-2.12.18.jar scala-library/2.12.18//scala-library-2.12.18.jar scala-parser-combinators_2.12/2.3.0//scala-parser-combinators_2.12-2.3.0.jar scala-reflect/2.12.18//scala-reflect-2.12.18.jar -scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar +scala-xml_2.12/2.2.0//scala-xml_2.12-2.2.0.jar shims/0.9.45//shims-0.9.45.jar slf4j-api/2.0.7//slf4j-api-2.0.7.jar snakeyaml-engine/2.6//snakeyaml-engine-2.6.jar @@ -249,11 +248,13 @@ super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.7.1//threeten-extra-1.7.1.jar tink/1.9.0//tink-1.9.0.jar transaction-api/1.1//transaction-api-1.1.jar +txw2/3.0.2//txw2-3.0.2.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar -wildfly-openssl/1.0.7.Final//wildfly-openssl-1.0.7.Final.jar +wildfly-openssl/1.1.3.Final//wildfly-openssl-1.1.3.Final.jar xbean-asm9-shaded/4.23//xbean-asm9-shaded-4.23.jar +xmlschema-core/2.3.0//xmlschema-core-2.3.0.jar xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.3//zookeeper-jute-3.6.3.jar zookeeper/3.6.3//zookeeper-3.6.3.jar -zstd-jni/1.5.5-4//zstd-jni-1.5.5-4.jar +zstd-jni/1.5.5-5//zstd-jni-1.5.5-5.jar diff --git a/dev/run-tests.py b/dev/run-tests.py index 9bf3095edb71f..688e399e7ff88 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -472,6 +472,8 @@ def main(): rm_r(os.path.join(SPARK_HOME, "work")) rm_r(os.path.join(USER_HOME, ".ivy2", "local", 
"org.apache.spark")) rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "local", "org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "cache", "org.apache.spark")) os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"]) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 36465cc3f4e86..3adfbeca8fd96 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,22 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.5 to 4.0 + +- Since Spark 4.0, Spark will roll event logs to archive them incrementally. To restore the behavior before Spark 4.0, you can set `spark.eventLog.rolling.enabled` to `false`. + +- Since Spark 4.0, Spark will compress event logs. To restore the behavior before Spark 4.0, you can set `spark.eventLog.compress` to `false`. + +- Since Spark 4.0, Spark workers will clean up worker and stopped application directories periodically. To restore the behavior before Spark 4.0, you can set `spark.worker.cleanup.enabled` to `false`. + +- Since Spark 4.0, `spark.shuffle.service.db.backend` is set to `ROCKSDB` by default which means Spark will use RocksDB store for shuffle service. To restore the behavior before Spark 4.0, you can set `spark.shuffle.service.db.backend` to `LEVELDB`. + +- In Spark 4.0, support for Apache Mesos as a resource manager was removed. + +- Since Spark 4.0, Spark uses `ReadWriteOncePod` instead of `ReadWriteOnce` access mode in persistence volume claims. To restore the legacy behavior, you can set `spark.kubernetes.legacy.useReadWriteOnceAccessMode` to `true`. + +- Since Spark 4.0, Spark uses `~/.ivy2.5.2` as Ivy user directory by default to isolate the existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead.