From 1c6383081a4df935c70516333449513a46926b8c Mon Sep 17 00:00:00 2001
From: Prabhjyot
Date: Thu, 2 May 2024 13:48:22 -0400
Subject: [PATCH] ODP-1095 Critical CVE fixes patch (#6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ODP-1304 [SPARK-44914][BUILD] Upgrade Apache Ivy to 2.5.2

This PR aims to upgrade Apache Ivy to 2.5.2 and protect old Ivy-based systems, like old Spark, from Apache Ivy 2.5.2's incompatibility by introducing a new `.ivy2.5.2` directory.

- Apache Spark 4.0.0 will create this directory once and reuse it, while all the other systems, like old Sparks, use the old one, `.ivy2`. So the behavior is the same as when Apache Spark 4.0.0 is installed and used on a new machine.
- For environments with user-provided `Ivy-path`es, the user might still hit the incompatibility. However, users can mitigate it because they already have full control over the `Ivy-path`es.

This was tried once and reverted logically due to Java 11 and Java 17 failures in the daily CIs.
- #42613
- #42668

Currently, the PR Builder also fails. If the PR passes the CIs, we can achieve the following.
- [Release notes](https://lists.apache.org/thread/9gcz4xrsn8c7o9gb377xfzvkb8jltffr)
- FIX: CVE-2022-46751: Apache Ivy Is Vulnerable to XML External Entity Injections

No.

Pass the CIs including `HiveExternalCatalogVersionsSuite`.

No.

Closes #45075 from dongjoon-hyun/SPARK-44914.

Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
(cherry picked from commit 3baa60afe25c821ced1e956502f7c77b719f73dd)

[SPARK-44968][BUILD] Downgrade ivy from 2.5.2 to 2.5.1

### What changes were proposed in this pull request?

After upgrading Ivy from 2.5.1 to 2.5.2 in SPARK-44914, daily tests for Java 11 and Java 17 began to experience ABORTED runs in the `HiveExternalCatalogVersionsSuite` test.

Java 11
- https://github.com/apache/spark/actions/runs/5953716283/job/16148657660
- https://github.com/apache/spark/actions/runs/5966131923/job/16185159550

Java 17
- https://github.com/apache/spark/actions/runs/5956925790/job/16158714165
- https://github.com/apache/spark/actions/runs/5969348559/job/16195073478

```
2023-08-23T23:00:49.6547573Z [info] 2023-08-23 16:00:48.209 - stdout> : java.lang.RuntimeException: problem during retrieve of org.apache.spark#spark-submit-parent-4c061f04-b951-4d06-8909-cde5452988d9: java.lang.RuntimeException: Multiple artifacts of the module log4j#log4j;1.2.17 are retrieved to the same file! Update the retrieve pattern to fix this error.
2023-08-23T23:00:49.6548745Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.ivy.core.retrieve.RetrieveEngine.retrieve(RetrieveEngine.java:238) 2023-08-23T23:00:49.6549572Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.ivy.core.retrieve.RetrieveEngine.retrieve(RetrieveEngine.java:89) 2023-08-23T23:00:49.6550334Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.ivy.Ivy.retrieve(Ivy.java:551) 2023-08-23T23:00:49.6551079Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.deploy.SparkSubmitUtils$.resolveMavenCoordinates(SparkSubmit.scala:1464) 2023-08-23T23:00:49.6552024Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.client.IsolatedClientLoader$.$anonfun$downloadVersion$2(IsolatedClientLoader.scala:138) 2023-08-23T23:00:49.6552884Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.util.package$.quietly(package.scala:42) 2023-08-23T23:00:49.6553755Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.client.IsolatedClientLoader$.downloadVersion(IsolatedClientLoader.scala:138) 2023-08-23T23:00:49.6554705Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.client.IsolatedClientLoader$.liftedTree1$1(IsolatedClientLoader.scala:65) 2023-08-23T23:00:49.6555637Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.client.IsolatedClientLoader$.forVersion(IsolatedClientLoader.scala:64) 2023-08-23T23:00:49.6556554Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:443) 2023-08-23T23:00:49.6557340Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:356) 2023-08-23T23:00:49.6558187Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:71) 2023-08-23T23:00:49.6559061Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:70) 2023-08-23T23:00:49.6559962Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$databaseExists$1(HiveExternalCatalog.scala:224) 2023-08-23T23:00:49.6560766Z [info] 2023-08-23 16:00:48.209 - stdout> at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) 2023-08-23T23:00:49.6561584Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:102) 2023-08-23T23:00:49.6562510Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:224) 2023-08-23T23:00:49.6563435Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:150) 2023-08-23T23:00:49.6564323Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:140) 2023-08-23T23:00:49.6565340Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:45) 2023-08-23T23:00:49.6566321Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.hive.HiveSessionStateBuilder.$anonfun$catalog$1(HiveSessionStateBuilder.scala:60) 2023-08-23T23:00:49.6567363Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog$lzycompute(SessionCatalog.scala:118) 
2023-08-23T23:00:49.6568372Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog(SessionCatalog.scala:118) 2023-08-23T23:00:49.6569393Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.catalog.SessionCatalog.tableExists(SessionCatalog.scala:490) 2023-08-23T23:00:49.6570685Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand.run(createDataSourceTables.scala:155) 2023-08-23T23:00:49.6571842Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113) 2023-08-23T23:00:49.6572932Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111) 2023-08-23T23:00:49.6573996Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125) 2023-08-23T23:00:49.6575045Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:97) 2023-08-23T23:00:49.6576066Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) 2023-08-23T23:00:49.6576937Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) 2023-08-23T23:00:49.6577807Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) 2023-08-23T23:00:49.6578620Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) 2023-08-23T23:00:49.6579432Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) 2023-08-23T23:00:49.6580357Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:97) 2023-08-23T23:00:49.6581331Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:93) 2023-08-23T23:00:49.6582239Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481) 2023-08-23T23:00:49.6583101Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82) 2023-08-23T23:00:49.6584088Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481) 2023-08-23T23:00:49.6585236Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30) 2023-08-23T23:00:49.6586519Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) 2023-08-23T23:00:49.6587686Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) 
2023-08-23T23:00:49.6588898Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30) 2023-08-23T23:00:49.6590014Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30) 2023-08-23T23:00:49.6590993Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457) 2023-08-23T23:00:49.6591930Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:93) 2023-08-23T23:00:49.6592914Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:80) 2023-08-23T23:00:49.6593856Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:78) 2023-08-23T23:00:49.6594687Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.Dataset.(Dataset.scala:219) 2023-08-23T23:00:49.6595379Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99) 2023-08-23T23:00:49.6596103Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) 2023-08-23T23:00:49.6596807Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96) 2023-08-23T23:00:49.6597520Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618) 2023-08-23T23:00:49.6598276Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) 2023-08-23T23:00:49.6599022Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613) 2023-08-23T23:00:49.6599819Z [info] 2023-08-23 16:00:48.209 - stdout> at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 2023-08-23T23:00:49.6600723Z [info] 2023-08-23 16:00:48.209 - stdout> at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) 2023-08-23T23:00:49.6601707Z [info] 2023-08-23 16:00:48.209 - stdout> at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 2023-08-23T23:00:49.6602513Z [info] 2023-08-23 16:00:48.209 - stdout> at java.base/java.lang.reflect.Method.invoke(Method.java:568) 2023-08-23T23:00:49.6603272Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) 2023-08-23T23:00:49.6604007Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) 2023-08-23T23:00:49.6604724Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.Gateway.invoke(Gateway.java:282) 2023-08-23T23:00:49.6605416Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) 2023-08-23T23:00:49.6606209Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.commands.CallCommand.execute(CallCommand.java:79) 2023-08-23T23:00:49.6606969Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) 2023-08-23T23:00:49.6607743Z [info] 2023-08-23 16:00:48.209 - stdout> at py4j.ClientServerConnection.run(ClientServerConnection.java:106) 
2023-08-23T23:00:49.6608415Z [info] 2023-08-23 16:00:48.209 - stdout> at java.base/java.lang.Thread.run(Thread.java:833)
2023-08-23T23:00:49.6609288Z [info] 2023-08-23 16:00:48.209 - stdout> Caused by: java.lang.RuntimeException: Multiple artifacts of the module log4j#log4j;1.2.17 are retrieved to the same file! Update the retrieve pattern to fix this error.
2023-08-23T23:00:49.6610288Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.ivy.core.retrieve.RetrieveEngine.determineArtifactsToCopy(RetrieveEngine.java:426)
2023-08-23T23:00:49.6611332Z [info] 2023-08-23 16:00:48.209 - stdout> at org.apache.ivy.core.retrieve.RetrieveEngine.retrieve(RetrieveEngine.java:122)
2023-08-23T23:00:49.6612046Z [info] 2023-08-23 16:00:48.209 - stdout> ... 66 more
2023-08-23T23:00:49.6612498Z [info] 2023-08-23 16:00:48.209 - stdout>
```

So this PR downgrades Ivy from 2.5.2 to 2.5.1 to restore the Java 11/17 daily tests.

### Why are the changes needed?

To restore the Java 11/17 daily tests.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

By changing the default Java version in `build_and_test.yml` to 17 for verification, the tests succeed after downgrading Ivy to 2.5.1.
- https://github.com/LuciferYang/spark/actions/runs/5972232677/job/16209970934

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #42668 from LuciferYang/test-java17.

Authored-by: yangjie01
Signed-off-by: yangjie01
(cherry picked from commit 4f8a1991e793bba2a6620760b6ee2cdc8f3ff21d)

[SPARK-44914][BUILD] Upgrade `Apache ivy` from 2.5.1 to 2.5.2

Upgrade Apache Ivy from 2.5.1 to 2.5.2.
[Release notes](https://lists.apache.org/thread/9gcz4xrsn8c7o9gb377xfzvkb8jltffr)
[CVE-2022-46751](https://www.cve.org/CVERecord?id=CVE-2022-46751)
The fix: https://github.com/apache/ant-ivy/commit/2be17bc18b0e1d4123007d579e43ba1a4b6fab3d

No.

Pass GA

No.

Closes #42613 from bjornjorgensen/ivy-2.5.2.

Authored-by: Bjørn Jørgensen
Signed-off-by: yangjie01
(cherry picked from commit 611e17e89260cd8d2b12edfc060f31a73773fa02)

[SPARK-41030][BUILD] Upgrade `Apache Ivy` to 2.5.1

Upgrade `Apache Ivy` from 2.5.0 to 2.5.1.
[Release notes](https://ant.apache.org/ivy/history/2.5.1/release-notes.html)
[CVE-2022-37865](https://www.cve.org/CVERecord?id=CVE-2022-37865) and [CVE-2022-37866](https://nvd.nist.gov/vuln/detail/CVE-2022-37866)

No.

Pass GA

Closes #38539 from bjornjorgensen/ivy-2.5.1.

Authored-by: Bjørn
Signed-off-by: Dongjoon Hyun
(cherry picked from commit 4bbdca600494c625f41c1f4c03fa85d97fe42db4)
(cherry picked from commit 0e5fa79b6161e2c9d54c2ee66a89bfb614d496fe)

# Conflicts:
#	dev/deps/spark-deps-hadoop-2-hive-2.3
#	dev/deps/spark-deps-hadoop-3-hive-2.3
#	docs/core-migration-guide.md
#	pom.xml

* ODP-1303 [SPARK-45732][BUILD] Upgrade commons-text to 1.11.0

The PR aims to upgrade `commons-text` from `1.10.0` to `1.11.0`.

Release note: https://commons.apache.org/proper/commons-text/changes-report.html#a1.11.0 includes some bug fixes, e.g.:
- Fix StringTokenizer.getTokenList to return an independent modifiable list. Fixes [TEXT-219](https://issues.apache.org/jira/browse/TEXT-219).
- Fix TextStringBuilder to over-allocate when ensuring capacity #452. Fixes [TEXT-228](https://issues.apache.org/jira/browse/TEXT-228).
- TextStringBuilder#hashCode() allocates a String on each call #387.

No.

Pass GA.

No.

Closes #43590 from panbingkun/SPARK-45732.
Authored-by: panbingkun
Signed-off-by: Hyukjin Kwon
(cherry picked from commit d38f07459494a99177b0436c1b4a784a8af8cbab)

[SPARK-40801][BUILD] Upgrade `Apache commons-text` to 1.10

Upgrade Apache commons-text from 1.9 to 1.10.0.
[CVE-2022-42889](https://nvd.nist.gov/vuln/detail/CVE-2022-42889)

No.

Pass GitHub Actions

Closes #38262 from bjornjorgensen/commons-text-1.10.

Authored-by: Bjørn
Signed-off-by: Yuming Wang
(cherry picked from commit 99abc94039e3c069d0fc8b8e7025522fea124cbb)

[SPARK-38231][BUILD] Upgrade commons-text to 1.9

This PR aims to upgrade commons-text to 1.9. 1.9 is the latest version and more popular than 1.6.
- https://commons.apache.org/proper/commons-text/changes-report.html#a1.9
- https://mvnrepository.com/artifact/org.apache.commons/commons-text

No

Pass GA

Closes #35542 from LuciferYang/upgrade-common-text.

Authored-by: yangjie01
Signed-off-by: Dongjoon Hyun
(cherry picked from commit 70f5bfd665b449fb3d7223c81fbd5a53d7985b9d)
(cherry picked from commit 5cb61e758a955ca8c6e28bbdc95468facdf2b20a)

# Conflicts:
#	pom.xml

* ODP-1302 [SPARK-43225][BUILD][SQL] Remove jackson-core-asl and jackson-mapper-asl from pre-built distribution

- Remove `jackson-core-asl` from the Maven dependencies.
- Change the scope of `jackson-mapper-asl` from compile to test.
- Replace all `Hive.get(conf)` with `Hive.getWithoutRegisterFns(conf)` (a sketch of this swap follows below).

To fix the CVE issue: https://github.com/apache/spark/security/dependabot/50.

No.

Manual test.

Closes #40893 from wangyum/SPARK-43225.

Lead-authored-by: Yuming Wang
Co-authored-by: Yuming Wang
Signed-off-by: Sean Owen
(cherry picked from commit 9c237d7bc7ba201c8c7e728f3488dab83995d9cb)

[SPARK-43868][SQL][TESTS] Remove `originalUDFs` from `TestHive` to ensure `ObjectHashAggregateExecBenchmark` can run successfully on GitHub Actions

This PR removes `originalUDFs` from `TestHive` to ensure `ObjectHashAggregateExecBenchmark` can run successfully on GitHub Actions.

After SPARK-43225, `org.codehaus.jackson:jackson-mapper-asl` becomes a test-scope dependency, so when using GA to run benchmarks it is not on the classpath, because GA uses https://github.com/apache/spark/blob/d61c77cac17029ee27319e6b766b48d314a4dd31/.github/workflows/benchmark.yml#L179-L183 instead of the sbt `Test/runMain`.
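For context, the `Hive.get(conf)` to `Hive.getWithoutRegisterFns(conf)` swap mentioned in the SPARK-43225 item above can be pictured with a minimal, hypothetical sketch (not part of the actual diff below); it assumes the Hive 2.3.x client API, where `getWithoutRegisterFns` is available:

```scala
// Hypothetical illustration only, not the actual Spark change: obtain a Hive
// client without registering all permanent functions up front.
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.metadata.Hive

object HiveClientSketch {
  def client(conf: HiveConf): Hive = {
    // Before: Hive.get(conf) also triggered permanent-function registration.
    // After:  getWithoutRegisterFns skips that registration step, which is the
    //         variant the commit message above says Spark switches to.
    Hive.getWithoutRegisterFns(conf)
  }
}
```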
`ObjectHashAggregateExecBenchmark` used `TestHive`, and `TestHive` will always call `org.apache.hadoop.hive.ql.exec.FunctionRegistry#getFunctionNames` to init `originalUDFs` before this pr, so when we run `ObjectHashAggregateExecBenchmark` on GitHub Actions, there will be the following exceptions: (cherry picked from commit 1c10e28caa54f32aa44a5406046894b8fcb335a4) # Conflicts: # pom.xml --------- Co-authored-by: Dongjoon Hyun Co-authored-by: Yuming Wang --- .../types/UTF8StringPropertyCheckSuite.scala | 8 - .../org/apache/spark/util/MavenUtils.scala | 658 ++++++++++++++++++ core/pom.xml | 8 - .../spark/internal/config/package.scala | 4 +- .../apache/spark/deploy/IvyTestUtils.scala | 3 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 200 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 248 +++---- dev/run-tests.py | 2 + docs/core-migration-guide.md | 34 +- pom.xml | 31 +- resource-managers/yarn/pom.xml | 10 + sql/core/pom.xml | 10 - .../apache/hive/service/cli/CLIService.java | 2 +- .../service/cli/session/HiveSessionImpl.java | 4 +- .../cli/session/HiveSessionImplwithUGI.java | 4 +- sql/hive/pom.xml | 4 - .../apache/spark/sql/hive/test/TestHive.scala | 8 - 17 files changed, 940 insertions(+), 298 deletions(-) create mode 100644 common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index ab488e18ba3f4..9693a037d50c3 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.unsafe.types -import org.apache.commons.text.similarity.LevenshteinDistance import org.scalacheck.{Arbitrary, Gen} import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks // scalastyle:off @@ -230,13 +229,6 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp } } - test("levenshteinDistance") { - forAll { (one: String, another: String) => - assert(toUTF8(one).levenshteinDistance(toUTF8(another)) === - LevenshteinDistance.getDefaultInstance.apply(one, another)) - } - } - test("hashCode") { forAll { (s: String) => assert(toUTF8(s).hashCode() === toUTF8(s).hashCode()) diff --git a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala new file mode 100644 index 0000000000000..08291859a32cc --- /dev/null +++ b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala @@ -0,0 +1,658 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.util + +import java.io.{File, IOException, PrintStream} +import java.net.URI +import java.text.ParseException +import java.util.UUID + +import org.apache.commons.lang3.StringUtils +import org.apache.ivy.Ivy +import org.apache.ivy.core.LogOptions +import org.apache.ivy.core.module.descriptor.{Artifact, DefaultDependencyDescriptor, DefaultExcludeRule, DefaultModuleDescriptor, ExcludeRule} +import org.apache.ivy.core.module.id.{ArtifactId, ModuleId, ModuleRevisionId} +import org.apache.ivy.core.report.{DownloadStatus, ResolveReport} +import org.apache.ivy.core.resolve.ResolveOptions +import org.apache.ivy.core.retrieve.RetrieveOptions +import org.apache.ivy.core.settings.IvySettings +import org.apache.ivy.plugins.matcher.GlobPatternMatcher +import org.apache.ivy.plugins.repository.file.FileRepository +import org.apache.ivy.plugins.resolver.{ChainResolver, FileSystemResolver, IBiblioResolver} + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.ArrayImplicits._ + +/** Provides utility functions to be used inside SparkSubmit. */ +private[spark] object MavenUtils extends Logging { + val JAR_IVY_SETTING_PATH_KEY: String = "spark.jars.ivySettings" + + // Exposed for testing + // var printStream = SparkSubmit.printStream + + // Exposed for testing. + // These components are used to make the default exclusion rules for Spark dependencies. + // We need to specify each component explicitly, otherwise we miss + // spark-streaming utility components. Underscore is there to differentiate between + // spark-streaming_2.1x and spark-streaming-kafka-0-10-assembly_2.1x + val IVY_DEFAULT_EXCLUDES: Seq[String] = Seq( + "catalyst_", + "core_", + "graphx_", + "kvstore_", + "launcher_", + "mllib_", + "mllib-local_", + "network-common_", + "network-shuffle_", + "repl_", + "sketch_", + "sql_", + "streaming_", + "tags_", + "unsafe_") + + /** + * Represents a Maven Coordinate + * + * @param groupId + * the groupId of the coordinate + * @param artifactId + * the artifactId of the coordinate + * @param version + * the version of the coordinate + */ + private[spark] case class MavenCoordinate( + groupId: String, + artifactId: String, + version: String) { + override def toString: String = s"$groupId:$artifactId:$version" + } + + /** + * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided in + * the format `groupId:artifactId:version` or `groupId/artifactId:version`. + * + * @param coordinates + * Comma-delimited string of maven coordinates + * @return + * Sequence of Maven coordinates + */ + def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = { + coordinates.split(",").map { p => + val splits = p.replace("/", ":").split(":") + require( + splits.length == 3, + s"Provided Maven Coordinates must be in the form " + + s"'groupId:artifactId:version'. The coordinate provided is: $p") + require( + splits(0) != null && splits(0).trim.nonEmpty, + s"The groupId cannot be null or " + + s"be whitespace. The groupId provided is: ${splits(0)}") + require( + splits(1) != null && splits(1).trim.nonEmpty, + s"The artifactId cannot be null or " + + s"be whitespace. The artifactId provided is: ${splits(1)}") + require( + splits(2) != null && splits(2).trim.nonEmpty, + s"The version cannot be null or " + + s"be whitespace. The version provided is: ${splits(2)}") + MavenCoordinate(splits(0), splits(1), splits(2)) + }.toImmutableArraySeq + } + + /** Path of the local Maven cache. 
*/ + private[util] def m2Path: File = { + if (SparkEnvUtils.isTesting) { + // test builds delete the maven cache, and this can cause flakiness + new File("dummy", ".m2" + File.separator + "repository") + } else { + new File(System.getProperty("user.home"), ".m2" + File.separator + "repository") + } + } + + /** + * Create a ChainResolver used by Ivy to search for and resolve dependencies. + * + * @param defaultIvyUserDir + * The default user path for Ivy + * @param useLocalM2AsCache + * Whether to use the local maven repo as a cache + * @return + * A ChainResolver used by Ivy to search for and resolve dependencies. + */ + private[util] def createRepoResolvers( + defaultIvyUserDir: File, + useLocalM2AsCache: Boolean = true): ChainResolver = { + // We need a chain resolver if we want to check multiple repositories + val cr = new ChainResolver + cr.setName("spark-list") + + if (useLocalM2AsCache) { + val localM2 = new IBiblioResolver + localM2.setM2compatible(true) + localM2.setRoot(m2Path.toURI.toString) + localM2.setUsepoms(true) + localM2.setName("local-m2-cache") + cr.add(localM2) + } + + val localIvy = new FileSystemResolver + val localIvyRoot = new File(defaultIvyUserDir, "local") + localIvy.setLocal(true) + localIvy.setRepository(new FileRepository(localIvyRoot)) + val ivyPattern = Seq( + localIvyRoot.getAbsolutePath, + "[organisation]", + "[module]", + "[revision]", + "ivys", + "ivy.xml").mkString(File.separator) + localIvy.addIvyPattern(ivyPattern) + val artifactPattern = Seq( + localIvyRoot.getAbsolutePath, + "[organisation]", + "[module]", + "[revision]", + "[type]s", + "[artifact](-[classifier]).[ext]").mkString(File.separator) + localIvy.addArtifactPattern(artifactPattern) + localIvy.setName("local-ivy-cache") + cr.add(localIvy) + + // the biblio resolver resolves POM declared dependencies + val br: IBiblioResolver = new IBiblioResolver + br.setM2compatible(true) + br.setUsepoms(true) + val defaultInternalRepo: Option[String] = sys.env.get("DEFAULT_ARTIFACT_REPOSITORY") + br.setRoot(defaultInternalRepo.getOrElse("https://repo1.maven.org/maven2/")) + br.setName("central") + cr.add(br) + + val sp: IBiblioResolver = new IBiblioResolver + sp.setM2compatible(true) + sp.setUsepoms(true) + sp.setRoot( + sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY", "https://repos.spark-packages.org/")) + sp.setName("spark-packages") + cr.add(sp) + cr + } + + /** + * Output a list of paths for the downloaded jars to be added to the classpath (will append to + * jars in SparkSubmit). + * + * @param artifacts + * Sequence of dependencies that were resolved and retrieved + * @param cacheDirectory + * Directory where jars are cached + * @return + * List of paths for the dependencies + */ + private[util] def resolveDependencyPaths( + artifacts: Array[AnyRef], + cacheDirectory: File): Seq[String] = { + artifacts + .map(_.asInstanceOf[Artifact]) + .filter { artifactInfo => + if (artifactInfo.getExt == "jar") { + true + } else { + logInfo(s"Skipping non-jar dependency ${artifactInfo.getId}") + false + } + } + .map { artifactInfo => + val artifact = artifactInfo.getModuleRevisionId + val extraAttrs = artifactInfo.getExtraAttributes + val classifier = if (extraAttrs.containsKey("classifier")) { + "-" + extraAttrs.get("classifier") + } else { + "" + } + cacheDirectory.getAbsolutePath + File.separator + + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}$classifier.jar" + }.toImmutableArraySeq + } + + /** Adds the given maven coordinates to Ivy's module descriptor. 
*/ + private[util] def addDependenciesToIvy( + md: DefaultModuleDescriptor, + artifacts: Seq[MavenCoordinate], + ivyConfName: String)(implicit printStream: PrintStream): Unit = { + artifacts.foreach { mvn => + val ri = ModuleRevisionId.newInstance(mvn.groupId, mvn.artifactId, mvn.version) + val dd = new DefaultDependencyDescriptor(ri, false, false) + dd.addDependencyConfiguration(ivyConfName, ivyConfName + "(runtime)") + // scalastyle:off println + printStream.println(s"${dd.getDependencyId} added as a dependency") + // scalastyle:on println + md.addDependency(dd) + } + } + + /** Add exclusion rules for dependencies already included in the spark-assembly */ + private def addExclusionRules( + ivySettings: IvySettings, + ivyConfName: String, + md: DefaultModuleDescriptor): Unit = { + // Add scala exclusion rule + md.addExcludeRule(createExclusion("*:scala-library:*", ivySettings, ivyConfName)) + + IVY_DEFAULT_EXCLUDES.foreach { comp => + md.addExcludeRule( + createExclusion(s"org.apache.spark:spark-$comp*:*", ivySettings, ivyConfName)) + } + } + + /** + * Build Ivy Settings using options with default resolvers + * + * @param remoteRepos + * Comma-delimited string of remote repositories other than maven central + * @param ivyPath + * The path to the local ivy repository + * @param useLocalM2AsCache + * Whether or not use `local-m2 repo` as cache + * @return + * An IvySettings object + */ + def buildIvySettings( + remoteRepos: Option[String], + ivyPath: Option[String], + useLocalM2AsCache: Boolean = true)(implicit printStream: PrintStream): IvySettings = { + val ivySettings: IvySettings = new IvySettings + processIvyPathArg(ivySettings, ivyPath) + + // create a pattern matcher + ivySettings.addMatcher(new GlobPatternMatcher) + // create the dependency resolvers + val repoResolver = createRepoResolvers(ivySettings.getDefaultIvyUserDir, useLocalM2AsCache) + ivySettings.addResolver(repoResolver) + ivySettings.setDefaultResolver(repoResolver.getName) + processRemoteRepoArg(ivySettings, remoteRepos) + // (since 2.5) Setting the property ivy.maven.lookup.sources to false + // disables the lookup of the sources artifact. + // And setting the property ivy.maven.lookup.javadoc to false + // disables the lookup of the javadoc artifact. 
+ ivySettings.setVariable("ivy.maven.lookup.sources", "false") + ivySettings.setVariable("ivy.maven.lookup.javadoc", "false") + ivySettings + } + + /** + * Load Ivy settings from a given filename, using supplied resolvers + * + * @param settingsFile + * Path to Ivy settings file + * @param remoteRepos + * Comma-delimited string of remote repositories other than maven central + * @param ivyPath + * The path to the local ivy repository + * @return + * An IvySettings object + */ + def loadIvySettings(settingsFile: String, remoteRepos: Option[String], ivyPath: Option[String])( + implicit printStream: PrintStream): IvySettings = { + val uri = new URI(settingsFile) + val file = Option(uri.getScheme).getOrElse("file") match { + case "file" => new File(uri.getPath) + case scheme => + throw new IllegalArgumentException( + s"Scheme $scheme not supported in " + + JAR_IVY_SETTING_PATH_KEY) + } + require(file.exists(), s"Ivy settings file $file does not exist") + require(file.isFile, s"Ivy settings file $file is not a normal file") + val ivySettings: IvySettings = new IvySettings + try { + ivySettings.load(file) + if (ivySettings.getDefaultIvyUserDir == null && ivySettings.getDefaultCache == null) { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + // `processIvyPathArg` can overwrite these later. + val alternateIvyDir = System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } + } catch { + case e @ (_: IOException | _: ParseException) => + throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e) + } + processIvyPathArg(ivySettings, ivyPath) + processRemoteRepoArg(ivySettings, remoteRepos) + ivySettings + } + + /* Set ivy settings for location of cache, if option is supplied */ + private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit = { + val alternateIvyDir = ivyPath.filterNot(_.trim.isEmpty).getOrElse { + // To protect old Ivy-based systems like old Spark from Apache Ivy 2.5.2's incompatibility. + System.getProperty("ivy.home", + System.getProperty("user.home") + File.separator + ".ivy2.5.2") + } + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } + + /* Add any optional additional remote repositories */ + private def processRemoteRepoArg(ivySettings: IvySettings, remoteRepos: Option[String])(implicit + printStream: PrintStream): Unit = { + remoteRepos.filterNot(_.trim.isEmpty).map(_.split(",")).foreach { repositoryList => + val cr = new ChainResolver + cr.setName("user-list") + + // add current default resolver, if any + Option(ivySettings.getDefaultResolver).foreach(cr.add) + + // add additional repositories, last resolution in chain takes precedence + repositoryList.zipWithIndex.foreach { case (repo, i) => + val brr: IBiblioResolver = new IBiblioResolver + brr.setM2compatible(true) + brr.setUsepoms(true) + brr.setRoot(repo) + brr.setName(s"repo-${i + 1}") + cr.add(brr) + // scalastyle:off println + printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}") + // scalastyle:on println + } + + ivySettings.addResolver(cr) + ivySettings.setDefaultResolver(cr.getName) + } + } + + /** A nice function to use in tests as well. Values are dummy strings. 
*/ + private[util] def getModuleDescriptor: DefaultModuleDescriptor = + DefaultModuleDescriptor.newDefaultInstance(ModuleRevisionId + // Include UUID in module name, so multiple clients resolving maven coordinate at the + // same time do not modify the same resolution file concurrently. + .newInstance("org.apache.spark", s"spark-submit-parent-${UUID.randomUUID.toString}", "1.0")) + + /** + * Clear ivy resolution from current launch. The resolution file is usually at + * ~/.ivy2/org.apache.spark-spark-submit-parent-$UUID-default.xml, + * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.xml, and + * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.properties. Since each launch + * will have its own resolution files created, delete them after each resolution to prevent + * accumulation of these files in the ivy cache dir. + */ + private def clearIvyResolutionFiles( + mdId: ModuleRevisionId, + defaultCacheFile: File, + ivyConfName: String): Unit = { + val currentResolutionFiles = Seq( + s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml", + s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml", + s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties") + currentResolutionFiles.foreach { filename => + new File(defaultCacheFile, filename).delete() + } + } + + /** + * Clear invalid cache files in ivy. The cache file is usually at + * ~/.ivy2/cache/${groupId}/${artifactId}/ivy-${version}.xml, + * ~/.ivy2/cache/${groupId}/${artifactId}/ivy-${version}.xml.original, and + * ~/.ivy2/cache/${groupId}/${artifactId}/ivydata-${version}.properties. + * Because when using `local-m2` repo as a cache, some invalid files were created. + * If not deleted here, an error prompt similar to `unknown resolver local-m2-cache` + * will be generated, making some confusion for users. 
+ */ + private def clearInvalidIvyCacheFiles( + mdId: ModuleRevisionId, + defaultCacheFile: File): Unit = { + val cacheFiles = Seq( + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivy-${mdId.getRevision}.xml", + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivy-${mdId.getRevision}.xml.original", + s"${mdId.getOrganisation}${File.separator}${mdId.getName}${File.separator}" + + s"ivydata-${mdId.getRevision}.properties") + cacheFiles.foreach { filename => + new File(defaultCacheFile, filename).delete() + } + } + + /** + * Resolves any dependencies that were supplied through maven coordinates + * + * @param coordinates + * Comma-delimited string of maven coordinates + * @param ivySettings + * An IvySettings containing resolvers to use + * @param noCacheIvySettings + * An no-cache(local-m2-cache) IvySettings containing resolvers to use + * @param transitive + * Whether resolving transitive dependencies, default is true + * @param exclusions + * Exclusions to apply when resolving transitive dependencies + * @return + * Seq of path to the jars of the given maven artifacts including their transitive + * dependencies + */ + def resolveMavenCoordinates( + coordinates: String, + ivySettings: IvySettings, + noCacheIvySettings: Option[IvySettings] = None, + transitive: Boolean, + exclusions: Seq[String] = Nil, + isTest: Boolean = false)(implicit printStream: PrintStream): Seq[String] = { + if (coordinates == null || coordinates.trim.isEmpty) { + Nil + } else { + val sysOut = System.out + // Default configuration name for ivy + val ivyConfName = "default" + + // A Module descriptor must be specified. Entries are dummy strings + val md = getModuleDescriptor + + md.setDefaultConf(ivyConfName) + try { + // To prevent ivy from logging to system out + System.setOut(printStream) + val artifacts = extractMavenCoordinates(coordinates) + // Directories for caching downloads through ivy and storing the jars when maven coordinates + // are supplied to spark-submit + val packagesDirectory: File = new File(ivySettings.getDefaultIvyUserDir, "jars") + // scalastyle:off println + printStream.println( + s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") + printStream.println(s"The jars for the packages stored in: $packagesDirectory") + // scalastyle:on println + + val ivy = Ivy.newInstance(ivySettings) + ivy.pushContext() + + // Set resolve options to download transitive dependencies as well + val resolveOptions = new ResolveOptions + resolveOptions.setTransitive(transitive) + val retrieveOptions = new RetrieveOptions + // Turn downloading and logging off for testing + if (isTest) { + resolveOptions.setDownload(false) + resolveOptions.setLog(LogOptions.LOG_QUIET) + retrieveOptions.setLog(LogOptions.LOG_QUIET) + } else { + resolveOptions.setDownload(true) + } + // retrieve all resolved dependencies + retrieveOptions.setDestArtifactPattern( + packagesDirectory.getAbsolutePath + File.separator + + "[organization]_[artifact]-[revision](-[classifier]).[ext]") + retrieveOptions.setConfs(Array(ivyConfName)) + + // Add exclusion rules for Spark and Scala Library + addExclusionRules(ivySettings, ivyConfName, md) + // add all supplied maven artifacts as dependencies + addDependenciesToIvy(md, artifacts, ivyConfName) + exclusions.foreach { e => + md.addExcludeRule(createExclusion(e + ":*", ivySettings, ivyConfName)) + } + // resolve dependencies + val rr: ResolveReport = ivy.resolve(md, resolveOptions) + if (rr.hasError) { + // 
SPARK-46302: When there are some corrupted jars in the local maven repo, + // we try to continue without the cache + val failedReports = rr.getArtifactsReports(DownloadStatus.FAILED, true) + if (failedReports.nonEmpty && noCacheIvySettings.isDefined) { + val failedArtifacts = failedReports.map(r => r.getArtifact) + logInfo(s"Download failed: ${failedArtifacts.mkString("[", ", ", "]")}, " + + s"attempt to retry while skipping local-m2-cache.") + failedArtifacts.foreach(artifact => { + clearInvalidIvyCacheFiles(artifact.getModuleRevisionId, ivySettings.getDefaultCache) + }) + ivy.popContext() + + val noCacheIvy = Ivy.newInstance(noCacheIvySettings.get) + noCacheIvy.pushContext() + + val noCacheRr = noCacheIvy.resolve(md, resolveOptions) + if (noCacheRr.hasError) { + throw new RuntimeException(noCacheRr.getAllProblemMessages.toString) + } + noCacheIvy.retrieve(noCacheRr.getModuleDescriptor.getModuleRevisionId, retrieveOptions) + val dependencyPaths = resolveDependencyPaths( + noCacheRr.getArtifacts.toArray, packagesDirectory) + noCacheIvy.popContext() + + dependencyPaths + } else { + throw new RuntimeException(rr.getAllProblemMessages.toString) + } + } else { + ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId, retrieveOptions) + val dependencyPaths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) + ivy.popContext() + + dependencyPaths + } + } finally { + System.setOut(sysOut) + clearIvyResolutionFiles(md.getModuleRevisionId, ivySettings.getDefaultCache, ivyConfName) + } + } + } + + private[util] def createExclusion( + coords: String, + ivySettings: IvySettings, + ivyConfName: String): ExcludeRule = { + val c = extractMavenCoordinates(coords).head + val id = new ArtifactId(new ModuleId(c.groupId, c.artifactId), "*", "*", "*") + val rule = new DefaultExcludeRule(id, ivySettings.getMatcher("glob"), null) + rule.addConfiguration(ivyConfName) + rule + } + + private def isInvalidQueryString(tokens: Array[String]): Boolean = { + tokens.length != 2 || StringUtils.isBlank(tokens(0)) || StringUtils.isBlank(tokens(1)) + } + + /** + * Parse URI query string's parameter value of `transitive`, `exclude` and `repos`. + * Other invalid parameters will be ignored. + * + * @param uri + * Ivy URI need to be downloaded. + * @return + * Tuple value of parameter `transitive`, `exclude` and `repos` value. + * + * 1. transitive: whether to download dependency jar of Ivy URI, default value is true and + * this parameter value is case-insensitive. This mimics Hive's behaviour for parsing the + * transitive parameter. Invalid value will be treat as false. Example: Input: + * exclude=org.mortbay.jetty:jetty&transitive=true Output: true + * + * 2. exclude: comma separated exclusions to apply when resolving transitive dependencies, + * consists of `group:module` pairs separated by commas. Example: Input: + * excludeorg.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http Output: + * [org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http] + * + * 3. repos: comma separated repositories to use when resolving dependencies. 
+ */ + def parseQueryParams(uri: URI): (Boolean, String, String) = { + val uriQuery = uri.getQuery + if (uriQuery == null) { + (true, "", "") + } else { + val mapTokens = uriQuery.split("&").map(_.split("=")) + if (mapTokens.exists(MavenUtils.isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery") + } + val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1) + + // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is true + val transitiveParams = groupedParams.get("transitive") + if (transitiveParams.map(_.length).getOrElse(0) > 1) { + logWarning( + "It's best to specify `transitive` parameter in ivy URI query only once." + + " If there are multiple `transitive` parameter, we will select the last one") + } + val transitive = + transitiveParams + .flatMap(_.takeRight(1).map(_._2.equalsIgnoreCase("true")).headOption) + .getOrElse(true) + + // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http) + // in an Ivy URI. When download Ivy URI jar, Spark won't download transitive jar + // in a excluded list. + val exclusionList = groupedParams + .get("exclude") + .map { params => + params + .map(_._2) + .flatMap { excludeString => + val excludes = excludeString.split(",") + if (excludes.map(_.split(":")).exists(MavenUtils.isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid exclude string in Ivy URI ${uri.toString}:" + + " expected 'org:module,org:module,..', found " + excludeString) + } + excludes + } + .mkString(",") + } + .getOrElse("") + + val repos = groupedParams + .get("repos") + .map { params => + params + .map(_._2) + .flatMap(_.split(",")) + .mkString(",") + } + .getOrElse("") + + val validParams = Set("transitive", "exclude", "repos") + val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq + if (invalidParams.nonEmpty) { + logWarning( + s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " + + s"in Ivy URI query `$uriQuery`.") + } + + (transitive, exclusionList, repos) + } + } +} diff --git a/core/pom.xml b/core/pom.xml index 5c74f48be9ada..924ffcf2fc016 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -479,14 +479,6 @@ commons-logging commons-logging - - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-core-asl - com.fasterxml.jackson.core jackson-core diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index be097a0e3d424..0b07be34f9acf 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2286,10 +2286,10 @@ package object config { .doc("Path to specify the Ivy user directory, used for the local Ivy cache and " + "package files from spark.jars.packages. 
" + "This will override the Ivy property ivy.default.ivy.user.dir " + - "which defaults to ~/.ivy2.") + "which defaults to ~/.ivy2.5.2") .version("1.3.0") .stringConf - .createOptional + .createWithDefault("~/.ivy2.5.2") private[spark] val JAR_IVY_SETTING_PATH = ConfigBuilder("spark.jars.ivySettings") diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala index b986be03e965c..ebed572a39ab9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala +++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala @@ -377,7 +377,8 @@ private[deploy] object IvyTestUtils { f(repo.toURI.toString) } finally { // Clean up - if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) { + if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2") || + repo.toString.contains(".ivy2.5.2")) { val groupDir = getBaseGroupDirectory(artifact, useIvyLayout) FileUtils.deleteDirectory(new File(repo, groupDir + File.separator + artifact.artifactId)) deps.foreach { _.foreach { dep => diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 0c5769cd66ad1..934b5e7e407cc 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -1,36 +1,35 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.9.25//RoaringBitmap-0.9.25.jar +RoaringBitmap/0.9.32//RoaringBitmap-0.9.32.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar -antlr4-runtime/4.8//antlr4-runtime-4.8.jar +antlr4-runtime/4.9.3//antlr4-runtime-4.9.3.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar aopalliance/1.0//aopalliance-1.0.jar apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar api-util/1.0.0-M20//api-util-1.0.0-M20.jar -arpack/2.2.1//arpack-2.2.1.jar +arpack/3.0.2//arpack-3.0.2.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/7.0.0//arrow-format-7.0.0.jar -arrow-memory-core/7.0.0//arrow-memory-core-7.0.0.jar -arrow-memory-netty/7.0.0//arrow-memory-netty-7.0.0.jar -arrow-vector/7.0.0//arrow-vector-7.0.0.jar +arrow-format/9.0.0//arrow-format-9.0.0.jar +arrow-memory-core/9.0.0//arrow-memory-core-9.0.0.jar +arrow-memory-netty/9.0.0//arrow-memory-netty-9.0.0.jar +arrow-vector/9.0.0//arrow-vector-9.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar -automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.11.0//avro-ipc-1.11.0.jar -avro-mapred/1.11.0//avro-mapred-1.11.0.jar -avro/1.11.0//avro-1.11.0.jar +avro-ipc/1.11.1//avro-ipc-1.11.1.jar +avro-mapred/1.11.1//avro-mapred-1.11.1.jar +avro/1.11.1//avro-1.11.1.jar azure-storage/2.0.0//azure-storage-2.0.0.jar -blas/2.2.1//blas-2.2.1.jar +blas/3.0.2//blas-3.0.2.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar -breeze_2.12/1.2//breeze_2.12-1.2.jar +breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar +breeze_2.12/2.1.0//breeze_2.12-2.1.0.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar @@ -39,7 +38,7 @@ 
commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar -commons-compiler/3.0.16//commons-compiler-3.0.16.jar +commons-compiler/3.1.7//commons-compiler-3.1.7.jar commons-compress/1.21//commons-compress-1.21.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar @@ -55,7 +54,6 @@ commons-net/3.1//commons-net-3.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar commons-text/1.10.0//commons-text-1.10.0.jar compress-lzf/1.1//compress-lzf-1.1.jar -core/1.1.2//core-1.1.2.jar curator-client/2.7.1//curator-client-2.7.1.jar curator-framework/2.7.1//curator-framework-2.7.1.jar curator-recipes/2.7.1//curator-recipes-2.7.1.jar @@ -65,7 +63,7 @@ datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.12.0//flatbuffers-java-1.12.0.jar -generex/1.0.2//generex-1.0.2.jar +gcs-connector/hadoop2-2.2.7/shaded/gcs-connector-hadoop2-2.2.7-shaded.jar gmetric4j/1.0.10//gmetric4j-1.0.10.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar @@ -97,12 +95,12 @@ hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar hive-metastore/2.3.9//hive-metastore-2.3.9.jar hive-serde/2.3.9//hive-serde-2.3.9.jar -hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar +hive-service-rpc/3.1.3//hive-service-rpc-3.1.3.jar hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar hive-shims/2.3.9//hive-shims-2.3.9.jar -hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar +hive-storage-api/2.7.3//hive-storage-api-2.7.3.jar hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar @@ -111,11 +109,11 @@ htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.1//ivy-2.5.1.jar +ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar +jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar @@ -129,14 +127,14 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.0.16//janino-3.0.16.jar +janino/3.1.7//janino-3.1.7.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar +jcl-over-slf4j/2.0.3//jcl-over-slf4j-2.0.3.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.36//jersey-client-2.36.jar jersey-common/2.36//jersey-common-2.36.jar 
@@ -146,10 +144,10 @@ jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar +jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.13//joda-time-2.10.13.jar +joda-time/2.11.2//joda-time-2.11.2.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar @@ -160,91 +158,97 @@ json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar +jul-to-slf4j/2.0.3//jul-to-slf4j-2.0.3.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.2//kubernetes-client-5.12.2.jar -kubernetes-model-admissionregistration/5.12.2//kubernetes-model-admissionregistration-5.12.2.jar -kubernetes-model-apiextensions/5.12.2//kubernetes-model-apiextensions-5.12.2.jar -kubernetes-model-apps/5.12.2//kubernetes-model-apps-5.12.2.jar -kubernetes-model-autoscaling/5.12.2//kubernetes-model-autoscaling-5.12.2.jar -kubernetes-model-batch/5.12.2//kubernetes-model-batch-5.12.2.jar -kubernetes-model-certificates/5.12.2//kubernetes-model-certificates-5.12.2.jar -kubernetes-model-common/5.12.2//kubernetes-model-common-5.12.2.jar -kubernetes-model-coordination/5.12.2//kubernetes-model-coordination-5.12.2.jar -kubernetes-model-core/5.12.2//kubernetes-model-core-5.12.2.jar -kubernetes-model-discovery/5.12.2//kubernetes-model-discovery-5.12.2.jar -kubernetes-model-events/5.12.2//kubernetes-model-events-5.12.2.jar -kubernetes-model-extensions/5.12.2//kubernetes-model-extensions-5.12.2.jar -kubernetes-model-flowcontrol/5.12.2//kubernetes-model-flowcontrol-5.12.2.jar -kubernetes-model-metrics/5.12.2//kubernetes-model-metrics-5.12.2.jar -kubernetes-model-networking/5.12.2//kubernetes-model-networking-5.12.2.jar -kubernetes-model-node/5.12.2//kubernetes-model-node-5.12.2.jar -kubernetes-model-policy/5.12.2//kubernetes-model-policy-5.12.2.jar -kubernetes-model-rbac/5.12.2//kubernetes-model-rbac-5.12.2.jar -kubernetes-model-scheduling/5.12.2//kubernetes-model-scheduling-5.12.2.jar -kubernetes-model-storageclass/5.12.2//kubernetes-model-storageclass-5.12.2.jar -lapack/2.2.1//lapack-2.2.1.jar +kubernetes-client-api/6.1.1//kubernetes-client-api-6.1.1.jar +kubernetes-client/6.1.1//kubernetes-client-6.1.1.jar +kubernetes-httpclient-okhttp/6.1.1//kubernetes-httpclient-okhttp-6.1.1.jar +kubernetes-model-admissionregistration/6.1.1//kubernetes-model-admissionregistration-6.1.1.jar +kubernetes-model-apiextensions/6.1.1//kubernetes-model-apiextensions-6.1.1.jar +kubernetes-model-apps/6.1.1//kubernetes-model-apps-6.1.1.jar +kubernetes-model-autoscaling/6.1.1//kubernetes-model-autoscaling-6.1.1.jar +kubernetes-model-batch/6.1.1//kubernetes-model-batch-6.1.1.jar +kubernetes-model-certificates/6.1.1//kubernetes-model-certificates-6.1.1.jar +kubernetes-model-common/6.1.1//kubernetes-model-common-6.1.1.jar +kubernetes-model-coordination/6.1.1//kubernetes-model-coordination-6.1.1.jar +kubernetes-model-core/6.1.1//kubernetes-model-core-6.1.1.jar +kubernetes-model-discovery/6.1.1//kubernetes-model-discovery-6.1.1.jar +kubernetes-model-events/6.1.1//kubernetes-model-events-6.1.1.jar +kubernetes-model-extensions/6.1.1//kubernetes-model-extensions-6.1.1.jar +kubernetes-model-flowcontrol/6.1.1//kubernetes-model-flowcontrol-6.1.1.jar 
+kubernetes-model-gatewayapi/6.1.1//kubernetes-model-gatewayapi-6.1.1.jar +kubernetes-model-metrics/6.1.1//kubernetes-model-metrics-6.1.1.jar +kubernetes-model-networking/6.1.1//kubernetes-model-networking-6.1.1.jar +kubernetes-model-node/6.1.1//kubernetes-model-node-6.1.1.jar +kubernetes-model-policy/6.1.1//kubernetes-model-policy-6.1.1.jar +kubernetes-model-rbac/6.1.1//kubernetes-model-rbac-6.1.1.jar +kubernetes-model-scheduling/6.1.1//kubernetes-model-scheduling-6.1.1.jar +kubernetes-model-storageclass/6.1.1//kubernetes-model-storageclass-6.1.1.jar +lapack/3.0.2//lapack-3.0.2.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.14.1//libthrift-0.14.1.jar -log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar -log4j-api/2.17.2//log4j-api-2.17.2.jar -log4j-core/2.17.2//log4j-core-2.17.2.jar -log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar +log4j-api/2.19.0//log4j-api-2.19.0.jar +log4j-core/2.19.0//log4j-core-2.19.0.jar +log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.7//metrics-core-4.2.7.jar -metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar -metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar -metrics-json/4.2.7//metrics-json-4.2.7.jar -metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar +metrics-core/4.2.10//metrics-core-4.2.10.jar +metrics-graphite/4.2.10//metrics-graphite-4.2.10.jar +metrics-jmx/4.2.10//metrics-jmx-4.2.10.jar +metrics-json/4.2.10//metrics-json-4.2.10.jar +metrics-jvm/4.2.10//metrics-jvm-4.2.10.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.74.Final//netty-all-4.1.74.Final.jar -netty-buffer/4.1.74.Final//netty-buffer-4.1.74.Final.jar -netty-codec/4.1.74.Final//netty-codec-4.1.74.Final.jar -netty-common/4.1.74.Final//netty-common-4.1.74.Final.jar -netty-handler/4.1.74.Final//netty-handler-4.1.74.Final.jar -netty-resolver/4.1.74.Final//netty-resolver-4.1.74.Final.jar -netty-tcnative-classes/2.0.48.Final//netty-tcnative-classes-2.0.48.Final.jar -netty-transport-classes-epoll/4.1.74.Final//netty-transport-classes-epoll-4.1.74.Final.jar -netty-transport-classes-kqueue/4.1.74.Final//netty-transport-classes-kqueue-4.1.74.Final.jar -netty-transport-native-epoll/4.1.74.Final/linux-aarch_64/netty-transport-native-epoll-4.1.74.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.74.Final/linux-x86_64/netty-transport-native-epoll-4.1.74.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.74.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.74.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.74.Final/osx-x86_64/netty-transport-native-kqueue-4.1.74.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.74.Final//netty-transport-native-unix-common-4.1.74.Final.jar -netty-transport/4.1.74.Final//netty-transport-4.1.74.Final.jar +netty-all/4.1.84.Final//netty-all-4.1.84.Final.jar +netty-buffer/4.1.84.Final//netty-buffer-4.1.84.Final.jar +netty-codec-http/4.1.84.Final//netty-codec-http-4.1.84.Final.jar +netty-codec-http2/4.1.84.Final//netty-codec-http2-4.1.84.Final.jar +netty-codec-socks/4.1.84.Final//netty-codec-socks-4.1.84.Final.jar +netty-codec/4.1.84.Final//netty-codec-4.1.84.Final.jar +netty-common/4.1.84.Final//netty-common-4.1.84.Final.jar +netty-handler-proxy/4.1.84.Final//netty-handler-proxy-4.1.84.Final.jar 
+netty-handler/4.1.84.Final//netty-handler-4.1.84.Final.jar +netty-resolver/4.1.84.Final//netty-resolver-4.1.84.Final.jar +netty-tcnative-classes/2.0.54.Final//netty-tcnative-classes-2.0.54.Final.jar +netty-transport-classes-epoll/4.1.84.Final//netty-transport-classes-epoll-4.1.84.Final.jar +netty-transport-classes-kqueue/4.1.84.Final//netty-transport-classes-kqueue-4.1.84.Final.jar +netty-transport-native-epoll/4.1.84.Final/linux-aarch_64/netty-transport-native-epoll-4.1.84.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.84.Final/linux-x86_64/netty-transport-native-epoll-4.1.84.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.84.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.84.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.84.Final/osx-x86_64/netty-transport-native-kqueue-4.1.84.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.84.Final//netty-transport-native-unix-common-4.1.84.Final.jar +netty-transport/4.1.84.Final//netty-transport-4.1.84.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar -okio/1.14.0//okio-1.14.0.jar +okio/1.15.0//okio-1.15.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.8//orc-core-1.7.8.jar -orc-mapreduce/1.7.8//orc-mapreduce-1.7.8.jar -orc-shims/1.7.8//orc-shims-1.7.8.jar +orc-core/1.8.0/shaded-protobuf/orc-core-1.8.0-shaded-protobuf.jar +orc-mapreduce/1.8.0/shaded-protobuf/orc-mapreduce-1.8.0-shaded-protobuf.jar +orc-shims/1.8.0//orc-shims-1.8.0.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.12.2//parquet-column-1.12.2.jar -parquet-common/1.12.2//parquet-common-1.12.2.jar -parquet-encoding/1.12.2//parquet-encoding-1.12.2.jar -parquet-format-structures/1.12.2//parquet-format-structures-1.12.2.jar -parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar -parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar +parquet-column/1.12.3//parquet-column-1.12.3.jar +parquet-common/1.12.3//parquet-common-1.12.3.jar +parquet-encoding/1.12.3//parquet-encoding-1.12.3.jar +parquet-format-structures/1.12.3//parquet-format-structures-1.12.3.jar +parquet-hadoop/1.12.3//parquet-hadoop-1.12.3.jar +parquet-jackson/1.12.3//parquet-jackson-1.12.3.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.5//py4j-0.10.9.5.jar +py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar -rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar -scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar -scala-compiler/2.12.15//scala-compiler-2.12.15.jar -scala-library/2.12.15//scala-library-2.12.15.jar +rocksdbjni/7.6.0//rocksdbjni-7.6.0.jar +scala-collection-compat_2.12/2.7.0//scala-collection-compat_2.12-2.7.0.jar +scala-compiler/2.12.17//scala-compiler-2.12.17.jar +scala-library/2.12.17//scala-library-2.12.17.jar scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar -scala-reflect/2.12.15//scala-reflect-2.12.15.jar -scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar -shims/0.9.25//shims-0.9.25.jar -slf4j-api/1.7.32//slf4j-api-1.7.32.jar +scala-reflect/2.12.17//scala-reflect-2.12.17.jar +scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar +shims/0.9.32//shims-0.9.32.jar +slf4j-api/2.0.3//slf4j-api-2.0.3.jar snakeyaml/1.31//snakeyaml-1.31.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar @@ -254,17 +258,17 @@ spire_2.12/0.17.0//spire_2.12-0.17.0.jar 
stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.5.0//threeten-extra-1.5.0.jar -tink/1.6.1//tink-1.6.1.jar +threeten-extra/1.7.1//threeten-extra-1.7.1.jar +tink/1.7.0//tink-1.7.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar -xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar +xbean-asm9-shaded/4.21//xbean-asm9-shaded-4.21.jar xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar -xz/1.9//xz-1.9.jar +xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar -zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar +zstd-jni/1.5.2-4//zstd-jni-1.5.2-4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 27d37136d6d18..8794f1f72bf72 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -1,7 +1,7 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/0.9.25//RoaringBitmap-0.9.25.jar +RoaringBitmap/0.9.32//RoaringBitmap-0.9.32.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.21//aircompressor-0.21.jar @@ -12,27 +12,26 @@ aliyun-java-sdk-ram/3.1.0//aliyun-java-sdk-ram-3.1.0.jar aliyun-sdk-oss/3.13.0//aliyun-sdk-oss-3.13.0.jar annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar -antlr4-runtime/4.8//antlr4-runtime-4.8.jar +antlr4-runtime/4.9.3//antlr4-runtime-4.9.3.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -arpack/2.2.1//arpack-2.2.1.jar +arpack/3.0.2//arpack-3.0.2.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/7.0.0//arrow-format-7.0.0.jar -arrow-memory-core/7.0.0//arrow-memory-core-7.0.0.jar -arrow-memory-netty/7.0.0//arrow-memory-netty-7.0.0.jar -arrow-vector/7.0.0//arrow-vector-7.0.0.jar +arrow-format/9.0.0//arrow-format-9.0.0.jar +arrow-memory-core/9.0.0//arrow-memory-core-9.0.0.jar +arrow-memory-netty/9.0.0//arrow-memory-netty-9.0.0.jar +arrow-vector/9.0.0//arrow-vector-9.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar -automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.11.0//avro-ipc-1.11.0.jar -avro-mapred/1.11.0//avro-mapred-1.11.0.jar -avro/1.11.0//avro-1.11.0.jar -aws-java-sdk-bundle/1.11.1026//aws-java-sdk-bundle-1.11.1026.jar +avro-ipc/1.11.1//avro-ipc-1.11.1.jar +avro-mapred/1.11.1//avro-mapred-1.11.1.jar +avro/1.11.1//avro-1.11.1.jar +aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar azure-storage/7.0.1//azure-storage-7.0.1.jar -blas/2.2.1//blas-2.2.1.jar +blas/3.0.2//blas-3.0.2.jar bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar -breeze_2.12/1.2//breeze_2.12-1.2.jar +breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar +breeze_2.12/2.1.0//breeze_2.12-2.1.0.jar cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.12/0.10.0//chill_2.12-0.10.0.jar @@ -40,7 +39,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar 
-commons-compiler/3.0.16//commons-compiler-3.0.16.jar +commons-compiler/3.1.7//commons-compiler-3.1.7.jar commons-compress/1.21//commons-compress-1.21.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar @@ -50,35 +49,32 @@ commons-lang3/3.12.0//commons-lang3-3.12.0.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.10.0//commons-text-1.10.0.jar -compress-lzf/1.1//compress-lzf-1.1.jar -core/1.1.2//core-1.1.2.jar -cos_api-bundle/5.6.19//cos_api-bundle-5.6.19.jar -curator-client/2.13.0//curator-client-2.13.0.jar -curator-framework/2.13.0//curator-framework-2.13.0.jar -curator-recipes/2.13.0//curator-recipes-2.13.0.jar +commons-text/1.11.0//commons-text-1.11.0.jar +compress-lzf/1.1.2//compress-lzf-1.1.2.jar +curator-client/5.2.0//curator-client-5.2.0.jar +curator-framework/5.2.0//curator-framework-5.2.0.jar +curator-recipes/5.2.0//curator-recipes-5.2.0.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.12.0//flatbuffers-java-1.12.0.jar -generex/1.0.2//generex-1.0.2.jar +gcs-connector/hadoop3-2.2.7/shaded/gcs-connector-hadoop3-2.2.7-shaded.jar gmetric4j/1.0.10//gmetric4j-1.0.10.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -hadoop-aliyun/3.3.2//hadoop-aliyun-3.3.2.jar -hadoop-annotations/3.3.2//hadoop-annotations-3.3.2.jar -hadoop-aws/3.3.2//hadoop-aws-3.3.2.jar -hadoop-azure-datalake/3.3.2//hadoop-azure-datalake-3.3.2.jar -hadoop-azure/3.3.2//hadoop-azure-3.3.2.jar -hadoop-client-api/3.3.2//hadoop-client-api-3.3.2.jar -hadoop-client-runtime/3.3.2//hadoop-client-runtime-3.3.2.jar -hadoop-cloud-storage/3.3.2//hadoop-cloud-storage-3.3.2.jar -hadoop-cos/3.3.2//hadoop-cos-3.3.2.jar -hadoop-openstack/3.3.2//hadoop-openstack-3.3.2.jar +hadoop-aliyun/3.3.4//hadoop-aliyun-3.3.4.jar +hadoop-annotations/3.3.4//hadoop-annotations-3.3.4.jar +hadoop-aws/3.3.4//hadoop-aws-3.3.4.jar +hadoop-azure-datalake/3.3.4//hadoop-azure-datalake-3.3.4.jar +hadoop-azure/3.3.4//hadoop-azure-3.3.4.jar +hadoop-client-api/3.3.4//hadoop-client-api-3.3.4.jar +hadoop-client-runtime/3.3.4//hadoop-client-runtime-3.3.4.jar +hadoop-cloud-storage/3.3.4//hadoop-cloud-storage-3.3.4.jar +hadoop-openstack/3.3.4//hadoop-openstack-3.3.4.jar hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar -hadoop-yarn-server-web-proxy/3.3.2//hadoop-yarn-server-web-proxy-3.3.2.jar +hadoop-yarn-server-web-proxy/3.3.4//hadoop-yarn-server-web-proxy-3.3.4.jar hive-beeline/2.3.9//hive-beeline-2.3.9.jar hive-cli/2.3.9//hive-cli-2.3.9.jar hive-common/2.3.9//hive-common-2.3.9.jar @@ -87,12 +83,12 @@ hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar hive-metastore/2.3.9//hive-metastore-2.3.9.jar hive-serde/2.3.9//hive-serde-2.3.9.jar -hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar +hive-service-rpc/3.1.3//hive-service-rpc-3.1.3.jar hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar hive-shims/2.3.9//hive-shims-2.3.9.jar -hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar +hive-storage-api/2.7.3//hive-storage-api-2.7.3.jar 
hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar @@ -102,28 +98,26 @@ httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.1//ivy-2.5.1.jar -jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar -jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar -jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar -jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar -jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar -jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.4//jackson-module-scala_2.12-2.13.4.jar +jackson-annotations/2.14.2//jackson-annotations-2.14.2.jar +jackson-core/2.14.2//jackson-core-2.14.2.jar +jackson-databind/2.14.2//jackson-databind-2.14.2.jar +jackson-dataformat-cbor/2.14.2//jackson-dataformat-cbor-2.14.2.jar +jackson-dataformat-yaml/2.14.2//jackson-dataformat-yaml-2.14.2.jar +jackson-datatype-jsr310/2.14.2//jackson-datatype-jsr310-2.14.2.jar +jackson-module-scala_2.12/2.14.2//jackson-module-scala_2.12-2.14.2.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.0.16//janino-3.0.16.jar +janino/3.1.7//janino-3.1.7.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar +jcl-over-slf4j/2.0.3//jcl-over-slf4j-2.0.3.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jdom2/2.0.6//jdom2-2.0.6.jar jersey-client/2.36//jersey-client-2.36.jar @@ -133,10 +127,10 @@ jersey-container-servlet/2.36//jersey-container-servlet-2.36.jar jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.48.v20220622//jetty-util-ajax-9.4.48.v20220622.jar -jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar +jetty-util-ajax/9.4.49.v20220914//jetty-util-ajax-9.4.49.v20220914.jar +jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.13//joda-time-2.10.13.jar +joda-time/2.11.2//joda-time-2.11.2.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar @@ -146,94 +140,100 @@ json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar +jul-to-slf4j/2.0.3//jul-to-slf4j-2.0.3.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.2//kubernetes-client-5.12.2.jar -kubernetes-model-admissionregistration/5.12.2//kubernetes-model-admissionregistration-5.12.2.jar -kubernetes-model-apiextensions/5.12.2//kubernetes-model-apiextensions-5.12.2.jar -kubernetes-model-apps/5.12.2//kubernetes-model-apps-5.12.2.jar -kubernetes-model-autoscaling/5.12.2//kubernetes-model-autoscaling-5.12.2.jar -kubernetes-model-batch/5.12.2//kubernetes-model-batch-5.12.2.jar 
-kubernetes-model-certificates/5.12.2//kubernetes-model-certificates-5.12.2.jar -kubernetes-model-common/5.12.2//kubernetes-model-common-5.12.2.jar -kubernetes-model-coordination/5.12.2//kubernetes-model-coordination-5.12.2.jar -kubernetes-model-core/5.12.2//kubernetes-model-core-5.12.2.jar -kubernetes-model-discovery/5.12.2//kubernetes-model-discovery-5.12.2.jar -kubernetes-model-events/5.12.2//kubernetes-model-events-5.12.2.jar -kubernetes-model-extensions/5.12.2//kubernetes-model-extensions-5.12.2.jar -kubernetes-model-flowcontrol/5.12.2//kubernetes-model-flowcontrol-5.12.2.jar -kubernetes-model-metrics/5.12.2//kubernetes-model-metrics-5.12.2.jar -kubernetes-model-networking/5.12.2//kubernetes-model-networking-5.12.2.jar -kubernetes-model-node/5.12.2//kubernetes-model-node-5.12.2.jar -kubernetes-model-policy/5.12.2//kubernetes-model-policy-5.12.2.jar -kubernetes-model-rbac/5.12.2//kubernetes-model-rbac-5.12.2.jar -kubernetes-model-scheduling/5.12.2//kubernetes-model-scheduling-5.12.2.jar -kubernetes-model-storageclass/5.12.2//kubernetes-model-storageclass-5.12.2.jar -lapack/2.2.1//lapack-2.2.1.jar +kubernetes-client-api/6.1.1//kubernetes-client-api-6.1.1.jar +kubernetes-client/6.1.1//kubernetes-client-6.1.1.jar +kubernetes-httpclient-okhttp/6.1.1//kubernetes-httpclient-okhttp-6.1.1.jar +kubernetes-model-admissionregistration/6.1.1//kubernetes-model-admissionregistration-6.1.1.jar +kubernetes-model-apiextensions/6.1.1//kubernetes-model-apiextensions-6.1.1.jar +kubernetes-model-apps/6.1.1//kubernetes-model-apps-6.1.1.jar +kubernetes-model-autoscaling/6.1.1//kubernetes-model-autoscaling-6.1.1.jar +kubernetes-model-batch/6.1.1//kubernetes-model-batch-6.1.1.jar +kubernetes-model-certificates/6.1.1//kubernetes-model-certificates-6.1.1.jar +kubernetes-model-common/6.1.1//kubernetes-model-common-6.1.1.jar +kubernetes-model-coordination/6.1.1//kubernetes-model-coordination-6.1.1.jar +kubernetes-model-core/6.1.1//kubernetes-model-core-6.1.1.jar +kubernetes-model-discovery/6.1.1//kubernetes-model-discovery-6.1.1.jar +kubernetes-model-events/6.1.1//kubernetes-model-events-6.1.1.jar +kubernetes-model-extensions/6.1.1//kubernetes-model-extensions-6.1.1.jar +kubernetes-model-flowcontrol/6.1.1//kubernetes-model-flowcontrol-6.1.1.jar +kubernetes-model-gatewayapi/6.1.1//kubernetes-model-gatewayapi-6.1.1.jar +kubernetes-model-metrics/6.1.1//kubernetes-model-metrics-6.1.1.jar +kubernetes-model-networking/6.1.1//kubernetes-model-networking-6.1.1.jar +kubernetes-model-node/6.1.1//kubernetes-model-node-6.1.1.jar +kubernetes-model-policy/6.1.1//kubernetes-model-policy-6.1.1.jar +kubernetes-model-rbac/6.1.1//kubernetes-model-rbac-6.1.1.jar +kubernetes-model-scheduling/6.1.1//kubernetes-model-scheduling-6.1.1.jar +kubernetes-model-storageclass/6.1.1//kubernetes-model-storageclass-6.1.1.jar +lapack/3.0.2//lapack-3.0.2.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.14.1//libthrift-0.14.1.jar -log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar -log4j-api/2.17.2//log4j-api-2.17.2.jar -log4j-core/2.17.2//log4j-core-2.17.2.jar -log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j-1.2-api/2.19.0//log4j-1.2-api-2.19.0.jar +log4j-api/2.19.0//log4j-api-2.19.0.jar +log4j-core/2.19.0//log4j-core-2.19.0.jar +log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar 
-metrics-core/4.2.7//metrics-core-4.2.7.jar -metrics-graphite/4.2.7//metrics-graphite-4.2.7.jar -metrics-jmx/4.2.7//metrics-jmx-4.2.7.jar -metrics-json/4.2.7//metrics-json-4.2.7.jar -metrics-jvm/4.2.7//metrics-jvm-4.2.7.jar +metrics-core/4.2.10//metrics-core-4.2.10.jar +metrics-graphite/4.2.10//metrics-graphite-4.2.10.jar +metrics-jmx/4.2.10//metrics-jmx-4.2.10.jar +metrics-json/4.2.10//metrics-json-4.2.10.jar +metrics-jvm/4.2.10//metrics-jvm-4.2.10.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.74.Final//netty-all-4.1.74.Final.jar -netty-buffer/4.1.74.Final//netty-buffer-4.1.74.Final.jar -netty-codec/4.1.74.Final//netty-codec-4.1.74.Final.jar -netty-common/4.1.74.Final//netty-common-4.1.74.Final.jar -netty-handler/4.1.74.Final//netty-handler-4.1.74.Final.jar -netty-resolver/4.1.74.Final//netty-resolver-4.1.74.Final.jar -netty-tcnative-classes/2.0.48.Final//netty-tcnative-classes-2.0.48.Final.jar -netty-transport-classes-epoll/4.1.74.Final//netty-transport-classes-epoll-4.1.74.Final.jar -netty-transport-classes-kqueue/4.1.74.Final//netty-transport-classes-kqueue-4.1.74.Final.jar -netty-transport-native-epoll/4.1.74.Final/linux-aarch_64/netty-transport-native-epoll-4.1.74.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.74.Final/linux-x86_64/netty-transport-native-epoll-4.1.74.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.74.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.74.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.74.Final/osx-x86_64/netty-transport-native-kqueue-4.1.74.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.74.Final//netty-transport-native-unix-common-4.1.74.Final.jar -netty-transport/4.1.74.Final//netty-transport-4.1.74.Final.jar +netty-all/4.1.84.Final//netty-all-4.1.84.Final.jar +netty-buffer/4.1.84.Final//netty-buffer-4.1.84.Final.jar +netty-codec-http/4.1.84.Final//netty-codec-http-4.1.84.Final.jar +netty-codec-http2/4.1.84.Final//netty-codec-http2-4.1.84.Final.jar +netty-codec-socks/4.1.84.Final//netty-codec-socks-4.1.84.Final.jar +netty-codec/4.1.84.Final//netty-codec-4.1.84.Final.jar +netty-common/4.1.84.Final//netty-common-4.1.84.Final.jar +netty-handler-proxy/4.1.84.Final//netty-handler-proxy-4.1.84.Final.jar +netty-handler/4.1.84.Final//netty-handler-4.1.84.Final.jar +netty-resolver/4.1.84.Final//netty-resolver-4.1.84.Final.jar +netty-tcnative-classes/2.0.54.Final//netty-tcnative-classes-2.0.54.Final.jar +netty-transport-classes-epoll/4.1.84.Final//netty-transport-classes-epoll-4.1.84.Final.jar +netty-transport-classes-kqueue/4.1.84.Final//netty-transport-classes-kqueue-4.1.84.Final.jar +netty-transport-native-epoll/4.1.84.Final/linux-aarch_64/netty-transport-native-epoll-4.1.84.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.84.Final/linux-x86_64/netty-transport-native-epoll-4.1.84.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.84.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.84.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.84.Final/osx-x86_64/netty-transport-native-kqueue-4.1.84.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.84.Final//netty-transport-native-unix-common-4.1.84.Final.jar +netty-transport/4.1.84.Final//netty-transport-4.1.84.Final.jar objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar -okio/1.14.0//okio-1.14.0.jar +okio/1.15.0//okio-1.15.0.jar opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar 
opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.7.8//orc-core-1.7.8.jar -orc-mapreduce/1.7.8//orc-mapreduce-1.7.8.jar -orc-shims/1.7.8//orc-shims-1.7.8.jar +orc-core/1.8.0/shaded-protobuf/orc-core-1.8.0-shaded-protobuf.jar +orc-mapreduce/1.8.0/shaded-protobuf/orc-mapreduce-1.8.0-shaded-protobuf.jar +orc-shims/1.8.0//orc-shims-1.8.0.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.12.2//parquet-column-1.12.2.jar -parquet-common/1.12.2//parquet-common-1.12.2.jar -parquet-encoding/1.12.2//parquet-encoding-1.12.2.jar -parquet-format-structures/1.12.2//parquet-format-structures-1.12.2.jar -parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar -parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar +parquet-column/1.12.3//parquet-column-1.12.3.jar +parquet-common/1.12.3//parquet-common-1.12.3.jar +parquet-encoding/1.12.3//parquet-encoding-1.12.3.jar +parquet-format-structures/1.12.3//parquet-format-structures-1.12.3.jar +parquet-hadoop/1.12.3//parquet-hadoop-1.12.3.jar +parquet-jackson/1.12.3//parquet-jackson-1.12.3.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.5//py4j-0.10.9.5.jar +py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar -rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar -scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar -scala-compiler/2.12.15//scala-compiler-2.12.15.jar -scala-library/2.12.15//scala-library-2.12.15.jar +rocksdbjni/7.6.0//rocksdbjni-7.6.0.jar +scala-collection-compat_2.12/2.7.0//scala-collection-compat_2.12-2.7.0.jar +scala-compiler/2.12.17//scala-compiler-2.12.17.jar +scala-library/2.12.17//scala-library-2.12.17.jar scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar -scala-reflect/2.12.15//scala-reflect-2.12.15.jar -scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar -shims/0.9.25//shims-0.9.25.jar -slf4j-api/1.7.32//slf4j-api-1.7.32.jar +scala-reflect/2.12.17//scala-reflect-2.12.17.jar +scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar +shims/0.9.32//shims-0.9.32.jar +slf4j-api/2.0.3//slf4j-api-2.0.3.jar snakeyaml/1.31//snakeyaml-1.31.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar @@ -243,15 +243,15 @@ spire_2.12/0.17.0//spire_2.12-0.17.0.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.5.0//threeten-extra-1.5.0.jar -tink/1.6.1//tink-1.6.1.jar +threeten-extra/1.7.1//threeten-extra-1.7.1.jar +tink/1.7.0//tink-1.7.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar wildfly-openssl/1.0.7.Final//wildfly-openssl-1.0.7.Final.jar -xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar -xz/1.9//xz-1.9.jar +xbean-asm9-shaded/4.21//xbean-asm9-shaded-4.21.jar +xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar -zstd-jni/1.5.2-1//zstd-jni-1.5.2-1.jar +zstd-jni/1.5.2-4//zstd-jni-1.5.2-4.jar diff --git a/dev/run-tests.py b/dev/run-tests.py index 570ee4c8169cf..a43b433f8dcf0 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -472,6 +472,8 @@ def main(): rm_r(os.path.join(SPARK_HOME, "work")) rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark")) rm_r(os.path.join(USER_HOME, ".ivy2", "cache", 
"org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "local", "org.apache.spark")) + rm_r(os.path.join(USER_HOME, ".ivy2.5.2", "cache", "org.apache.spark")) os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"]) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 50c91b7f156cf..3adfbeca8fd96 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,12 +22,42 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.5 to 4.0 + +- Since Spark 4.0, Spark will roll event logs to archive them incrementally. To restore the behavior before Spark 4.0, you can set `spark.eventLog.rolling.enabled` to `false`. + +- Since Spark 4.0, Spark will compress event logs. To restore the behavior before Spark 4.0, you can set `spark.eventLog.compress` to `false`. + +- Since Spark 4.0, Spark workers will clean up worker and stopped application directories periodically. To restore the behavior before Spark 4.0, you can set `spark.worker.cleanup.enabled` to `false`. + +- Since Spark 4.0, `spark.shuffle.service.db.backend` is set to `ROCKSDB` by default which means Spark will use RocksDB store for shuffle service. To restore the behavior before Spark 4.0, you can set `spark.shuffle.service.db.backend` to `LEVELDB`. + +- In Spark 4.0, support for Apache Mesos as a resource manager was removed. + +- Since Spark 4.0, Spark uses `ReadWriteOncePod` instead of `ReadWriteOnce` access mode in persistence volume claims. To restore the legacy behavior, you can set `spark.kubernetes.legacy.useReadWriteOnceAccessMode` to `true`. + +- Since Spark 4.0, Spark uses `~/.ivy2.5.2` as Ivy user directory by default to isolate the existing systems from Apache Ivy's incompatibility. To restore the legacy behavior, you can set `spark.jars.ivy` to `~/.ivy2`. + +## Upgrading from Core 3.4 to 3.5 + +- Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead. + +- Since Spark 3.5, `spark.yarn.max.executor.failures` is deprecated. Use `spark.executor.maxNumFailures` instead. + +## Upgrading from Core 3.3 to 3.4 + +- Since Spark 3.4, Spark driver will own `PersistentVolumnClaim`s and try to reuse if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`. + +- Since Spark 3.4, Spark driver will track shuffle data when dynamic allocation is enabled without shuffle service. To restore the behavior before Spark 3.4, you can set `spark.dynamicAllocation.shuffleTracking.enabled` to `false`. + +- Since Spark 3.4, Spark will try to decommission cached RDD and shuffle blocks if both `spark.decommission.enabled` and `spark.storage.decommission.enabled` are true. To restore the behavior before Spark 3.4, you can set both `spark.storage.decommission.rddBlocks.enabled` and `spark.storage.decommission.shuffleBlocks.enabled` to `false`. + +- Since Spark 3.4, Spark will use RocksDB store if `spark.history.store.hybridStore.enabled` is true. To restore the behavior before Spark 3.4, you can set `spark.history.store.hybridStore.diskBackend` to `LEVELDB`. + ## Upgrading from Core 3.2 to 3.3 - Since Spark 3.3, Spark migrates its log4j dependency from 1.x to 2.x because log4j 1.x has reached end of life and is no longer supported by the community. 
Vulnerabilities reported after August 2015 against log4j 1.x were not checked and will not be fixed. Users should rewrite original log4j properties files using log4j2 syntax (XML, JSON, YAML, or properties format). Spark rewrites the `conf/log4j.properties.template` which is included in Spark distribution, to `conf/log4j2.properties.template` with log4j2 properties format. -- Since Spark 3.3.3, `spark.submit.proxyUser.allowCustomClasspathInClusterMode` allows users to disable custom class path in cluster mode by proxy users. It still defaults to `true` to maintain backward compatibility. - ## Upgrading from Core 3.1 to 3.2 - Since Spark 3.2, `spark.scheduler.allocation.file` supports read remote file using hadoop filesystem which means if the path has no scheme Spark will respect hadoop configuration to read it. To restore the behavior before Spark 3.2, you can specify the local scheme for `spark.scheduler.allocation.file` e.g. `file:///path/to/file`. diff --git a/pom.xml b/pom.xml index e2aec01117c66..3e97695fb4ef4 100644 --- a/pom.xml +++ b/pom.xml @@ -150,7 +150,7 @@ 9.4.48.v20220622 4.0.3 0.10.0 - 2.5.1 + 2.5.2 2.0.8 2.3.0 0.14.2 @@ -625,7 +625,7 @@ org.apache.commons commons-text - 1.10.0 + 1.11.0 commons-lang @@ -1307,10 +1307,6 @@ asm asm - - org.codehaus.jackson - jackson-mapper-asl - org.ow2.asm asm @@ -1807,28 +1803,7 @@ - - org.codehaus.jackson - jackson-core-asl - ${codehaus.jackson.version} - ${hadoop.deps.scope} - - - org.codehaus.jackson - jackson-mapper-asl - ${codehaus.jackson.version} - ${hadoop.deps.scope} - - - org.codehaus.jackson - jackson-xc - ${codehaus.jackson.version} - - - org.codehaus.jackson - jackson-jaxrs - ${codehaus.jackson.version} - + ${hive.group} hive-beeline diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 394328262818c..6e54c12d4786f 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -201,6 +201,16 @@ jersey-json test ${jersey-1.version} + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + com.sun.jersey diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 41f42fb9dc4e6..cc055fca2ca17 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -272,16 +272,6 @@ hadoop-2 - - org.codehaus.jackson - jackson-core-asl - test - - - org.codehaus.jackson - jackson-mapper-asl - test - diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java index f4d07d10a4357..3345a9ae6a5fa 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java @@ -534,7 +534,7 @@ public synchronized String getDelegationTokenFromMetaStore(String owner) try { Hive.closeCurrent(); - return Hive.get(hiveConf).getDelegationToken(owner, owner); + return Hive.getWithoutRegisterFns(hiveConf).getDelegationToken(owner, owner); } catch (HiveException e) { if (e.getCause() instanceof UnsupportedOperationException) { throw (UnsupportedOperationException)e.getCause(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 8e1e500ff78b4..53de7c673b43f 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -252,7 +252,7 @@ public static int setVariable(String varname, String varvalue) throws Exception ss.getHiveVariables().put(propName, substitution.substitute(ss.getConf(),varvalue)); } else if (varname.startsWith(METACONF_PREFIX)) { String propName = varname.substring(METACONF_PREFIX.length()); - Hive hive = Hive.get(ss.getConf()); + Hive hive = Hive.getWithoutRegisterFns(ss.getConf()); hive.setMetaConf(propName, substitution.substitute(ss.getConf(), varvalue)); } else { setConf(varname, varname, varvalue, true); @@ -413,7 +413,7 @@ public HiveConf getHiveConf() { @Override public IMetaStoreClient getMetaStoreClient() throws HiveSQLException { try { - return Hive.get(getHiveConf()).getMSC(); + return Hive.getWithoutRegisterFns(getHiveConf()).getMSC(); } catch (HiveException e) { throw new HiveSQLException("Failed to get metastore connection", e); } catch (MetaException e) { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java index 6e153db6751d3..514b19eb7111a 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java @@ -54,7 +54,7 @@ public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String // create a new metastore connection for this particular user session Hive.set(null); try { - sessionHive = Hive.get(getHiveConf()); + sessionHive = Hive.getWithoutRegisterFns(getHiveConf()); } catch (HiveException e) { throw new HiveSQLException("Failed to setup metastore connection", e); } @@ -140,7 +140,7 @@ private void setDelegationToken(String delegationTokenStr) throws HiveSQLExcepti private void cancelDelegationToken() throws HiveSQLException { if (delegationTokenStr != null) { try { - Hive.get(getHiveConf()).cancelDelegationToken(delegationTokenStr); + Hive.getWithoutRegisterFns(getHiveConf()).cancelDelegationToken(delegationTokenStr); } catch (HiveException e) { throw new HiveSQLException("Couldn't cancel delegation token", e); } diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index ada6d91e77183..a37abdf55fc47 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -143,10 +143,6 @@ org.apache.httpcomponents httpclient - - org.codehaus.jackson - jackson-mapper-asl - commons-codec diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 07361cfdce9da..9284b35fb3e35 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.test import java.io.File import java.net.URI -import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -27,7 +26,6 @@ import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.ql.exec.FunctionRegistry import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.spark.{SparkConf, SparkContext} @@ -523,12 +521,6 @@ private[hive] class TestHiveSparkSession( } } - /** - * Records the UDFs 
present when the server starts, so we can delete ones that are created by - * tests. - */ - protected val originalUDFs: JavaSet[String] = FunctionRegistry.getFunctionNames - /** * Resets the test instance by deleting any table, view, temp view, and UDF that have been created */
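For reviewers who want to see the documented behavior change in context, here is a minimal sketch (not part of this patch) of how a downstream application could opt back into the pre-4.0 Ivy layout and event-log defaults described in the core migration guide above. The configuration keys are taken from `docs/core-migration-guide.md` as modified here; the application name, master URL, and the use of the user's home directory are illustrative assumptions, not values mandated by Spark.

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch: restore the legacy behaviors called out in the migration guide.
// The config keys come from docs/core-migration-guide.md in this patch; the app
// name, master, and home-directory path are illustrative assumptions only.
object LegacyIvyLayoutExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("legacy-ivy-layout-example")   // hypothetical app name
      .master("local[*]")                      // hypothetical master for a local run
      // Keep using the pre-4.0 Ivy user directory instead of ~/.ivy2.5.2.
      .config("spark.jars.ivy", sys.props("user.home") + "/.ivy2")
      // Restore the pre-4.0 event-log defaults described in the guide.
      .config("spark.eventLog.rolling.enabled", "false")
      .config("spark.eventLog.compress", "false")
      .getOrCreate()

    spark.range(10).count()                    // trivial action to exercise the session
    spark.stop()
  }
}
```

The same keys could equally be supplied via `--conf` on `spark-submit` or in `spark-defaults.conf`; the sketch above only illustrates the settings named in the migration guide.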