diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index a9d0e41aa3cbb..77a395ed1d2f1 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -145,6 +145,8 @@ List<String> buildClassPath(String appClassPath) throws IOException {
 
     boolean prependClasses = !isEmpty(getenv("SPARK_PREPEND_CLASSES"));
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
+    boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING"));
+    String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql);
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
       List<String> projects = Arrays.asList(
@@ -176,6 +178,9 @@ List<String> buildClassPath(String appClassPath) throws IOException {
           "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of " +
           "assembly.");
       }
+      boolean shouldPrependSparkHive = isJarAvailable(jarsDir, "spark-hive_");
+      boolean shouldPrependSparkHiveThriftServer =
+        shouldPrependSparkHive && isJarAvailable(jarsDir, "spark-hive-thriftserver_");
       for (String project : projects) {
         // Do not use locally compiled class files for Spark server because it should use shaded
         // dependencies.
@@ -185,6 +190,24 @@ List<String> buildClassPath(String appClassPath) throws IOException {
         if (isRemote && "1".equals(getenv("SPARK_SCALA_SHELL")) && project.equals("sql/core")) {
           continue;
         }
+        // SPARK-49534: The assumption here is that if `spark-hive_xxx.jar` is not in the
+        // classpath, then the `-Phive` profile was not used during package, and therefore
+        // the Hive-related jars should also not be in the classpath. To avoid failure in
+        // loading the SPI in `DataSourceRegister` under `sql/hive`, no longer prepend `sql/hive`.
+        if (!shouldPrependSparkHive && project.equals("sql/hive")) {
+          continue;
+        }
+        // SPARK-49534: Meanwhile, due to the strong dependency of `sql/hive-thriftserver`
+        // on `sql/hive`, the prepend for `sql/hive-thriftserver` will also be excluded
+        // if `spark-hive_xxx.jar` is not in the classpath. On the other hand, if
+        // `spark-hive-thriftserver_xxx.jar` is not in the classpath, then the
+        // `-Phive-thriftserver` profile was not used during package, and therefore,
+        // jars such as hive-cli and hive-beeline should also not be included in the classpath.
+        // To avoid the inelegant startup failures of tools such as spark-sql, in this scenario,
+        // `sql/hive-thriftserver` will no longer be prepended to the classpath.
+        if (!shouldPrependSparkHiveThriftServer && project.equals("sql/hive-thriftserver")) {
+          continue;
+        }
         addToClassPath(cp, String.format("%s/%s/target/scala-%s/classes",
           sparkHome, project, scala));
       }
@@ -205,8 +228,6 @@ List<String> buildClassPath(String appClassPath) throws IOException {
     // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
     // propagate the test classpath appropriately. For normal invocation, look for the jars
     // directory under SPARK_HOME.
-    boolean isTestingSql = "1".equals(getenv("SPARK_SQL_TESTING"));
-    String jarsDir = findJarsDir(getSparkHome(), getScalaVersion(), !isTesting && !isTestingSql);
     if (jarsDir != null) {
       // Place slf4j-api-* jar first to be robust
       for (File f: new File(jarsDir).listFiles()) {
@@ -265,6 +286,23 @@ private void addToClassPath(Set<String> cp, String entries) {
     }
   }
 
+  /**
+   * Checks if a jar file with the given name prefix is available in the specified directory.
+   * @param jarsDir the directory to search for jar files, may be null
+   * @param jarNamePrefix the jar file name prefix to match, e.g. {@code "spark-hive_"}
+   * @return true if a matching jar file is found, false otherwise
+   */
+  private boolean isJarAvailable(String jarsDir, String jarNamePrefix) {
+    if (jarsDir != null) {
+      for (File f : new File(jarsDir).listFiles()) {
+        if (f.getName().startsWith(jarNamePrefix)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   String getScalaVersion() {
     String scala = getenv("SPARK_SCALA_VERSION");
     if (scala != null) {