Commit 6ca990f

[SPARK-13294][PROJECT INFRA] Remove MiMa's dependency on spark-class / Spark assembly
This patch removes the need to build a full Spark assembly before running the `dev/mima` script.

- I modified the `tools` project to remove a direct dependency on Spark, so `sbt/sbt tools/fullClasspath` will now return the classpath for the `GenerateMIMAIgnore` class itself plus its own dependencies.
  - This required me to delete two classes full of dead code that we don't use anymore.
- `GenerateMIMAIgnore` now uses [ClassUtil](http://software.clapper.org/classutil/) to find all of the Spark classes rather than our homemade JAR traversal code. The problem in our own code was that it didn't handle folders of classes properly, which is necessary in order to generate excludes with an assembly-free Spark build.
- `./dev/mima` no longer runs through `spark-class`, eliminating the need to reason about classpath ordering between `SPARK_CLASSPATH` and the assembly.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #11178 from JoshRosen/remove-assembly-in-run-tests.
1 parent d18276c commit 6ca990f
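For context on the ClassUtil-based approach described above, here is a minimal stand-alone sketch (not part of this commit; the object name and the `org.apache.spark` filter are illustrative) of the discovery pattern that `GenerateMIMAIgnore` now relies on. `ClassFinder()` with no arguments walks the JVM classpath, covering both jars and plain directories of classes, which is why `dev/mima` can simply hand it the exported classpath via `java -cp`.

    import org.clapper.classutil.ClassFinder

    // Hypothetical example: list Spark classes visible on the current classpath.
    object FindSparkClasses {
      def main(args: Array[String]): Unit = {
        // ClassFinder() with no arguments scans java.class.path, so it sees
        // both jar entries and directories of .class files.
        val sparkClasses = ClassFinder()
          .getClasses
          .map(_.name)                              // fully qualified class names
          .filter(_.startsWith("org.apache.spark")) // keep only Spark classes
        sparkClasses.take(10).foreach(println)
      }
    }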

8 files changed (+58, -576 lines changed)


dev/mima

Lines changed: 10 additions & 13 deletions
@@ -24,24 +24,21 @@ set -e
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"

-echo -e "q\n" | build/sbt oldDeps/update
+TOOLS_CLASSPATH="$(build/sbt "export tools/fullClasspath" | tail -n1)"
+
 rm -f .generated-mima*

 generate_mima_ignore() {
-  SPARK_JAVA_OPTS="-XX:MaxPermSize=1g -Xmx2g" \
-    ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+  java \
+    -XX:MaxPermSize=1g \
+    -Xmx2g \
+    -cp "$TOOLS_CLASSPATH:$1" \
+    org.apache.spark.tools.GenerateMIMAIgnore
 }

-# Generate Mima Ignore is called twice, first with latest built jars
-# on the classpath and then again with previous version jars on the classpath.
-# Because of a bug in GenerateMIMAIgnore that when old jars are ahead on classpath
-# it did not process the new classes (which are in assembly jar).
-generate_mima_ignore
-
-export SPARK_CLASSPATH="$(build/sbt "export oldDeps/fullClasspath" | tail -n1)"
-echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"
-
-generate_mima_ignore
+SPARK_PROFILES="-Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
+generate_mima_ignore "$(build/sbt $SPARK_PROFILES "export assembly/fullClasspath" | tail -n1)"
+generate_mima_ignore "$(build/sbt $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"

 echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
 ret_val=$?

dev/run-tests.py

Lines changed: 18 additions & 6 deletions
@@ -336,7 +336,6 @@ def build_spark_sbt(hadoop_version):
     # Enable all of the profiles for the build:
     build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
     sbt_goals = ["package",
-                 "assembly/assembly",
                  "streaming-kafka-assembly/assembly",
                  "streaming-flume-assembly/assembly",
                  "streaming-mqtt-assembly/assembly",
@@ -350,6 +349,16 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)


+def build_spark_assembly_sbt(hadoop_version):
+    # Enable all of the profiles for the build:
+    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    sbt_goals = ["assembly/assembly"]
+    profiles_and_goals = build_profiles + sbt_goals
+    print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
+          " ".join(profiles_and_goals))
+    exec_sbt(profiles_and_goals)
+
+
 def build_apache_spark(build_tool, hadoop_version):
     """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
     `maven`). Defaults to using `sbt`."""
@@ -561,11 +570,14 @@ def main():
     # spark build
     build_apache_spark(build_tool, hadoop_version)

-    # TODO Temporarily disable MiMA check for DF-to-DS migration prototyping
-    # # backwards compatibility checks
-    # if build_tool == "sbt":
-    #     # Note: compatiblity tests only supported in sbt for now
-    #     detect_binary_inop_with_mima()
+    # backwards compatibility checks
+    if build_tool == "sbt":
+        # Note: compatibility tests only supported in sbt for now
+        # TODO Temporarily disable MiMA check for DF-to-DS migration prototyping
+        # detect_binary_inop_with_mima()
+        # Since we did not build assembly/assembly before running dev/mima, we need to
+        # do it here because the tests still rely on it; see SPARK-13294 for details.
+        build_spark_assembly_sbt(hadoop_version)

     # run the test suites
     run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)

launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java

Lines changed: 0 additions & 22 deletions
@@ -17,12 +17,10 @@

 package org.apache.spark.launcher;

-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.regex.Pattern;

 import static org.apache.spark.launcher.CommandBuilderUtils.*;

@@ -76,26 +74,6 @@ public List<String> buildCommand(Map<String, String> env) throws IOException {
       javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS");
       javaOptsKeys.add("SPARK_SHUFFLE_OPTS");
       memKey = "SPARK_DAEMON_MEMORY";
-    } else if (className.startsWith("org.apache.spark.tools.")) {
-      String sparkHome = getSparkHome();
-      File toolsDir = new File(join(File.separator, sparkHome, "tools", "target",
-        "scala-" + getScalaVersion()));
-      checkState(toolsDir.isDirectory(), "Cannot find tools build directory.");
-
-      Pattern re = Pattern.compile("spark-tools_.*\\.jar");
-      for (File f : toolsDir.listFiles()) {
-        if (re.matcher(f.getName()).matches()) {
-          extraClassPath = f.getAbsolutePath();
-          break;
-        }
-      }
-
-      checkState(extraClassPath != null,
-        "Failed to find Spark Tools Jar in %s.\n" +
-        "You need to run \"build/sbt tools/package\" before running %s.",
-        toolsDir.getAbsolutePath(), className);
-
-      javaOptsKeys.add("SPARK_JAVA_OPTS");
     } else {
       javaOptsKeys.add("SPARK_JAVA_OPTS");
       memKey = "SPARK_DRIVER_MEMORY";

project/SparkBuild.scala

Lines changed: 10 additions & 9 deletions
@@ -384,18 +384,19 @@ object OldDeps {

   lazy val project = Project("oldDeps", file("dev"), settings = oldDepsSettings)

-  def versionArtifact(id: String): Option[sbt.ModuleID] = {
-    val fullId = id + "_2.11"
-    Some("org.apache.spark" % fullId % "1.2.0")
-  }
-
   def oldDepsSettings() = Defaults.coreDefaultSettings ++ Seq(
     name := "old-deps",
     scalaVersion := "2.10.5",
-    libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
-      "spark-streaming-flume", "spark-streaming-twitter",
-      "spark-streaming", "spark-mllib", "spark-graphx",
-      "spark-core").map(versionArtifact(_).get intransitive())
+    libraryDependencies := Seq(
+      "spark-streaming-mqtt",
+      "spark-streaming-zeromq",
+      "spark-streaming-flume",
+      "spark-streaming-twitter",
+      "spark-streaming",
+      "spark-mllib",
+      "spark-graphx",
+      "spark-core"
+    ).map(id => "org.apache.spark" % (id + "_2.11") % "1.2.0")
   )
 }

tools/pom.xml

Lines changed: 5 additions & 10 deletions
@@ -34,16 +34,6 @@
   <url>http://spark.apache.org/</url>

   <dependencies>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-    </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-reflect</artifactId>
@@ -52,6 +42,11 @@
       <groupId>org.scala-lang</groupId>
       <artifactId>scala-compiler</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.clapper</groupId>
+      <artifactId>classutil_${scala.binary.version}</artifactId>
+      <version>1.0.6</version>
+    </dependency>
   </dependencies>

   <build>
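If one were wiring the same library into the SBT build, the Maven coordinates above would translate to a dependency declaration roughly like the following (a hedged sketch, not taken from this commit; the project's actual SparkBuild.scala changes are not shown on this page):

    // Hypothetical SBT equivalent of the Maven dependency added above;
    // %% appends the Scala binary version, mirroring ${scala.binary.version}.
    libraryDependencies += "org.clapper" %% "classutil" % "1.0.6"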

tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala

Lines changed: 15 additions & 38 deletions
@@ -18,15 +18,13 @@
 // scalastyle:off classforname
 package org.apache.spark.tools

-import java.io.File
-import java.util.jar.JarFile
-
 import scala.collection.mutable
-import scala.collection.JavaConverters._
 import scala.reflect.runtime.{universe => unv}
 import scala.reflect.runtime.universe.runtimeMirror
 import scala.util.Try

+import org.clapper.classutil.ClassFinder
+
 /**
  * A tool for generating classes to be excluded during binary checking with MIMA. It is expected
  * that this tool is run with ./spark-class.
@@ -42,12 +40,13 @@ object GenerateMIMAIgnore {
   private val classLoader = Thread.currentThread().getContextClassLoader
   private val mirror = runtimeMirror(classLoader)

+  private def isDeveloperApi(sym: unv.Symbol) = sym.annotations.exists {
+    _.tpe =:= mirror.staticClass("org.apache.spark.annotation.DeveloperApi").toType
+  }

-  private def isDeveloperApi(sym: unv.Symbol) =
-    sym.annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.DeveloperApi])
-
-  private def isExperimental(sym: unv.Symbol) =
-    sym.annotations.exists(_.tpe =:= unv.typeOf[org.apache.spark.annotation.Experimental])
+  private def isExperimental(sym: unv.Symbol) = sym.annotations.exists {
+    _.tpe =:= mirror.staticClass("org.apache.spark.annotation.Experimental").toType
+  }


   private def isPackagePrivate(sym: unv.Symbol) =
@@ -160,35 +159,13 @@ object GenerateMIMAIgnore {
    * and subpackages both from directories and jars present on the classpath.
    */
   private def getClasses(packageName: String): Set[String] = {
-    val path = packageName.replace('.', '/')
-    val resources = classLoader.getResources(path)
-
-    val jars = resources.asScala.filter(_.getProtocol == "jar")
-      .map(_.getFile.split(":")(1).split("!")(0)).toSeq
-
-    jars.flatMap(getClassesFromJar(_, path))
-      .map(_.getName)
-      .filterNot(shouldExclude).toSet
-  }
-
-  /**
-   * Get all classes in a package from a jar file.
-   */
-  private def getClassesFromJar(jarPath: String, packageName: String) = {
-    import scala.collection.mutable
-    val jar = new JarFile(new File(jarPath))
-    val enums = jar.entries().asScala.map(_.getName).filter(_.startsWith(packageName))
-    val classes = mutable.HashSet[Class[_]]()
-    for (entry <- enums if entry.endsWith(".class")) {
-      try {
-        classes += Class.forName(entry.replace('/', '.').stripSuffix(".class"), false, classLoader)
-      } catch {
-        // scalastyle:off println
-        case _: Throwable => println("Unable to load:" + entry)
-        // scalastyle:on println
-      }
-    }
-    classes
+    val finder = ClassFinder()
+    finder
+      .getClasses
+      .map(_.name)
+      .filter(_.startsWith(packageName))
+      .filterNot(shouldExclude)
+      .toSet
   }
 }
 // scalastyle:on classforname
