Commit dc126f2
SPARK-1094 Support MiMa for reporting binary compatibility across versions.
This adds some changes on top of the initial work by @ScrapCodes in #20. The goal here is to do automated checking of Spark commits to determine whether they break binary compatibility.

1. Special case for inner classes of package-private objects.
2. Made tools classes accessible when running `spark-class`.
3. Made some declared types in MLlib more general.
4. Various other improvements to the exclude-generation script.
5. In-code documentation.

Author: Patrick Wendell <pwendell@gmail.com>
Author: Prashant Sharma <prashant.s@imaginea.com>
Author: Prashant Sharma <scrapcodes@gmail.com>

Closes #207 from pwendell/mima and squashes the following commits:

22ae267 [Patrick Wendell] New binary changes after upmerge
6c2030d [Patrick Wendell] Merge remote-tracking branch 'apache/master' into mima
3666cf1 [Patrick Wendell] Minor style change
0e0f570 [Patrick Wendell] Small fix and removing directory listings
647c547 [Patrick Wendell] Review feedback.
c39f3b5 [Patrick Wendell] Some enhancements to binary checking.
4c771e0 [Prashant Sharma] Added a tool to generate mima excludes and also adapted build to pick automatically.
b551519 [Prashant Sharma] adding a new exclude after rebasing with master
651844c [Prashant Sharma] Support MiMa for reporting binary compatibility across versions.
1 parent 8043b7b commit dc126f2
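
For context, MiMa (the Migration Manager) compares the compiled bytecode of two artifact versions and reports any change that would break linkage for clients built against the older version. A minimal hypothetical sketch of the kind of change it catches (illustrative names, not Spark code):

// Hypothetical example: changing a public method's return type may stay
// source-compatible (Int widens to Long at call sites) but is binary-
// incompatible, because the JVM method descriptor changes from
// count(Lscala/collection/Seq;)J to count(Lscala/collection/Seq;)I.
object ApiV1 { def count(xs: Seq[Int]): Long = xs.size.toLong }
object ApiV2 { def count(xs: Seq[Int]): Int = xs.size }
// Client bytecode compiled against ApiV1 fails to link against ApiV2;
// MiMa reports this class of change as an IncompatibleResultTypeProblem.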

File tree: 8 files changed (+249, -7 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@
 sbt/*.jar
 .settings
 .cache
+.mima-excludes
 /build/
 work/
 out/

bin/compute-classpath.sh

Lines changed: 1 addition & 0 deletions
@@ -58,6 +58,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"

bin/spark-class

Lines changed: 1 addition & 2 deletions
@@ -137,8 +137,7 @@ fi
 
 # Compute classpath using external script
 CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
-
-if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
 fi
 
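
Note: bash's [[ ... =~ ... ]] performs an unanchored regular-expression match, so this generalizes the previous exact-string comparison: any main class under org.apache.spark.tools (including the new GenerateMIMAIgnore) now gets $SPARK_TOOLS_JAR appended to its classpath, not just JavaAPICompletenessChecker.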

dev/run-tests

Lines changed: 7 additions & 0 deletions
@@ -59,3 +59,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
   export PYSPARK_PYTHON=/usr/local/bin/python2.7
 fi
 ./python/run-tests
+
+echo "========================================================================="
+echo "Detecting binary incompatibilities with MiMa"
+echo "========================================================================="
+./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+sbt/sbt mima-report-binary-issues
+
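
The check added here is two-phased: GenerateMIMAIgnore is launched through ./bin/spark-class (which, per the change above, now puts the tools classes on the classpath) and writes a .mima-excludes file listing package-private classes; sbt/sbt mima-report-binary-issues then compares the current build against the previous release artifacts, applying those excludes via project/MimaBuild.scala below.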

project/MimaBuild.scala

Lines changed: 83 additions & 0 deletions (new file)

@@ -0,0 +1,83 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
import sbt._

object MimaBuild {

  def ignoredABIProblems(base: File) = {
    import com.typesafe.tools.mima.core._
    import com.typesafe.tools.mima.core.ProblemFilters._

    // Excludes placed here will be used for all Spark versions
    val defaultExcludes = Seq()

    // Read package-private excludes from file
    val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
    val excludeFile = file(excludeFilePath)
    val packagePrivateList: Seq[String] =
      if (!excludeFile.exists()) {
        Seq()
      } else {
        IO.read(excludeFile).split("\n")
      }

    def excludeClass(className: String) = {
      Seq(
        excludePackage(className),
        ProblemFilters.exclude[MissingClassProblem](className),
        ProblemFilters.exclude[MissingTypesProblem](className),
        excludePackage(className + "$"),
        ProblemFilters.exclude[MissingClassProblem](className + "$"),
        ProblemFilters.exclude[MissingTypesProblem](className + "$")
      )
    }
    def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)

    val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)

    /* Excludes specific to a given version of Spark. When comparing the given version against
       its immediate predecessor, the excludes listed here will be applied. */
    val versionExcludes =
      SparkBuild.SPARK_VERSION match {
        case v if v.startsWith("1.0") =>
          Seq(
            excludePackage("org.apache.spark.api.java"),
            excludePackage("org.apache.spark.streaming.api.java"),
            excludePackage("org.apache.spark.mllib")
          ) ++
          excludeSparkClass("rdd.ClassTags") ++
          excludeSparkClass("util.XORShiftRandom") ++
          excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
          excludeSparkClass("mllib.optimization.SquaredGradient") ++
          excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
          excludeSparkClass("mllib.regression.LassoWithSGD") ++
          excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
        case _ => Seq()
      }

    defaultExcludes ++ packagePrivateExcludes ++ versionExcludes
  }

  def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
    previousArtifact := None,
    binaryIssueFilters ++= ignoredABIProblems(sparkHome)
  )

}
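
For illustration, a project reusing this setup directly (without the generated exclude file) might wire the same plugin keys as in the following build.sbt sketch; the plugin version matches the one added in project/plugins.sbt below, and the excluded class name is hypothetical:

import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
import com.typesafe.tools.mima.core._

// Enable MiMa with its default settings, pin the baseline artifact, and
// filter one hypothetical internal class out of the comparison.
mimaDefaultSettings

previousArtifact := Some("org.apache.spark" % "spark-core_2.10" % "0.9.0-incubating")

binaryIssueFilters += ProblemFilters.exclude[MissingClassProblem](
  "org.apache.spark.SomeInternalClass")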

project/SparkBuild.scala

Lines changed: 22 additions & 5 deletions
@@ -22,13 +22,16 @@ import sbtassembly.Plugin._
 import AssemblyKeys._
 import scala.util.Properties
 import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
+import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
 
 import scala.collection.JavaConversions._
 
 // For Sonatype publishing
 //import com.jsuereth.pgp.sbtplugin.PgpKeys._
 
 object SparkBuild extends Build {
+  val SPARK_VERSION = "1.0.0-SNAPSHOT"
+
   // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
   // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
   // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
@@ -146,9 +149,9 @@ object SparkBuild extends Build {
   lazy val allProjects = packageProjects ++ allExternalRefs ++
     Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests
 
-  def sharedSettings = Defaults.defaultSettings ++ Seq(
+  def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
     organization := "org.apache.spark",
-    version := "1.0.0-SNAPSHOT",
+    version := SPARK_VERSION,
     scalaVersion := "2.10.3",
     scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
       "-target:" + SCALAC_JVM_VERSION),
@@ -284,9 +287,14 @@
   val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
   val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")
 
+  def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
+      version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
+    val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
+    Some(organization % fullId % version) // the artifact to compare binary compatibility with
+  }
+
   def coreSettings = sharedSettings ++ Seq(
     name := "spark-core",
-
     libraryDependencies ++= Seq(
       "com.google.guava" % "guava" % "14.0.1",
       "com.google.code.findbugs" % "jsr305" % "1.3.9",
@@ -325,7 +333,7 @@
     publish := {}
   )
 
-  def replSettings = sharedSettings ++ Seq(
+  def replSettings = sharedSettings ++ Seq(
     name := "spark-repl",
     libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v ),
     libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v ),
@@ -354,17 +362,20 @@
 
   def graphxSettings = sharedSettings ++ Seq(
     name := "spark-graphx",
+    previousArtifact := sparkPreviousArtifact("spark-graphx"),
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3"
     )
   )
 
   def bagelSettings = sharedSettings ++ Seq(
-    name := "spark-bagel"
+    name := "spark-bagel",
+    previousArtifact := sparkPreviousArtifact("spark-bagel")
   )
 
   def mllibSettings = sharedSettings ++ Seq(
     name := "spark-mllib",
+    previousArtifact := sparkPreviousArtifact("spark-mllib"),
     libraryDependencies ++= Seq(
       "org.jblas" % "jblas" % "1.2.3",
       "org.scalanlp" %% "breeze" % "0.7"
@@ -428,6 +439,7 @@
 
   def streamingSettings = sharedSettings ++ Seq(
     name := "spark-streaming",
+    previousArtifact := sparkPreviousArtifact("spark-streaming"),
     libraryDependencies ++= Seq(
       "commons-io" % "commons-io" % "2.4"
     )
@@ -503,13 +515,15 @@
 
   def twitterSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-twitter",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
     libraryDependencies ++= Seq(
       "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty)
     )
   )
 
   def kafkaSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-kafka",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
     libraryDependencies ++= Seq(
       "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty),
       "org.apache.kafka" %% "kafka" % "0.8.0"
@@ -522,20 +536,23 @@
 
   def flumeSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-flume",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
     libraryDependencies ++= Seq(
       "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty)
     )
   )
 
   def zeromqSettings() = sharedSettings ++ Seq(
     name := "spark-streaming-zeromq",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
     libraryDependencies ++= Seq(
       "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty)
     )
   )
 
   def mqttSettings() = streamingSettings ++ Seq(
     name := "spark-streaming-mqtt",
+    previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
     libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
   )
 }
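
Note that MimaBuild.mimaSettings defaults previousArtifact to None, so only the modules above that explicitly set previousArtifact := sparkPreviousArtifact(...) participate in the check. For example, sparkPreviousArtifact("spark-graphx") expands to Some("org.apache.spark" % "spark-graphx_2.10" % "0.9.0-incubating"), i.e. each module is compared against its 0.9.0-incubating release artifact.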

project/plugins.sbt

Lines changed: 2 additions & 0 deletions
@@ -19,4 +19,6 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")
 
 addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0")
 
+addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6")
+
 addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.0")
tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala

Lines changed: 132 additions & 0 deletions (new file)
@@ -0,0 +1,132 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.tools

import java.io.File
import java.util.jar.JarFile

import scala.collection.mutable
import scala.collection.JavaConversions._
import scala.reflect.runtime.universe.runtimeMirror

/**
 * A tool for generating classes to be excluded during binary checking with MIMA. It is expected
 * that this tool is run with ./spark-class.
 *
 * MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
 * This tool looks at all currently package-private classes and generates exclusions for them. Note
 * that this approach is not sound. It can lead to false positives if we move or rename a previously
 * package-private class. It can lead to false negatives if someone explicitly makes a class
 * package-private that wasn't before. This exists only to help catch certain classes of changes
 * which might be difficult to catch during review.
 */
object GenerateMIMAIgnore {
  private val classLoader = Thread.currentThread().getContextClassLoader
  private val mirror = runtimeMirror(classLoader)

  private def classesPrivateWithin(packageName: String): Set[String] = {

    val classes = getClasses(packageName, classLoader)
    val privateClasses = mutable.HashSet[String]()

    def isPackagePrivate(className: String) = {
      try {
        /* Couldn't figure out if it's possible to determine a-priori whether a given symbol
           is a module or class. */

        val privateAsClass = mirror
          .staticClass(className)
          .privateWithin
          .fullName
          .startsWith(packageName)

        val privateAsModule = mirror
          .staticModule(className)
          .privateWithin
          .fullName
          .startsWith(packageName)

        privateAsClass || privateAsModule
      } catch {
        case _: Throwable => {
          println("Error determining visibility: " + className)
          false
        }
      }
    }

    for (className <- classes) {
      val directlyPrivateSpark = isPackagePrivate(className)

      /* Inner classes defined within a private[spark] class or object are effectively
         invisible, so we account for them as package private. */
      val indirectlyPrivateSpark = {
        val maybeOuter = className.toString.takeWhile(_ != '$')
        if (maybeOuter != className) {
          isPackagePrivate(maybeOuter)
        } else {
          false
        }
      }
      if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
    }
    privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
  }

  def main(args: Array[String]) {
    scala.tools.nsc.io.File(".mima-excludes").
      writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
    println("Created : .mima-excludes in current directory.")
  }


  private def shouldExclude(name: String) = {
    // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
    name.contains("anon") ||
    name.endsWith("$class") ||
    name.contains("$sp")
  }

  /**
   * Scans all classes accessible from the context class loader which belong to the given package
   * and subpackages both from directories and jars present on the classpath.
   */
  private def getClasses(packageName: String,
      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
    val path = packageName.replace('.', '/')
    val resources = classLoader.getResources(path)

    val jars = resources.filter(x => x.getProtocol == "jar")
      .map(_.getFile.split(":")(1).split("!")(0)).toSeq

    jars.flatMap(getClassesFromJar(_, path))
      .map(_.getName)
      .filterNot(shouldExclude).toSet
  }

  /**
   * Get all classes in a package from a jar file.
   */
  private def getClassesFromJar(jarPath: String, packageName: String) = {
    val jar = new JarFile(new File(jarPath))
    val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
    val classes = for (entry <- enums if entry.endsWith(".class"))
      yield Class.forName(entry.replace('/', '.').stripSuffix(".class"))
    classes
  }
}
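
For reference, a minimal standalone sketch of the scala-reflect probe that isPackagePrivate builds on; the probed class here is an arbitrary public one, chosen only to show the non-private case (assumption: for public symbols privateWithin is NoSymbol, whose fullName renders as "<none>" and therefore fails the startsWith test above, which is the desired outcome):

import scala.reflect.runtime.universe.runtimeMirror

object PrivateWithinDemo {
  def main(args: Array[String]): Unit = {
    val mirror = runtimeMirror(getClass.getClassLoader)
    // Public class: privateWithin is NoSymbol, so fullName prints "<none>"
    // and a startsWith("org.apache.spark") check would return false.
    val sym = mirror.staticClass("scala.collection.mutable.HashSet")
    println(sym.privateWithin.fullName)
  }
}

The flatMap at the end of classesPrivateWithin emits each excluded name both in JVM form (Outer$Inner) and with "#" separators, presumably because MiMa renders nested classes with "#" in its problem reports.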
