Skip to content

Commit 95a48ac

Browse files
author
Davies Liu
committed
Merge branch 'master' of github.com:apache/spark into randomSplit
2 parents 0d9b256 + 484fecb commit 95a48ac

File tree

250 files changed

+9834
-2470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

250 files changed

+9834
-2470
lines changed

.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ SparkImports.scala
4444
SparkJLineCompletion.scala
4545
SparkJLineReader.scala
4646
SparkMemberHandlers.scala
47+
SparkReplReporter.scala
4748
sbt
4849
sbt-launch-lib.bash
4950
plugins.sbt

assembly/pom.xml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,22 +66,22 @@
6666
</dependency>
6767
<dependency>
6868
<groupId>org.apache.spark</groupId>
69-
<artifactId>spark-repl_${scala.binary.version}</artifactId>
69+
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
7070
<version>${project.version}</version>
7171
</dependency>
7272
<dependency>
7373
<groupId>org.apache.spark</groupId>
74-
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
74+
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
7575
<version>${project.version}</version>
7676
</dependency>
7777
<dependency>
7878
<groupId>org.apache.spark</groupId>
79-
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
79+
<artifactId>spark-sql_${scala.binary.version}</artifactId>
8080
<version>${project.version}</version>
8181
</dependency>
8282
<dependency>
8383
<groupId>org.apache.spark</groupId>
84-
<artifactId>spark-sql_${scala.binary.version}</artifactId>
84+
<artifactId>spark-repl_${scala.binary.version}</artifactId>
8585
<version>${project.version}</version>
8686
</dependency>
8787
</dependencies>
@@ -197,6 +197,11 @@
197197
<artifactId>spark-hive_${scala.binary.version}</artifactId>
198198
<version>${project.version}</version>
199199
</dependency>
200+
</dependencies>
201+
</profile>
202+
<profile>
203+
<id>hive-thriftserver</id>
204+
<dependencies>
200205
<dependency>
201206
<groupId>org.apache.spark</groupId>
202207
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>

bin/compute-classpath.sh

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
# This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
2121
# script and the ExecutorRunner in standalone cluster mode.
2222

23-
SCALA_VERSION=2.10
24-
2523
# Figure out where Spark is installed
2624
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
2725

@@ -36,7 +34,7 @@ else
3634
CLASSPATH="$CLASSPATH:$FWDIR/conf"
3735
fi
3836

39-
ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
37+
ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SPARK_SCALA_VERSION"
4038

4139
if [ -n "$JAVA_HOME" ]; then
4240
JAR_CMD="$JAVA_HOME/bin/jar"
@@ -48,19 +46,19 @@ fi
4846
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
4947
echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
5048
"classes ahead of assembly." >&2
51-
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
49+
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/classes"
5250
CLASSPATH="$CLASSPATH:$FWDIR/core/target/jars/*"
53-
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
54-
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
55-
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
56-
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
57-
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
58-
CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
59-
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
60-
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
61-
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
62-
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SCALA_VERSION/classes"
63-
CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
51+
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/classes"
52+
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/classes"
53+
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/classes"
54+
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/classes"
55+
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/classes"
56+
CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/classes"
57+
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/classes"
58+
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/classes"
59+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/classes"
60+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SPARK_SCALA_VERSION/classes"
61+
CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SPARK_SCALA_VERSION/classes"
6462
fi
6563

6664
# Use spark-assembly jar from either RELEASE or assembly directory
@@ -123,15 +121,15 @@ fi
123121

124122
# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
125123
if [[ $SPARK_TESTING == 1 ]]; then
126-
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
127-
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/test-classes"
128-
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/test-classes"
129-
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
130-
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
131-
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
132-
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
133-
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
134-
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
124+
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/test-classes"
125+
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/test-classes"
126+
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/test-classes"
127+
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/test-classes"
128+
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SPARK_SCALA_VERSION/test-classes"
129+
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SPARK_SCALA_VERSION/test-classes"
130+
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SPARK_SCALA_VERSION/test-classes"
131+
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SPARK_SCALA_VERSION/test-classes"
132+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/test-classes"
135133
fi
136134

137135
# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !

bin/load-spark-env.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,23 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
3636
set +a
3737
fi
3838
fi
39+
40+
# Setting SPARK_SCALA_VERSION if not already set.
41+
42+
if [ -z "$SPARK_SCALA_VERSION" ]; then
43+
44+
ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
45+
ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"
46+
47+
if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
48+
echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
49+
echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
50+
exit 1
51+
fi
52+
53+
if [ -d "$ASSEMBLY_DIR2" ]; then
54+
export SPARK_SCALA_VERSION="2.11"
55+
else
56+
export SPARK_SCALA_VERSION="2.10"
57+
fi
58+
fi

bin/pyspark

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export SPARK_HOME="$FWDIR"
2525

2626
source "$FWDIR/bin/utils.sh"
2727

28-
SCALA_VERSION=2.10
28+
source "$FWDIR"/bin/load-spark-env.sh
2929

3030
function usage() {
3131
echo "Usage: ./bin/pyspark [options]" 1>&2
@@ -40,16 +40,14 @@ fi
4040
# Exit if the user hasn't compiled Spark
4141
if [ ! -f "$FWDIR/RELEASE" ]; then
4242
# Exit if the user hasn't compiled Spark
43-
ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
43+
ls "$FWDIR"/assembly/target/scala-$SPARK_SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
4444
if [[ $? != 0 ]]; then
4545
echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
4646
echo "You need to build Spark before running this program" 1>&2
4747
exit 1
4848
fi
4949
fi
5050

51-
. "$FWDIR"/bin/load-spark-env.sh
52-
5351
# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
5452
# executable, while the worker would still be launched using PYSPARK_PYTHON.
5553
#

bin/run-example

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
# limitations under the License.
1818
#
1919

20-
SCALA_VERSION=2.10
21-
2220
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
2321
export SPARK_HOME="$FWDIR"
2422
EXAMPLES_DIR="$FWDIR"/examples
2523

24+
. "$FWDIR"/bin/load-spark-env.sh
25+
2626
if [ -n "$1" ]; then
2727
EXAMPLE_CLASS="$1"
2828
shift
@@ -36,8 +36,8 @@ fi
3636

3737
if [ -f "$FWDIR/RELEASE" ]; then
3838
export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
39-
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
40-
export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`"
39+
elif [ -e "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
40+
export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar`"
4141
fi
4242

4343
if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then

bin/spark-class

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ case "`uname`" in
2424
CYGWIN*) cygwin=true;;
2525
esac
2626

27-
SCALA_VERSION=2.10
28-
2927
# Figure out where Spark is installed
3028
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
3129

@@ -128,9 +126,9 @@ fi
128126

129127
TOOLS_DIR="$FWDIR"/tools
130128
SPARK_TOOLS_JAR=""
131-
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
129+
if [ -e "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
132130
# Use the JAR from the SBT build
133-
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`"
131+
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar`"
134132
fi
135133
if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
136134
# Use the JAR from the Maven build
@@ -149,7 +147,7 @@ fi
149147

150148
if [[ "$1" =~ org.apache.spark.tools.* ]]; then
151149
if test -z "$SPARK_TOOLS_JAR"; then
152-
echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SCALA_VERSION/" 1>&2
150+
echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2
153151
echo "You need to build Spark before running $1." 1>&2
154152
exit 1
155153
fi

core/pom.xml

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,34 @@
3434
<name>Spark Project Core</name>
3535
<url>http://spark.apache.org/</url>
3636
<dependencies>
37+
<dependency>
38+
<groupId>com.twitter</groupId>
39+
<artifactId>chill_${scala.binary.version}</artifactId>
40+
<exclusions>
41+
<exclusion>
42+
<groupId>org.ow2.asm</groupId>
43+
<artifactId>asm</artifactId>
44+
</exclusion>
45+
<exclusion>
46+
<groupId>org.ow2.asm</groupId>
47+
<artifactId>asm-commons</artifactId>
48+
</exclusion>
49+
</exclusions>
50+
</dependency>
51+
<dependency>
52+
<groupId>com.twitter</groupId>
53+
<artifactId>chill-java</artifactId>
54+
<exclusions>
55+
<exclusion>
56+
<groupId>org.ow2.asm</groupId>
57+
<artifactId>asm</artifactId>
58+
</exclusion>
59+
<exclusion>
60+
<groupId>org.ow2.asm</groupId>
61+
<artifactId>asm-commons</artifactId>
62+
</exclusion>
63+
</exclusions>
64+
</dependency>
3765
<dependency>
3866
<groupId>org.apache.hadoop</groupId>
3967
<artifactId>hadoop-client</artifactId>
@@ -46,12 +74,12 @@
4674
</dependency>
4775
<dependency>
4876
<groupId>org.apache.spark</groupId>
49-
<artifactId>spark-network-common_2.10</artifactId>
77+
<artifactId>spark-network-common_${scala.binary.version}</artifactId>
5078
<version>${project.version}</version>
5179
</dependency>
5280
<dependency>
5381
<groupId>org.apache.spark</groupId>
54-
<artifactId>spark-network-shuffle_2.10</artifactId>
82+
<artifactId>spark-network-shuffle_${scala.binary.version}</artifactId>
5583
<version>${project.version}</version>
5684
</dependency>
5785
<dependency>
@@ -132,14 +160,6 @@
132160
<groupId>net.jpountz.lz4</groupId>
133161
<artifactId>lz4</artifactId>
134162
</dependency>
135-
<dependency>
136-
<groupId>com.twitter</groupId>
137-
<artifactId>chill_${scala.binary.version}</artifactId>
138-
</dependency>
139-
<dependency>
140-
<groupId>com.twitter</groupId>
141-
<artifactId>chill-java</artifactId>
142-
</dependency>
143163
<dependency>
144164
<groupId>org.roaringbitmap</groupId>
145165
<artifactId>RoaringBitmap</artifactId>
@@ -204,13 +224,6 @@
204224
<artifactId>derby</artifactId>
205225
<scope>test</scope>
206226
</dependency>
207-
<dependency>
208-
<groupId>org.tachyonproject</groupId>
209-
<artifactId>tachyon</artifactId>
210-
<version>0.5.0</version>
211-
<type>test-jar</type>
212-
<scope>test</scope>
213-
</dependency>
214227
<dependency>
215228
<groupId>org.tachyonproject</groupId>
216229
<artifactId>tachyon-client</artifactId>
@@ -316,14 +329,16 @@
316329
<plugin>
317330
<groupId>org.scalatest</groupId>
318331
<artifactId>scalatest-maven-plugin</artifactId>
319-
<configuration>
320-
<environmentVariables>
321-
<SPARK_HOME>${basedir}/..</SPARK_HOME>
322-
<SPARK_TESTING>1</SPARK_TESTING>
323-
<SPARK_CLASSPATH>${spark.classpath}</SPARK_CLASSPATH>
324-
</environmentVariables>
325-
</configuration>
332+
<executions>
333+
<execution>
334+
<id>test</id>
335+
<goals>
336+
<goal>test</goal>
337+
</goals>
338+
</execution>
339+
</executions>
326340
</plugin>
341+
327342
<!-- Unzip py4j so we can include its files in the jar -->
328343
<plugin>
329344
<groupId>org.apache.maven.plugins</groupId>
@@ -431,4 +446,5 @@
431446
</resource>
432447
</resources>
433448
</build>
449+
434450
</project>

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,11 @@ object SparkEnv extends Logging {
168168
executorId: String,
169169
hostname: String,
170170
port: Int,
171+
numCores: Int,
171172
isLocal: Boolean,
172173
actorSystem: ActorSystem = null): SparkEnv = {
173-
create(conf, executorId, hostname, port, false, isLocal, defaultActorSystem = actorSystem)
174+
create(conf, executorId, hostname, port, false, isLocal, defaultActorSystem = actorSystem,
175+
numUsableCores = numCores)
174176
}
175177

176178
/**
@@ -184,7 +186,8 @@ object SparkEnv extends Logging {
184186
isDriver: Boolean,
185187
isLocal: Boolean,
186188
listenerBus: LiveListenerBus = null,
187-
defaultActorSystem: ActorSystem = null): SparkEnv = {
189+
defaultActorSystem: ActorSystem = null,
190+
numUsableCores: Int = 0): SparkEnv = {
188191

189192
// Listener bus is only used on the driver
190193
if (isDriver) {
@@ -276,7 +279,7 @@ object SparkEnv extends Logging {
276279
val blockTransferService =
277280
conf.get("spark.shuffle.blockTransferService", "netty").toLowerCase match {
278281
case "netty" =>
279-
new NettyBlockTransferService(conf, securityManager)
282+
new NettyBlockTransferService(conf, securityManager, numUsableCores)
280283
case "nio" =>
281284
new NioBlockTransferService(conf, securityManager)
282285
}
@@ -287,7 +290,8 @@ object SparkEnv extends Logging {
287290

288291
// NB: blockManager is not valid until initialize() is called later.
289292
val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster,
290-
serializer, conf, mapOutputTracker, shuffleManager, blockTransferService, securityManager)
293+
serializer, conf, mapOutputTracker, shuffleManager, blockTransferService, securityManager,
294+
numUsableCores)
291295

292296
val broadcastManager = new BroadcastManager(isDriver, conf, securityManager)
293297

core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ object PythonRunner {
8787
// Strip the URI scheme from the path
8888
formattedPath =
8989
new URI(formattedPath).getScheme match {
90-
case Utils.windowsDrive(d) if windows => formattedPath
9190
case null => formattedPath
91+
case Utils.windowsDrive(d) if windows => formattedPath
9292
case _ => new URI(formattedPath).getPath
9393
}
9494

0 commit comments

Comments
 (0)