Skip to content

Commit b673bf7

Browse files
author
Diana Carroll
committed
Merge branch 'master' of github.com:apache/spark
2 parents 0309cf9 + 21109fb commit b673bf7

File tree

11,776 files changed

+263529
-5872
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

11,776 files changed

+263529
-5872
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ dist/
4545
spark-*-bin.tar.gz
4646
unit-tests.log
4747
/lib/
48+
rat-results.txt

.rat-excludes

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
target
2+
.gitignore
3+
.project
4+
.classpath
5+
.rat-excludes
6+
.*md
7+
derby.log
8+
TAGS
9+
RELEASE
10+
control
11+
docs
12+
fairscheduler.xml.template
13+
log4j.properties
14+
log4j.properties.template
15+
metrics.properties.template
16+
slaves
17+
spark-env.sh
18+
spark-env.sh.template
19+
log4j-defaults.properties
20+
sorttable.js
21+
.*txt
22+
.*data
23+
.*log
24+
cloudpickle.py
25+
join.py
26+
SparkExprTyper.scala
27+
SparkILoop.scala
28+
SparkILoopInit.scala
29+
SparkIMain.scala
30+
SparkImports.scala
31+
SparkJLineCompletion.scala
32+
SparkJLineReader.scala
33+
SparkMemberHandlers.scala
34+
sbt
35+
sbt-launch-lib.bash
36+
plugins.sbt
37+
work
38+
.*\.q
39+
golden

LICENSE

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,35 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
396396
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
397397
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
398398
POSSIBILITY OF SUCH DAMAGE.
399+
400+
401+
========================================================================
402+
For sbt and sbt-launch-lib.bash in sbt/:
403+
========================================================================
404+
405+
// Generated from http://www.opensource.org/licenses/bsd-license.php
406+
Copyright (c) 2011, Paul Phillips.
407+
All rights reserved.
408+
409+
Redistribution and use in source and binary forms, with or without
410+
modification, are permitted provided that the following conditions are met:
411+
412+
* Redistributions of source code must retain the above copyright notice,
413+
this list of conditions and the following disclaimer.
414+
* Redistributions in binary form must reproduce the above copyright notice,
415+
this list of conditions and the following disclaimer in the documentation
416+
and/or other materials provided with the distribution.
417+
* Neither the name of the author nor the names of its contributors may be
418+
used to endorse or promote products derived from this software without
419+
specific prior written permission.
420+
421+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
422+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
423+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
424+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
425+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
426+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
427+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
428+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
429+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
430+
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

NOTICE

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
Apache Spark
2-
Copyright 2013 The Apache Software Foundation.
2+
Copyright 2014 The Apache Software Foundation.
33

44
This product includes software developed at
55
The Apache Software Foundation (http://www.apache.org/).
6+
7+
In addition, this product includes:
8+
9+
- JUnit (http://www.junit.org) is a testing framework for Java. We included it
10+
under the terms of the Eclipse Public License v1.0.
11+
12+
- JTransforms (https://sites.google.com/site/piotrwendykier/software/jtransforms)
13+
provides fast transforms in Java. It is tri-licensed, and we included it under
14+
the terms of the Mozilla Public License v1.1.

assembly/pom.xml

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,20 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent</artifactId>
24-
<version>1.0.0-incubating-SNAPSHOT</version>
24+
<version>1.0.0-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

2828
<groupId>org.apache.spark</groupId>
2929
<artifactId>spark-assembly_2.10</artifactId>
3030
<name>Spark Project Assembly</name>
31-
<url>http://spark.incubator.apache.org/</url>
31+
<url>http://spark.apache.org/</url>
32+
<packaging>pom</packaging>
3233

3334
<properties>
34-
<spark.jar>${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar>
35+
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
36+
<spark.jar.basename>${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
37+
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
3538
<deb.pkg.name>spark</deb.pkg.name>
3639
<deb.install.path>/usr/share/spark</deb.install.path>
3740
<deb.user>root</deb.user>
@@ -76,6 +79,11 @@
7679
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
7780
<version>${project.version}</version>
7881
</dependency>
82+
<dependency>
83+
<groupId>org.apache.spark</groupId>
84+
<artifactId>spark-sql_${scala.binary.version}</artifactId>
85+
<version>${project.version}</version>
86+
</dependency>
7987
<dependency>
8088
<groupId>net.sf.py4j</groupId>
8189
<artifactId>py4j</artifactId>
@@ -155,6 +163,16 @@
155163
</dependency>
156164
</dependencies>
157165
</profile>
166+
<profile>
167+
<id>spark-ganglia-lgpl</id>
168+
<dependencies>
169+
<dependency>
170+
<groupId>org.apache.spark</groupId>
171+
<artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
172+
<version>${project.version}</version>
173+
</dependency>
174+
</dependencies>
175+
</profile>
158176
<profile>
159177
<id>bigtop-dist</id>
160178
<!-- This profile uses the assembly plugin to create a special "dist" package for BigTop

assembly/src/main/assembly/assembly.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@
5555
<include>**/*</include>
5656
</includes>
5757
</fileSet>
58+
<fileSet>
59+
<directory>
60+
${project.parent.basedir}/assembly/target/${spark.jar.dir}
61+
</directory>
62+
<outputDirectory>/</outputDirectory>
63+
<includes>
64+
<include>${spark.jar.basename}</include>
65+
</includes>
66+
</fileSet>
5867
</fileSets>
5968

6069
<dependencySets>
@@ -75,6 +84,8 @@
7584
<excludes>
7685
<exclude>org.apache.hadoop:*:jar</exclude>
7786
<exclude>org.apache.spark:*:jar</exclude>
87+
<exclude>org.apache.zookeeper:*:jar</exclude>
88+
<exclude>org.apache.avro:*:jar</exclude>
7889
</excludes>
7990
</dependencySet>
8091
</dependencySets>

bagel/pom.xml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,29 @@
2121
<parent>
2222
<groupId>org.apache.spark</groupId>
2323
<artifactId>spark-parent</artifactId>
24-
<version>1.0.0-incubating-SNAPSHOT</version>
24+
<version>1.0.0-SNAPSHOT</version>
2525
<relativePath>../pom.xml</relativePath>
2626
</parent>
2727

2828
<groupId>org.apache.spark</groupId>
2929
<artifactId>spark-bagel_2.10</artifactId>
3030
<packaging>jar</packaging>
3131
<name>Spark Project Bagel</name>
32-
<url>http://spark.incubator.apache.org/</url>
32+
<url>http://spark.apache.org/</url>
33+
34+
<profiles>
35+
<profile>
36+
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around
37+
a Hadoop 0.23.X issue -->
38+
<id>yarn-alpha</id>
39+
<dependencies>
40+
<dependency>
41+
<groupId>org.apache.avro</groupId>
42+
<artifactId>avro</artifactId>
43+
</dependency>
44+
</dependencies>
45+
</profile>
46+
</profiles>
3347

3448
<dependencies>
3549
<dependency>

bin/compute-classpath.sh

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,43 @@ fi
3333
# Build up classpath
3434
CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
3535

36+
# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
37+
# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
38+
# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
39+
# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
40+
# the future.
41+
if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
42+
echo "Hive assembly found, including hive support. If this isn't desired run sbt hive/clean."
43+
44+
# Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
45+
DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
46+
CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
47+
48+
ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
49+
else
50+
ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
51+
fi
52+
3653
# First check if we have a dependencies jar. If so, include binary classes with the deps jar
37-
if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar ]; then
54+
if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
3855
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
3956
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
4057
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
4158
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
4259
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
4360
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
61+
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
62+
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
63+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
4464

45-
DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
65+
DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
4666
CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
4767
else
4868
# Else use spark-assembly jar from either RELEASE or assembly directory
4969
if [ -f "$FWDIR/RELEASE" ]; then
50-
ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar`
70+
ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
5171
else
52-
ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar`
72+
ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
5373
fi
5474
CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
5575
fi
@@ -62,6 +82,9 @@ if [[ $SPARK_TESTING == 1 ]]; then
6282
CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
6383
CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
6484
CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
85+
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
86+
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
87+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
6588
fi
6689

6790
# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !

bin/spark-class

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -40,34 +40,46 @@ if [ -z "$1" ]; then
4040
exit 1
4141
fi
4242

43-
# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable
44-
# values for that; it doesn't need a lot
45-
if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then
46-
SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
47-
SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
48-
# Do not overwrite SPARK_JAVA_OPTS environment variable in this script
49-
OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS" # Empty by default
50-
else
51-
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
43+
if [ -n "$SPARK_MEM" ]; then
44+
echo "Warning: SPARK_MEM is deprecated, please use a more specific config option"
45+
echo "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)."
5246
fi
5347

48+
# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
49+
DEFAULT_MEM=${SPARK_MEM:-512m}
50+
51+
SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
5452

55-
# Add java opts for master, worker, executor. The opts maybe null
53+
# Add java opts and memory settings for master, worker, executors, and repl.
5654
case "$1" in
55+
# Master and Worker use SPARK_DAEMON_JAVA_OPTS (and specific opts) + SPARK_DAEMON_MEMORY.
5756
'org.apache.spark.deploy.master.Master')
58-
OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS"
57+
OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_MASTER_OPTS"
58+
OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
5959
;;
6060
'org.apache.spark.deploy.worker.Worker')
61-
OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS"
61+
OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_WORKER_OPTS"
62+
OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
6263
;;
64+
65+
# Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
6366
'org.apache.spark.executor.CoarseGrainedExecutorBackend')
64-
OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
67+
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
68+
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
6569
;;
6670
'org.apache.spark.executor.MesosExecutorBackend')
67-
OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
71+
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
72+
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
6873
;;
74+
75+
# All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
6976
'org.apache.spark.repl.Main')
70-
OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS"
77+
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS"
78+
OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
79+
;;
80+
*)
81+
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
82+
OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
7183
;;
7284
esac
7385

@@ -83,14 +95,10 @@ else
8395
fi
8496
fi
8597

86-
# Set SPARK_MEM if it isn't already set since we also use it for this process
87-
SPARK_MEM=${SPARK_MEM:-512m}
88-
export SPARK_MEM
89-
9098
# Set JAVA_OPTS to be able to load native libraries and to set heap size
9199
JAVA_OPTS="$OUR_JAVA_OPTS"
92100
JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
93-
JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
101+
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
94102
# Load extra JAVA_OPTS from conf/java-opts, if it exists
95103
if [ -e "$FWDIR/conf/java-opts" ] ; then
96104
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"

0 commit comments

Comments
 (0)