Commit bce72f4

Merge remote-tracking branch 'apache/master' into SPARK-4586
2 parents: 17ecfb9 + 3be2a88

600 files changed: +15389 -11573 lines


.gitignore

Lines changed: 6 additions & 2 deletions
@@ -8,16 +8,19 @@
 *.pyc
 .idea/
 .idea_modules/
-sbt/*.jar
+build/*.jar
 .settings
 .cache
+cache
 .generated-mima*
-/build/
 work/
 out/
 .DS_Store
 third_party/libmesos.so
 third_party/libmesos.dylib
+build/apache-maven*
+build/zinc*
+build/scala*
 conf/java-opts
 conf/*.sh
 conf/*.cmd
@@ -55,6 +58,7 @@ dev/create-release/*final
 spark-*-bin-*.tgz
 unit-tests.log
 /lib/
+ec2/lib/
 rat-results.txt
 scalastyle.txt
 scalastyle-output.xml

README.md

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ To build Spark and its example programs, run:

 (You do not need to do this if you downloaded a pre-built package.)
 More detailed documentation is available from the project site, at
-["Building Spark with Maven"](http://spark.apache.org/docs/latest/building-with-maven.html).
+["Building Spark with Maven"](http://spark.apache.org/docs/latest/building-spark.html).

 ## Interactive Scala Shell

assembly/pom.xml

Lines changed: 20 additions & 0 deletions
@@ -354,5 +354,25 @@
       </dependency>
     </dependencies>
   </profile>
+
+  <!-- Profiles that disable inclusion of certain dependencies. -->
+  <profile>
+    <id>hadoop-provided</id>
+    <properties>
+      <hadoop.deps.scope>provided</hadoop.deps.scope>
+    </properties>
+  </profile>
+  <profile>
+    <id>hive-provided</id>
+    <properties>
+      <hive.deps.scope>provided</hive.deps.scope>
+    </properties>
+  </profile>
+  <profile>
+    <id>parquet-provided</id>
+    <properties>
+      <parquet.deps.scope>provided</parquet.deps.scope>
+    </properties>
+  </profile>
 </profiles>
</project>
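
These profiles let a downstream packager mark Hadoop, Hive, or Parquet dependencies as provided, keeping them out of the Spark assembly so the distribution supplies them at runtime. A minimal build sketch, assuming a standard Maven invocation (the clean/package goals and the -DskipTests flag are illustrative, not part of this commit):

# Build an assembly that expects the distribution to supply Hadoop, Hive,
# and Parquet classes on the runtime classpath.
mvn -Phadoop-provided -Phive-provided -Pparquet-provided -DskipTests clean package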

bagel/pom.xml

Lines changed: 0 additions & 15 deletions
@@ -40,15 +40,6 @@
       <artifactId>spark-core_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
@@ -58,11 +49,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <plugins>
-      <plugin>
-        <groupId>org.scalatest</groupId>
-        <artifactId>scalatest-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
   </build>
</project>

bagel/src/test/resources/log4j.properties

Lines changed: 2 additions & 2 deletions
@@ -15,10 +15,10 @@
 # limitations under the License.
 #

-# Set everything to be logged to the file bagel/target/unit-tests.log
+# Set everything to be logged to the file target/unit-tests.log
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
-log4j.appender.file.append=false
+log4j.appender.file.append=true
 log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

bin/compute-classpath.cmd

Lines changed: 7 additions & 0 deletions
@@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
 set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
 :no_yarn_conf_dir

+rem To allow for distributions to append needed libraries to the classpath (e.g. when
+rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+rem append it to the final classpath.
+if not "x%SPARK_DIST_CLASSPATH%"=="x" (
+  set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
+)
+
 rem A bit of a hack to allow calling this script within run2.cmd without seeing output
 if "%DONT_PRINT_CLASSPATH%"=="1" goto exit

bin/compute-classpath.sh

Lines changed: 22 additions & 12 deletions
@@ -72,22 +72,25 @@ else
   assembly_folder="$ASSEMBLY_DIR"
 fi

-num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar$" | wc -l)"
-if [ "$num_jars" -eq "0" ]; then
-  echo "Failed to find Spark assembly in $assembly_folder"
-  echo "You need to build Spark before running this program."
-  exit 1
-fi
+num_jars=0
+
+for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark assembly in $assembly_folder" 1>&2
+    echo "You need to build Spark before running this program." 1>&2
+    exit 1
+  fi
+  ASSEMBLY_JAR="$f"
+  num_jars=$((num_jars+1))
+done
+
 if [ "$num_jars" -gt "1" ]; then
-  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar$")
-  echo "Found multiple Spark assembly jars in $assembly_folder:"
-  echo "$jars_list"
-  echo "Please remove all but one jar."
+  echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2
+  ls ${assembly_folder}/spark-assembly*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

-ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"
-
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
@@ -146,4 +149,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
   CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
 fi

+# To allow for distributions to append needed libraries to the classpath (e.g. when
+# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+# append it to the final classpath.
+if [ -n "$SPARK_DIST_CLASSPATH" ]; then
+  CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
+fi
+
 echo "$CLASSPATH"
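
With this hook in place, a distribution built under the hadoop-provided profile can inject its own Hadoop jars at launch time. A minimal sketch, assuming a Hadoop installation on the PATH (hadoop classpath prints that installation's classpath; the spark-shell invocation is illustrative):

# Expose the distribution's Hadoop jars to every launcher that sources
# compute-classpath.sh.
export SPARK_DIST_CLASSPATH="$(hadoop classpath)"
./bin/spark-shell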

bin/run-example

Lines changed: 21 additions & 6 deletions
@@ -35,17 +35,32 @@ else
 fi

 if [ -f "$FWDIR/RELEASE" ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
-elif [ -e "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar`"
+  JAR_PATH="${FWDIR}/lib"
+else
+  JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
 fi

-if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
-  echo "You need to build Spark before running this program" 1>&2
+JAR_COUNT=0
+
+for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
+    exit 1
+  fi
+  SPARK_EXAMPLES_JAR="$f"
+  JAR_COUNT=$((JAR_COUNT+1))
+done
+
+if [ "$JAR_COUNT" -gt "1" ]; then
+  echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
+  ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

+export SPARK_EXAMPLES_JAR
+
 EXAMPLE_MASTER=${MASTER:-"local[*]"}

 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
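
The rewritten script resolves the examples assembly from either the release layout ($FWDIR/lib) or the build layout (examples/target) and fails fast on zero or multiple matching jars. A typical invocation, using the stock SparkPi example (the trailing task-count argument is illustrative):

# Runs org.apache.spark.examples.SparkPi on the default local[*] master.
./bin/run-example SparkPi 10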

bin/spark-class

Lines changed: 3 additions & 1 deletion
@@ -71,6 +71,8 @@ case "$1" in
   'org.apache.spark.executor.MesosExecutorBackend')
     OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
+    export PYTHONPATH="$FWDIR/python:$PYTHONPATH"
+    export PYTHONPATH="$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
     ;;

   # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
@@ -148,7 +150,7 @@ fi
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   if test -z "$SPARK_TOOLS_JAR"; then
     echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2
-    echo "You need to build Spark before running $1." 1>&2
+    echo "You need to run \"build/sbt tools/package\" before running $1." 1>&2
     exit 1
   fi
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"

bin/spark-submit

Lines changed: 9 additions & 1 deletion
@@ -38,11 +38,19 @@ while (($#)); do
     export SPARK_SUBMIT_CLASSPATH=$2
   elif [ "$1" = "--driver-java-options" ]; then
     export SPARK_SUBMIT_OPTS=$2
+  elif [ "$1" = "--master" ]; then
+    export MASTER=$2
   fi
   shift
 done

-DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+if [ -z "$SPARK_CONF_DIR" ]; then
+  export SPARK_CONF_DIR="$SPARK_HOME/conf"
+fi
+DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf"
+if [ "$MASTER" == "yarn-cluster" ]; then
+  SPARK_SUBMIT_DEPLOY_MODE=cluster
+fi
 export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
 export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
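
Two behavioral changes land here: spark-defaults.conf is now read from $SPARK_CONF_DIR when that variable is set, and --master yarn-cluster implies cluster deploy mode. A usage sketch (the conf path, class name, and jar path are placeholders):

# Defaults come from /etc/spark/conf/spark-defaults.conf rather than $SPARK_HOME/conf.
export SPARK_CONF_DIR=/etc/spark/conf
# Deploy mode switches to "cluster" automatically for a yarn-cluster master.
./bin/spark-submit --master yarn-cluster --class com.example.MyApp /path/to/app.jar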

bin/spark-submit2.cmd

Lines changed: 11 additions & 1 deletion
@@ -24,7 +24,11 @@ set ORIG_ARGS=%*

 rem Reset the values of all variables used
 set SPARK_SUBMIT_DEPLOY_MODE=client
-set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
+
+if not defined SPARK_CONF_DIR (
+  set SPARK_CONF_DIR=%SPARK_HOME%\conf
+)
+set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
 set SPARK_SUBMIT_DRIVER_MEMORY=
 set SPARK_SUBMIT_LIBRARY_PATH=
 set SPARK_SUBMIT_CLASSPATH=
@@ -45,11 +49,17 @@ if [%1] == [] goto continue
     set SPARK_SUBMIT_CLASSPATH=%2
   ) else if [%1] == [--driver-java-options] (
     set SPARK_SUBMIT_OPTS=%2
+  ) else if [%1] == [--master] (
+    set MASTER=%2
   )
   shift
   goto loop
 :continue

+if [%MASTER%] == [yarn-cluster] (
+  set SPARK_SUBMIT_DEPLOY_MODE=cluster
+)
+
 rem For client mode, the driver will be launched in the same JVM that launches
 rem SparkSubmit, so we may need to read the properties file for any extra class
 rem paths, library paths, java options and memory early on. Otherwise, it will
