
Commit 823d803

Merge remote-tracking branch 'upstream/master' into SPARK-3278
2 parents: c06f88c + 1656aae

280 files changed: 7382 additions, 3618 deletions


.gitignore

Lines changed: 5 additions & 2 deletions
@@ -8,16 +8,19 @@
 *.pyc
 .idea/
 .idea_modules/
-sbt/*.jar
+build/*.jar
 .settings
 .cache
+cache
 .generated-mima*
-/build/
 work/
 out/
 .DS_Store
 third_party/libmesos.so
 third_party/libmesos.dylib
+build/apache-maven*
+build/zinc*
+build/scala*
 conf/java-opts
 conf/*.sh
 conf/*.cmd

assembly/pom.xml

Lines changed: 20 additions & 0 deletions
@@ -354,5 +354,25 @@
       </dependency>
     </dependencies>
   </profile>
+
+  <!-- Profiles that disable inclusion of certain dependencies. -->
+  <profile>
+    <id>hadoop-provided</id>
+    <properties>
+      <hadoop.deps.scope>provided</hadoop.deps.scope>
+    </properties>
+  </profile>
+  <profile>
+    <id>hive-provided</id>
+    <properties>
+      <hive.deps.scope>provided</hive.deps.scope>
+    </properties>
+  </profile>
+  <profile>
+    <id>parquet-provided</id>
+    <properties>
+      <parquet.deps.scope>provided</parquet.deps.scope>
+    </properties>
+  </profile>
 </profiles>
</project>
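
With these profiles, a packager can build an assembly that leaves Hadoop, Hive, and Parquet classes out and expects the deployment to supply them at runtime (see the SPARK_DIST_CLASSPATH hook below). A sketch of such a build, assuming the standard Maven -P profile-activation flags; combining all three profiles in one run is illustrative, not required:

  build/mvn -Phadoop-provided -Phive-provided -Pparquet-provided -DskipTests clean package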

bagel/pom.xml

Lines changed: 0 additions & 15 deletions
@@ -40,15 +40,6 @@
       <artifactId>spark-core_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
@@ -58,11 +49,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <plugins>
-      <plugin>
-        <groupId>org.scalatest</groupId>
-        <artifactId>scalatest-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
   </build>
 </project>

bagel/src/test/resources/log4j.properties

Lines changed: 2 additions & 2 deletions
@@ -15,10 +15,10 @@
 # limitations under the License.
 #

-# Set everything to be logged to the file bagel/target/unit-tests.log
+# Set everything to be logged to the file target/unit-tests.log
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
-log4j.appender.file.append=false
+log4j.appender.file.append=true
 log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

bin/compute-classpath.cmd

Lines changed: 7 additions & 0 deletions
@@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
 set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
 :no_yarn_conf_dir

+rem To allow for distributions to append needed libraries to the classpath (e.g. when
+rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+rem append it to the final classpath.
+if not "x%SPARK_DIST_CLASSPATH%"=="x" (
+  set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
+)
+
 rem A bit of a hack to allow calling this script within run2.cmd without seeing output
 if "%DONT_PRINT_CLASSPATH%"=="1" goto exit

bin/compute-classpath.sh

Lines changed: 7 additions & 0 deletions
@@ -146,4 +146,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
   CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
 fi

+# To allow for distributions to append needed libraries to the classpath (e.g. when
+# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+# append it to the final classpath.
+if [ -n "$SPARK_DIST_CLASSPATH" ]; then
+  CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
+fi
+
 echo "$CLASSPATH"

bin/spark-submit

Lines changed: 9 additions & 1 deletion
@@ -38,11 +38,19 @@ while (($#)); do
     export SPARK_SUBMIT_CLASSPATH=$2
   elif [ "$1" = "--driver-java-options" ]; then
     export SPARK_SUBMIT_OPTS=$2
+  elif [ "$1" = "--master" ]; then
+    export MASTER=$2
   fi
   shift
 done

-DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+if [ -z "$SPARK_CONF_DIR" ]; then
+  export SPARK_CONF_DIR="$SPARK_HOME/conf"
+fi
+DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf"
+if [ "$MASTER" == "yarn-cluster" ]; then
+  SPARK_SUBMIT_DEPLOY_MODE=cluster
+fi
 export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
 export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
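
Together these changes let the launcher honor a custom configuration directory and treat --master yarn-cluster as an implicit request for cluster deploy mode. A hypothetical invocation (the conf path, class name, and jar are placeholders):

  SPARK_CONF_DIR=/etc/spark/conf ./bin/spark-submit \
    --master yarn-cluster --class org.example.MyApp myapp.jar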

bin/spark-submit2.cmd

Lines changed: 11 additions & 1 deletion
@@ -24,7 +24,11 @@ set ORIG_ARGS=%*

 rem Reset the values of all variables used
 set SPARK_SUBMIT_DEPLOY_MODE=client
-set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
+
+if not defined SPARK_CONF_DIR (
+  set SPARK_CONF_DIR=%SPARK_HOME%\conf
+)
+set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
 set SPARK_SUBMIT_DRIVER_MEMORY=
 set SPARK_SUBMIT_LIBRARY_PATH=
 set SPARK_SUBMIT_CLASSPATH=

@@ -45,11 +49,17 @@ if [%1] == [] goto continue
     set SPARK_SUBMIT_CLASSPATH=%2
 ) else if [%1] == [--driver-java-options] (
     set SPARK_SUBMIT_OPTS=%2
+) else if [%1] == [--master] (
+    set MASTER=%2
 )
 shift
 goto loop
 :continue

+if [%MASTER%] == [yarn-cluster] (
+  set SPARK_SUBMIT_DEPLOY_MODE=cluster
+)
+
 rem For client mode, the driver will be launched in the same JVM that launches
 rem SparkSubmit, so we may need to read the properties file for any extra class
 rem paths, library paths, java options and memory early on. Otherwise, it will

build/mvn

Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Determine the current working directory
+_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# Preserve the calling directory
+_CALLING_DIR="$(pwd)"
+
+# Installs any application tarball given a URL, the expected tarball name,
+# and, optionally, a checkable binary path to determine if the binary has
+# already been installed
+## Arg1 - URL
+## Arg2 - Tarball Name
+## Arg3 - Checkable Binary
+install_app() {
+  local remote_tarball="$1/$2"
+  local local_tarball="${_DIR}/$2"
+  local binary="${_DIR}/$3"
+
+  # setup `curl` and `wget` silent options if we're running on Jenkins
+  local curl_opts=""
+  local wget_opts=""
+  if [ -n "$AMPLAB_JENKINS" ]; then
+    curl_opts="-s"
+    wget_opts="--quiet"
+  else
+    curl_opts="--progress-bar"
+    wget_opts="--progress=bar:force"
+  fi
+
+  if [ -z "$3" -o ! -f "$binary" ]; then
+    # check if we already have the tarball
+    # check if we have curl installed
+    # download application
+    [ ! -f "${local_tarball}" ] && [ -n "`which curl 2>/dev/null`" ] && \
+      echo "exec: curl ${curl_opts} ${remote_tarball}" && \
+      curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
+    # if the file still doesn't exist, let's try `wget` and cross our fingers
+    [ ! -f "${local_tarball}" ] && [ -n "`which wget 2>/dev/null`" ] && \
+      echo "exec: wget ${wget_opts} ${remote_tarball}" && \
+      wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
+    # if both were unsuccessful, exit
+    [ ! -f "${local_tarball}" ] && \
+      echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
+      echo "please install manually and try again." && \
+      exit 2
+    cd "${_DIR}" && tar -xzf "$2"
+    rm -rf "$local_tarball"
+  fi
+}
+
+# Install maven under the build/ folder
+install_mvn() {
+  install_app \
+    "http://apache.claz.org/maven/maven-3/3.2.3/binaries" \
+    "apache-maven-3.2.3-bin.tar.gz" \
+    "apache-maven-3.2.3/bin/mvn"
+  MVN_BIN="${_DIR}/apache-maven-3.2.3/bin/mvn"
+}
+
+# Install zinc under the build/ folder
+install_zinc() {
+  local zinc_path="zinc-0.3.5.3/bin/zinc"
+  [ ! -f "${_DIR}/${zinc_path}" ] && ZINC_INSTALL_FLAG=1
+  install_app \
+    "http://downloads.typesafe.com/zinc/0.3.5.3" \
+    "zinc-0.3.5.3.tgz" \
+    "${zinc_path}"
+  ZINC_BIN="${_DIR}/${zinc_path}"
+}
+
+# Determine the Scala version from the root pom.xml file, set the Scala URL,
+# and, with that, download the specific version of Scala necessary under
+# the build/ folder
+install_scala() {
+  # determine the Scala version used in Spark
+  local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | \
+                       head -1 | cut -f2 -d'>' | cut -f1 -d'<'`
+  local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
+
+  install_app \
+    "http://downloads.typesafe.com/scala/${scala_version}" \
+    "scala-${scala_version}.tgz" \
+    "scala-${scala_version}/bin/scala"
+
+  SCALA_COMPILER="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-compiler.jar"
+  SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar"
+}
+
+# Determines if a given application is already installed. If not, will attempt
+# to install
+## Arg1 - application name
+## Arg2 - Alternate path to local install under build/ dir
+check_and_install_app() {
+  # create the local environment variable in uppercase
+  local app_bin="`echo $1 | awk '{print toupper(\$0)}'`_BIN"
+  # some black magic to set the generated app variable (i.e. MVN_BIN) into the
+  # environment
+  eval "${app_bin}=`which $1 2>/dev/null`"
+
+  if [ -z "`which $1 2>/dev/null`" ]; then
+    install_$1
+  fi
+}
+
+# Setup healthy defaults for the Zinc port if none were provided from
+# the environment
+ZINC_PORT=${ZINC_PORT:-"3030"}
+
+# Check and install all applications necessary to build Spark
+check_and_install_app "mvn"
+
+# Install the proper version of Scala and Zinc for the build
+install_zinc
+install_scala
+
+# Reset the current working directory
+cd "${_CALLING_DIR}"
+
+# Now that zinc is ensured to be installed, check its status and, if it's
+# not running or was just installed, start it
+if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
+  ${ZINC_BIN} -shutdown
+  ${ZINC_BIN} -start -port ${ZINC_PORT} \
+    -scala-compiler "${SCALA_COMPILER}" \
+    -scala-library "${SCALA_LIBRARY}" &>/dev/null
+fi
+
+# Set any `mvn` options if not already present
+export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}
+
+# Last, call the `mvn` command as usual
+${MVN_BIN} "$@"
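
In short, the wrapper self-installs its toolchain under build/ (the directory the new .gitignore entries above exclude), keeps a zinc server warm on ZINC_PORT (default 3030), and forwards every argument to the downloaded mvn. Typical use from the repository root, with ordinary Maven flags:

  # the first run downloads Maven 3.2.3, zinc 0.3.5.3, and Scala, then compiles via zinc
  build/mvn -DskipTests clean package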
