
Commit 27be98a

Author: Marcelo Vanzin (committed)
Modify Spark to use launcher lib.
Change the existing scripts under bin/ to use the launcher library, to avoid code duplication and reduce the coupling between the scripts and Spark code. Also change some Spark core code to use the library instead of relying on the scripts (either by calling them, or with comments saying they should be kept in sync).

While the library is now included in the assembly (by means of the spark-core dependency), it is still packaged directly into the final lib/ directory, because loading a small jar is much faster than loading the huge assembly jar, and that makes the start-up time of Spark jobs much better.
1 parent 6f70eea commit 27be98a

20 files changed (+145 lines added, -1057 lines removed)
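The commit message describes bin/ scripts delegating command construction to the new launcher library instead of computing classpaths and java invocations inline. A minimal sketch of that pattern follows; it is an illustration only, not taken from the diffs on this page, and the entry-point class name, the jar location, and the one-token-per-line output format are all assumptions.

    #!/usr/bin/env bash
    # Sketch of a bin/ script that asks the launcher library to build the final
    # java command and then simply exec's it, instead of assembling the command
    # itself. ASSUMPTIONS: org.apache.spark.launcher.Main as the entry point, the
    # jar path below, and one-token-per-line output are illustrative only.
    SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
    LAUNCHER_JAR="$SPARK_HOME/lib/spark-launcher.jar"   # hypothetical location
    RUNNER="${JAVA_HOME:+$JAVA_HOME/bin/}java"

    build_command() {
      "$RUNNER" -cp "$LAUNCHER_JAR" org.apache.spark.launcher.Main "$@"
    }

    CMD=()
    while IFS= read -r line; do
      CMD+=("$line")
    done < <(build_command org.apache.spark.deploy.SparkSubmit "$@")

    exec "${CMD[@]}"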

bin/compute-classpath.cmd

Lines changed: 0 additions & 117 deletions
This file was deleted.

bin/compute-classpath.sh

Lines changed: 0 additions & 149 deletions
This file was deleted.

bin/load-spark-env.sh

Lines changed: 4 additions & 4 deletions
@@ -41,9 +41,9 @@ fi
 
 if [ -z "$SPARK_SCALA_VERSION" ]; then
 
-  ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
-  ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"
-
+  ASSEMBLY_DIR2="$SPARK_HOME/assembly/target/scala-2.11"
+  ASSEMBLY_DIR1="$SPARK_HOME/assembly/target/scala-2.10"
+
   if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
     echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
     echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
@@ -54,5 +54,5 @@ if [ -z "$SPARK_SCALA_VERSION" ]; then
     export SPARK_SCALA_VERSION="2.11"
   else
     export SPARK_SCALA_VERSION="2.10"
-  fi
+  fi
 fi
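When builds for both Scala versions are present, the check above exits with the error shown; as its own message says, exporting the version explicitly (for example in conf/spark-env.sh) resolves it. Usage illustration only, not part of this commit's diff:

    # Pick one Scala build explicitly so load-spark-env.sh stops erroring out
    # over finding assembly directories for both 2.10 and 2.11.
    export SPARK_SCALA_VERSION=2.11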

bin/pyspark

Lines changed: 8 additions & 45 deletions
@@ -18,36 +18,21 @@
 #
 
 # Figure out where Spark is installed
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
 
-# Export this as SPARK_HOME
-export SPARK_HOME="$FWDIR"
-
-source "$FWDIR/bin/utils.sh"
-
-source "$FWDIR"/bin/load-spark-env.sh
+source "$SPARK_HOME"/bin/load-spark-env.sh
 
 function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
-  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$SPARK_HOME"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 }
+export -f usage
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   usage
 fi
 
-# Exit if the user hasn't compiled Spark
-if [ ! -f "$FWDIR/RELEASE" ]; then
-  # Exit if the user hasn't compiled Spark
-  ls "$FWDIR"/assembly/target/scala-$SPARK_SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
-  if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
-    echo "You need to build Spark before running this program" 1>&2
-    exit 1
-  fi
-fi
-
 # In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
 # executable, while the worker would still be launched using PYSPARK_PYTHON.
 #
@@ -95,21 +80,7 @@ export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
-export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"
-
-# Build up arguments list manually to preserve quotes and backslashes.
-# We export Spark submit arguments as an environment variable because shell.py must run as a
-# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-SUBMIT_USAGE_FUNCTION=usage
-gatherSparkSubmitOpts "$@"
-PYSPARK_SUBMIT_ARGS=""
-whitespace="[[:space:]]"
-for i in "${SUBMISSION_OPTS[@]}"; do
-  if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
-  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
-  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
-done
-export PYSPARK_SUBMIT_ARGS
+export PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
 
 # For pyspark tests
 if [[ -n "$SPARK_TESTING" ]]; then
@@ -123,14 +94,6 @@ if [[ -n "$SPARK_TESTING" ]]; then
   exit
 fi
 
-# If a python file is provided, directly run spark-submit.
-if [[ "$1" =~ \.py$ ]]; then
-  echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
-  echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  primary="$1"
-  shift
-  gatherSparkSubmitOpts "$@"
-  exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
-else
-  exec "$PYSPARK_DRIVER_PYTHON" $PYSPARK_DRIVER_PYTHON_OPTS
-fi
+export PYSPARK_DRIVER_PYTHON
+export PYSPARK_DRIVER_PYTHON_OPTS
+exec $SPARK_HOME/bin/spark-class pyspark "$@"
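With the rewrite above, ./bin/pyspark no longer gathers submit options or launches the Python driver itself: it exports the driver-python settings and hands all remaining arguments to spark-class. An invocation sketch (illustration only; the options shown are assumed to be ordinary spark-submit options, as the usage() text implies):

    # Run the PySpark shell under IPython: the two variables are simply exported
    # by bin/pyspark, and the trailing arguments pass through via "$@".
    PYSPARK_DRIVER_PYTHON=ipython \
    PYSPARK_DRIVER_PYTHON_OPTS="--pylab" \
    ./bin/pyspark --master local[2]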
