
Commit 61358e3

Merge remote-tracking branch 'apache-github/master' into streaming-web-ui

2 parents: 53be2c5 + 92a86b2

File tree: 199 files changed, +6796 -1477 lines


.gitignore

Lines changed: 1 addition & 0 deletions

@@ -47,3 +47,4 @@ spark-*-bin.tar.gz
 unit-tests.log
 /lib/
 rat-results.txt
+scalastyle.txt

bin/spark-shell

Lines changed: 168 additions & 58 deletions

@@ -30,67 +30,189 @@ esac
 # Enter posix mode for bash
 set -o posix
 
-CORE_PATTERN="^[0-9]+$"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-
+## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
-  echo "Usage: spark-shell [OPTIONS]"
-  echo "OPTIONS:"
-  echo "-c --cores num, the maximum number of cores to be used by the spark shell"
-  echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
-  echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
-  echo "-h --help, print this help information"
-  exit
-fi
+SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
+DEFAULT_MASTER="local"
+MASTER=${MASTER:-""}
+
+info_log=0
+
+#CLI Color Templates
+txtund=$(tput sgr 0 1)          # Underline
+txtbld=$(tput bold)             # Bold
+bldred=${txtbld}$(tput setaf 1) # red
+bldyel=${txtbld}$(tput setaf 3) # yellow
+bldblu=${txtbld}$(tput setaf 4) # blue
+bldwht=${txtbld}$(tput setaf 7) # white
+txtrst=$(tput sgr0)             # Reset
+info=${bldwht}*${txtrst}        # Feedback
+pass=${bldblu}*${txtrst}
+warn=${bldred}*${txtrst}
+ques=${bldblu}?${txtrst}
+
+# Helper function to describe the script usage
+function usage() {
+    cat << EOF
+${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
+
+${txtbld}OPTIONS${txtrst}:
+    -h  --help              : Print this help information.
+    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
+    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number
+                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
+    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed
+                              by m for megabytes or g for gigabytes, e.g. "1g".
+    -m  --master            : A full string that describes the Spark Master, defaults to "local"
+                              e.g. "spark://localhost:7077".
+    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
+                              Spark Context.
+
+e.g.
+    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
+
+EOF
+}
+
+function out_error(){
+    echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
+    usage
+    exit 1
+}
+
+function log_info(){
+    [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
+}
+
+function log_warn(){
+    echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
+}
 
-for o in "$@"; do
-  if [ "$1" = "-c" -o "$1" = "--cores" ]; then
-    shift
+# PATTERNS used to validate more than one optional arg.
+ARG_FLAG_PATTERN="^-"
+MEM_PATTERN="^[0-9]+[m|g|M|G]$"
+NUM_PATTERN="^[0-9]+$"
+PORT_PATTERN="^[0-9]+$"
+
+# Setters for optional args.
+function set_cores(){
+    CORE_PATTERN="^[0-9]+$"
     if [[ "$1" =~ $CORE_PATTERN ]]; then
-      SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
-      shift
+      SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
     else
-      echo "ERROR: wrong format for -c/--cores"
-      exit 1
+      out_error "wrong format for $2"
     fi
-  fi
-  if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
-    shift
+}
+
+function set_em(){
     if [[ $1 =~ $MEM_PATTERN ]]; then
       SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
-      shift
     else
-      echo "ERROR: wrong format for --execmem/-em"
-      exit 1
+      out_error "wrong format for $2"
     fi
-  fi
-  if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
-    shift
+}
+
+function set_dm(){
     if [[ $1 =~ $MEM_PATTERN ]]; then
       export SPARK_DRIVER_MEMORY=$1
-      shift
     else
-      echo "ERROR: wrong format for --drivermem/-dm"
-      exit 1
+      out_error "wrong format for $2"
     fi
-  fi
-done
+}
+
+function set_spark_log_conf(){
+    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
+}
 
-# Set MASTER from spark-env if possible
-DEFAULT_SPARK_MASTER_PORT=7077
-if [ -z "$MASTER" ]; then
-  . $FWDIR/bin/load-spark-env.sh
-  if [ "x" != "x$SPARK_MASTER_IP" ]; then
-    if [ "y" != "y$SPARK_MASTER_PORT" ]; then
-      SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
+function set_spark_master(){
+    if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
+      MASTER="$1"
     else
-      SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
+      out_error "wrong format for $2"
+    fi
+}
+
+function resolve_spark_master(){
+    # Set MASTER from spark-env if possible
+    DEFAULT_SPARK_MASTER_PORT=7077
+    if [ -z "$MASTER" ]; then
+      . $FWDIR/bin/load-spark-env.sh
+      if [ -n "$SPARK_MASTER_IP" ]; then
+        SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
+        export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
+      fi
+    fi
+
+    if [ -z "$MASTER" ]; then
+      MASTER="$DEFAULT_MASTER"
     fi
-    export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
-  fi
-fi
+
+}
+
+function main(){
+    log_info "Base Directory set to $FWDIR"
+
+    resolve_spark_master
+    log_info "Spark Master is $MASTER"
+
+    log_info "Spark REPL options $SPARK_REPL_OPTS"
+    if $cygwin; then
+        # Workaround for issue involving JLine and Cygwin
+        # (see http://sourceforge.net/p/jline/bugs/40/).
+        # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+        # (see https://github.com/sbt/sbt/issues/562).
+        stty -icanon min 1 -echo > /dev/null 2>&1
+        export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+        stty icanon echo > /dev/null 2>&1
+    else
+        export SPARK_REPL_OPTS
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+    fi
+}
+
+for option in "$@"
+do
+    case $option in
+        -h | --help )
+            usage
+            exit 1
+            ;;
+        -c | --cores)
+            shift
+            _1=$1
+            shift
+            set_cores $_1 "-c/--cores"
+            ;;
+        -em | --executor-memory)
+            shift
+            _1=$1
+            shift
+            set_em $_1 "-em/--executor-memory"
+            ;;
+        -dm | --driver-memory)
+            shift
+            _1=$1
+            shift
+            set_dm $_1 "-dm/--driver-memory"
+            ;;
+        -m | --master)
+            shift
+            _1=$1
+            shift
+            set_spark_master $_1 "-m/--master"
+            ;;
+        --log-conf)
+            shift
+            set_spark_log_conf "true"
+            info_log=1
+            ;;
+        ?)
+            ;;
+    esac
+done
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
 # binary distribution of Spark where Scala is not installed
@@ -120,22 +242,10 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
-if $cygwin; then
-    # Workaround for issue involving JLine and Cygwin
-    # (see http://sourceforge.net/p/jline/bugs/40/).
-    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
-    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-    # (see https://github.com/sbt/sbt/issues/562).
-    stty -icanon min 1 -echo > /dev/null 2>&1
-    export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-    stty icanon echo > /dev/null 2>&1
-else
-    export SPARK_REPL_OPTS
-    $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-fi
+main
 
 # record the exit status lest it be overwritten:
 # then reenable echo and propagate the code.
 exit_status=$?
 onExit
+
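Taken together, the option loop turns each validated flag into a JVM system property on SPARK_REPL_OPTS (or an exported variable, for the driver memory) before main launches the REPL. A minimal sketch of what the invocation from the usage text should resolve to, assuming the setters above run as written (values illustrative):

    ./bin/spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g --log-conf

    # Expected effect, per the setters in this diff:
    #   MASTER=spark://localhost:7077    (set_spark_master)
    #   export SPARK_DRIVER_MEMORY=512m  (set_dm)
    #   SPARK_REPL_OPTS="-Dspark.cores.max=4 -Dspark.executor.memory=2g -Dspark.logConf=true"
    #   info_log=1, so the log_info diagnostics are printed
    # main then resolves the master and runs:
    #   $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"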

bin/spark-submit

Lines changed: 38 additions & 0 deletions

@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
+ORIG_ARGS=$@
+
+while (($#)); do
+  if [ $1 = "--deploy-mode" ]; then
+    DEPLOY_MODE=$2
+  elif [ $1 = "--driver-memory" ]; then
+    DRIVER_MEMORY=$2
+  fi
+
+  shift
+done
+
+if [ ! -z $DRIVER_MEMORY ] && [ ! -z $DEPLOY_MODE ] && [ $DEPLOY_MODE = "client" ]; then
+  export SPARK_MEM=$DRIVER_MEMORY
+fi
+
+$SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit $ORIG_ARGS
+
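The new wrapper only inspects --deploy-mode and --driver-memory while scanning the argument list; everything in $ORIG_ARGS is then forwarded verbatim to org.apache.spark.deploy.SparkSubmit. A hedged example invocation (the application-jar argument is illustrative and not defined by this diff):

    ./bin/spark-submit --master spark://localhost:7077 \
      --deploy-mode client --driver-memory 1g \
      path/to/app.jar

    # With --deploy-mode client and --driver-memory set, the script exports
    # SPARK_MEM=1g before delegating to spark-class, so the driver JVM picks
    # up the requested heap size.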

core/pom.xml

Lines changed: 1 addition & 5 deletions

@@ -82,10 +82,6 @@
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
     </dependency>
-    <dependency>
-      <groupId>com.google.code.findbugs</groupId>
-      <artifactId>jsr305</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
@@ -150,7 +146,7 @@
       <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
       <version>3.2.6</version>
       <!-- see also exclusion for lift-json; this is necessary since it depends on
-           scala-library and scalap 2.10.0, but we use 2.10.3, and only override
+           scala-library and scalap 2.10.0, but we use 2.10.4, and only override
            scala-library -->
       <exclusions>
         <exclusion>

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 1 addition & 1 deletion

@@ -81,7 +81,7 @@ class SparkEnv private[spark] (
     // Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut
     // down, but let's call it anyway in case it gets fixed in a later release
     // UPDATE: In Akka 2.1.x, this hangs if there are remote actors, so we can't call it.
-    //actorSystem.awaitTermination()
+    // actorSystem.awaitTermination()
   }
 
 private[spark]

core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala

Lines changed: 1 addition & 1 deletion

@@ -167,7 +167,7 @@ extends Logging {
   private var initialized = false
   private var conf: SparkConf = null
   def initialize(_isDriver: Boolean, conf: SparkConf) {
-    TorrentBroadcast.conf = conf //TODO: we might have to fix it in tests
+    TorrentBroadcast.conf = conf // TODO: we might have to fix it in tests
     synchronized {
       if (!initialized) {
         initialized = true

core/src/main/scala/org/apache/spark/deploy/Client.scala

Lines changed: 3 additions & 0 deletions

@@ -128,6 +128,9 @@ private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends
   */
 object Client {
   def main(args: Array[String]) {
+    println("WARNING: This client is deprecated and will be removed in a future version of Spark.")
+    println("Use ./bin/spark-submit with \"--master spark://host:port\"")
+
     val conf = new SparkConf()
     val driverArgs = new ClientArguments(args)
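The warning steers users from the standalone deploy Client to the spark-submit wrapper added in this same commit. A hypothetical migration, following the message text (everything after --master is a placeholder, not part of this diff):

    # Deprecated entry point:
    #   ./bin/spark-class org.apache.spark.deploy.Client ...
    # Replacement, per the warning:
    ./bin/spark-submit --master spark://host:port ...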

core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala

Lines changed: 2 additions & 2 deletions

@@ -66,9 +66,9 @@ class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: I
     // TODO: In Akka 2.1.x, ActorSystem.awaitTermination hangs when you have remote actors!
     // This is unfortunate, but for now we just comment it out.
     workerActorSystems.foreach(_.shutdown())
-    //workerActorSystems.foreach(_.awaitTermination())
+    // workerActorSystems.foreach(_.awaitTermination())
     masterActorSystems.foreach(_.shutdown())
-    //masterActorSystems.foreach(_.awaitTermination())
+    // masterActorSystems.foreach(_.awaitTermination())
     masterActorSystems.clear()
     workerActorSystems.clear()
   }
