
Commit eb2b3ba
Merge remote-tracking branch 'upstream/master'
2 parents: d8e51f9 + ac9cdc1

120 files changed: +2011 -579 lines


bin/compute-classpath.sh

Lines changed: 4 additions & 4 deletions
@@ -81,10 +81,10 @@ ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-  echo "Loading Spark jar with '$JAR_CMD' failed. "
-  echo "This is likely because Spark was compiled with Java 7 and run "
-  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-  echo "or build Spark with Java 6."
+  echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+  echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+  echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+  echo "or build Spark with Java 6." 1>&2
   exit 1
 fi

bin/pyspark

Lines changed: 3 additions & 3 deletions
@@ -26,7 +26,7 @@ export SPARK_HOME="$FWDIR"
 SCALA_VERSION=2.10

 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/pyspark [options]"
+  echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 fi
@@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark before running this program" >&2
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
     exit 1
   fi
 fi

bin/run-example

Lines changed: 5 additions & 5 deletions
@@ -27,9 +27,9 @@ if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
   shift
 else
-  echo "Usage: ./bin/run-example <example-class> [example-args]"
-  echo " - set MASTER=XX to use a specific master"
-  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
+  echo " - set MASTER=XX to use a specific master" 1>&2
+  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
   exit 1
 fi

@@ -40,8 +40,8 @@ elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja
 fi

 if [[ -z $SPARK_EXAMPLES_JAR ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark before running this program" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+  echo "You need to build Spark before running this program" 1>&2
   exit 1
 fi

bin/spark-class

Lines changed: 6 additions & 7 deletions
@@ -33,13 +33,13 @@ export SPARK_HOME="$FWDIR"
 . $FWDIR/bin/load-spark-env.sh

 if [ -z "$1" ]; then
-  echo "Usage: spark-class <class> [<args>]" >&2
+  echo "Usage: spark-class <class> [<args>]" 1>&2
   exit 1
 fi

 if [ -n "$SPARK_MEM" ]; then
-  echo "Warning: SPARK_MEM is deprecated, please use a more specific config option"
-  echo "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)."
+  echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
+  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
 fi

 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -147,10 +147,9 @@ fi
 export CLASSPATH

 if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: "
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
-  echo "========================================"
-  echo
+  echo -n "Spark Command: " 1>&2
+  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+  echo -e "========================================\n" 1>&2
 fi

 exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

core/pom.xml

Lines changed: 6 additions & 0 deletions
@@ -34,6 +34,12 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-client</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>net.java.dev.jets3t</groupId>

core/src/main/resources/org/apache/spark/ui/static/webui.css

Lines changed: 4 additions & 0 deletions
@@ -95,6 +95,10 @@ span.expand-details {
   float: right;
 }

+pre {
+  font-size: 0.8em;
+}
+
 .stage-details {
   max-height: 100px;
   overflow-y: auto;

core/src/main/scala/org/apache/spark/CacheManager.scala

Lines changed: 6 additions & 4 deletions
@@ -19,6 +19,7 @@ package org.apache.spark

 import scala.collection.mutable.{ArrayBuffer, HashSet}

+import org.apache.spark.executor.InputMetrics
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage._

@@ -41,9 +42,10 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
     val key = RDDBlockId(rdd.id, partition.index)
     logDebug(s"Looking for partition $key")
     blockManager.get(key) match {
-      case Some(values) =>
+      case Some(blockResult) =>
         // Partition is already materialized, so just return its values
-        new InterruptibleIterator(context, values.asInstanceOf[Iterator[T]])
+        context.taskMetrics.inputMetrics = Some(blockResult.inputMetrics)
+        new InterruptibleIterator(context, blockResult.data.asInstanceOf[Iterator[T]])

       case None =>
         // Acquire a lock for loading this partition
@@ -110,7 +112,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
           logInfo(s"Whoever was loading $id failed; we'll try it ourselves")
           loading.add(id)
         }
-        values.map(_.asInstanceOf[Iterator[T]])
+        values.map(_.data.asInstanceOf[Iterator[T]])
       }
     }
   }
@@ -132,7 +134,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
      * exceptions that can be avoided. */
     updatedBlocks ++= blockManager.put(key, values, storageLevel, tellMaster = true)
     blockManager.get(key) match {
-      case Some(v) => v.asInstanceOf[Iterator[T]]
+      case Some(v) => v.data.asInstanceOf[Iterator[T]]
      case None =>
        logInfo(s"Failure to store $key")
        throw new BlockException(key, s"Block manager failed to return cached value for $key!")
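
The interesting part of this change is the hit path: the cached block now comes back as a BlockResult, so the task's input metrics can be recorded before the data iterator is returned. The sketch below is a toy, self-contained version of the same get-or-compute shape (the cache map and the getOrCompute helper are invented for illustration; this is not Spark's BlockManager API):

    import scala.collection.mutable

    // Toy stand-in for the block store; Spark's real BlockManager is far richer.
    val cache = mutable.Map.empty[Int, Vector[Int]]

    def getOrCompute(id: Int)(compute: => Iterator[Int]): Iterator[Int] =
      cache.get(id) match {
        case Some(values) =>
          // Hit: the partition is already materialized, reuse it.
          values.iterator
        case None =>
          // Miss: compute once, store, then hand back an iterator over the stored copy.
          val values = compute.toVector
          cache(id) = values
          values.iterator
      }

    // Example: the second call never re-runs the computation.
    getOrCompute(0)(Iterator.tabulate(5)(_ * 2)).toList  // computes and caches
    getOrCompute(0)(sys.error("should not recompute"))   // served from the cache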

core/src/main/scala/org/apache/spark/Partitioner.scala

Lines changed: 45 additions & 7 deletions
@@ -17,11 +17,13 @@

 package org.apache.spark

+import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
+
 import scala.reflect.ClassTag

 import org.apache.spark.rdd.RDD
-import org.apache.spark.util.CollectionsUtils
-import org.apache.spark.util.Utils
+import org.apache.spark.serializer.JavaSerializer
+import org.apache.spark.util.{CollectionsUtils, Utils}

 /**
  * An object that defines how the elements in a key-value pair RDD are partitioned by key.
@@ -96,15 +98,15 @@ class HashPartitioner(partitions: Int) extends Partitioner {
  * the value of `partitions`.
  */
 class RangePartitioner[K : Ordering : ClassTag, V](
-    partitions: Int,
+    @transient partitions: Int,
     @transient rdd: RDD[_ <: Product2[K,V]],
-    private val ascending: Boolean = true)
+    private var ascending: Boolean = true)
   extends Partitioner {

-  private val ordering = implicitly[Ordering[K]]
+  private var ordering = implicitly[Ordering[K]]

   // An array of upper bounds for the first (partitions - 1) partitions
-  private val rangeBounds: Array[K] = {
+  private var rangeBounds: Array[K] = {
     if (partitions == 1) {
       Array()
     } else {
@@ -127,7 +129,7 @@ class RangePartitioner[K : Ordering : ClassTag, V](

   def numPartitions = rangeBounds.length + 1

-  private val binarySearch: ((Array[K], K) => Int) = CollectionsUtils.makeBinarySearch[K]
+  private var binarySearch: ((Array[K], K) => Int) = CollectionsUtils.makeBinarySearch[K]

   def getPartition(key: Any): Int = {
     val k = key.asInstanceOf[K]
@@ -173,4 +175,40 @@ class RangePartitioner[K : Ordering : ClassTag, V](
     result = prime * result + ascending.hashCode
     result
   }
+
+  @throws(classOf[IOException])
+  private def writeObject(out: ObjectOutputStream) {
+    val sfactory = SparkEnv.get.serializer
+    sfactory match {
+      case js: JavaSerializer => out.defaultWriteObject()
+      case _ =>
+        out.writeBoolean(ascending)
+        out.writeObject(ordering)
+        out.writeObject(binarySearch)
+
+        val ser = sfactory.newInstance()
+        Utils.serializeViaNestedStream(out, ser) { stream =>
+          stream.writeObject(scala.reflect.classTag[Array[K]])
+          stream.writeObject(rangeBounds)
+        }
+    }
+  }
+
+  @throws(classOf[IOException])
+  private def readObject(in: ObjectInputStream) {
+    val sfactory = SparkEnv.get.serializer
+    sfactory match {
+      case js: JavaSerializer => in.defaultReadObject()
+      case _ =>
+        ascending = in.readBoolean()
+        ordering = in.readObject().asInstanceOf[Ordering[K]]
+        binarySearch = in.readObject().asInstanceOf[(Array[K], K) => Int]
+
+        val ser = sfactory.newInstance()
+        Utils.deserializeViaNestedStream(in, ser) { ds =>
+          implicit val classTag = ds.readObject[ClassTag[Array[K]]]()
+          rangeBounds = ds.readObject[Array[K]]()
+        }
+    }
+  }
 }
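
The bulk of this addition is a pair of Java-serialization hooks: the partitioner's fields become vars so readObject can repopulate them, and writeObject/readObject choose the wire format based on the configured serializer. Below is a minimal, self-contained sketch of just the hook mechanism using a toy class; SparkEnv, JavaSerializer and Utils.serializeViaNestedStream are Spark internals and are deliberately left out:

    import java.io._

    // A transient field that is written and restored by hand through the hooks.
    class BoundsHolder(init: Array[Int]) extends Serializable {
      @transient private var bounds: Array[Int] = init

      def numPartitions: Int = bounds.length + 1

      @throws(classOf[IOException])
      private def writeObject(out: ObjectOutputStream): Unit = {
        out.defaultWriteObject()   // non-transient fields, as usual
        out.writeObject(bounds)    // then the transient field, explicitly
      }

      @throws(classOf[IOException])
      private def readObject(in: ObjectInputStream): Unit = {
        in.defaultReadObject()
        bounds = in.readObject().asInstanceOf[Array[Int]]
      }
    }

    // Round-trip check: the transient field survives because the hooks carry it.
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject(new BoundsHolder(Array(10, 20, 30)))
    oos.close()
    val copy = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray))
      .readObject().asInstanceOf[BoundsHolder]
    assert(copy.numPartitions == 4)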

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 10 additions & 2 deletions
@@ -1203,9 +1203,17 @@ class SparkContext(config: SparkConf) extends Logging {
   /**
    * Clean a closure to make it ready to serialized and send to tasks
    * (removes unreferenced variables in $outer's, updates REPL variables)
+   * If <tt>checkSerializable</tt> is set, <tt>clean</tt> will also proactively
+   * check to see if <tt>f</tt> is serializable and throw a <tt>SparkException</tt>
+   * if not.
+   *
+   * @param f the closure to clean
+   * @param checkSerializable whether or not to immediately check <tt>f</tt> for serializability
+   * @throws <tt>SparkException<tt> if <tt>checkSerializable</tt> is set but <tt>f</tt> is not
+   *   serializable
    */
-  private[spark] def clean[F <: AnyRef](f: F): F = {
-    ClosureCleaner.clean(f)
+  private[spark] def clean[F <: AnyRef](f: F, checkSerializable: Boolean = true): F = {
+    ClosureCleaner.clean(f, checkSerializable)
     f
   }
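
clean now takes a checkSerializable flag so a bad closure fails fast at definition time instead of when the task is shipped. One plausible way to express such an eager check, assuming plain Java serialization, is sketched below (ensureSerializable is a made-up helper for illustration, not ClosureCleaner's actual API):

    import java.io.{ByteArrayOutputStream, NotSerializableException, ObjectOutputStream}

    // Fail fast: try to serialize the closure now rather than at task-ship time.
    def ensureSerializable(f: AnyRef): Unit =
      try {
        new ObjectOutputStream(new ByteArrayOutputStream()).writeObject(f)
      } catch {
        case e: NotSerializableException =>
          throw new IllegalArgumentException(s"Task not serializable: $f", e)
      }

    // Example: a closure capturing a non-serializable object is rejected immediately.
    class NotSer                                   // does not extend Serializable
    val captured = new NotSer
    val badClosure: Int => Int = x => captured.hashCode + x
    // ensureSerializable(badClosure)              // would throw IllegalArgumentException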

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 11 additions & 4 deletions
@@ -79,7 +79,7 @@ class SparkEnv (

   private[spark] def stop() {
     pythonWorkers.foreach { case(key, worker) => worker.stop() }
-    httpFileServer.stop()
+    Option(httpFileServer).foreach(_.stop())
     mapOutputTracker.stop()
     shuffleManager.stop()
     broadcastManager.stop()
@@ -183,6 +183,7 @@ object SparkEnv extends Logging {

     val serializer = instantiateClass[Serializer](
       "spark.serializer", "org.apache.spark.serializer.JavaSerializer")
+    logDebug(s"Using serializer: ${serializer.getClass}")

     val closureSerializer = instantiateClass[Serializer](
       "spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer")
@@ -227,9 +228,15 @@ object SparkEnv extends Logging {

     val cacheManager = new CacheManager(blockManager)

-    val httpFileServer = new HttpFileServer(securityManager)
-    httpFileServer.initialize()
-    conf.set("spark.fileserver.uri", httpFileServer.serverUri)
+    val httpFileServer =
+      if (isDriver) {
+        val server = new HttpFileServer(securityManager)
+        server.initialize()
+        conf.set("spark.fileserver.uri", server.serverUri)
+        server
+      } else {
+        null
+      }

     val metricsSystem = if (isDriver) {
       MetricsSystem.createMetricsSystem("driver", conf, securityManager)
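
Because executors no longer create an HttpFileServer, the field can be null there, and stop() guards it with Option(...): Option(null) evaluates to None, so the cleanup call is simply skipped where the server was never started. A tiny standalone illustration of the idiom (StoppableServer is a stand-in trait, not Spark's class):

    // Option(null) == None, so foreach is a no-op when the reference was never set.
    trait StoppableServer { def stop(): Unit }

    val onDriver: StoppableServer = new StoppableServer {
      def stop(): Unit = println("file server stopped")
    }
    val onExecutor: StoppableServer = null

    Option(onDriver).foreach(_.stop())    // runs: prints "file server stopped"
    Option(onExecutor).foreach(_.stop())  // does nothing instead of throwing an NPE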

core/src/main/scala/org/apache/spark/SparkException.scala

Lines changed: 8 additions & 0 deletions
@@ -22,3 +22,11 @@ class SparkException(message: String, cause: Throwable)

   def this(message: String) = this(message, null)
 }
+
+/**
+ * Exception thrown when execution of some user code in the driver process fails, e.g.
+ * accumulator update fails or failure in takeOrdered (user supplies an Ordering implementation
+ * that can be misbehaving.
+ */
+private[spark] class SparkDriverExecutionException(cause: Throwable)
+  extends SparkException("Execution error", cause)
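
The new exception type simply wraps the original failure so callers can tell "user code running in the driver failed" apart from Spark's own errors while keeping the cause attached. A self-contained toy version of the same pattern (all names invented for illustration):

    // A dedicated wrapper type lets callers pattern-match on "driver-side user code failed"
    // while still carrying the original exception as the cause.
    class DriverSideFailure(cause: Throwable)
      extends RuntimeException("Execution error", cause)

    def runUserCodeOnDriver[T](body: => T): T =
      try body
      catch { case e: Exception => throw new DriverSideFailure(e) }

    // Example: the original ArithmeticException is preserved as the cause.
    try runUserCodeOnDriver { 1 / 0 }
    catch { case e: DriverSideFailure => println(s"user code failed: ${e.getCause}") }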

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 2 additions & 0 deletions
@@ -599,6 +599,8 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort:
     } else {
       // This happens on the master, where we pass the updates to Python through a socket
       val socket = new Socket(serverHost, serverPort)
+      // SPARK-2282: Immediately reuse closed sockets because we create one per task.
+      socket.setReuseAddress(true)
       val in = socket.getInputStream
       val out = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream, bufferSize))
       out.writeInt(val2.size)
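
setReuseAddress(true) sets SO_REUSEADDR so the short-lived per-task sockets don't pile up in TIME_WAIT and block later connections. As a general pattern the option is set on an unconnected socket before connecting; a hedged standalone sketch (connectWithReuse is a made-up helper, not Spark code):

    import java.net.{InetSocketAddress, Socket}

    // SO_REUSEADDR is normally set on an unconnected socket, before bind/connect,
    // so the local address can be reused while an earlier connection is in TIME_WAIT.
    def connectWithReuse(host: String, port: Int): Socket = {
      val socket = new Socket()              // unconnected socket
      socket.setReuseAddress(true)           // allow quick reuse of the local address
      socket.connect(new InetSocketAddress(host, port))
      socket
    }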

core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala

Lines changed: 3 additions & 2 deletions
@@ -27,7 +27,7 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.ui.{WebUI, SparkUI, UIUtils}
 import org.apache.spark.ui.JettyUtils._
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{SignalLogger, Utils}

 /**
  * A web server that renders SparkUIs of completed applications.
@@ -169,10 +169,11 @@ class HistoryServer(
  *
  * This launches the HistoryServer as a Spark daemon.
  */
-object HistoryServer {
+object HistoryServer extends Logging {
   private val conf = new SparkConf

   def main(argStrings: Array[String]) {
+    SignalLogger.register(log)
     initSecurity()
     val args = new HistoryServerArguments(conf, argStrings)
     val securityManager = new SecurityManager(conf)
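
SignalLogger.register(log) makes the daemon log whichever signal kills it instead of exiting silently; HistoryServer, Master and Worker all gain the same call in main. Conceptually that boils down to installing handlers for the usual termination signals. The sketch below is a rough standalone approximation (not Spark's SignalLogger) and relies on the JVM-internal sun.misc.Signal API, so treat it as illustrative only:

    import sun.misc.{Signal, SignalHandler}

    // Log TERM/HUP/INT before exiting, instead of dying silently (POSIX platforms only).
    def logFatalSignals(): Unit =
      for (name <- Seq("TERM", "HUP", "INT")) {
        Signal.handle(new Signal(name), new SignalHandler {
          def handle(sig: Signal): Unit = {
            println(s"RECEIVED SIGNAL ${sig.getNumber}: SIG${sig.getName}")
            sys.exit(128 + sig.getNumber)   // conventional "killed by signal" exit code
          }
        })
      }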

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 4 additions & 3 deletions
@@ -41,7 +41,7 @@ import org.apache.spark.deploy.master.ui.MasterWebUI
 import org.apache.spark.metrics.MetricsSystem
 import org.apache.spark.scheduler.{EventLoggingListener, ReplayListenerBus}
 import org.apache.spark.ui.SparkUI
-import org.apache.spark.util.{AkkaUtils, Utils}
+import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils}

 private[spark] class Master(
     host: String,
@@ -481,7 +481,7 @@ private[spark] class Master(
     // First schedule drivers, they take strict precedence over applications
     val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
-      for (driver <- waitingDrivers) {
+      for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
        if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
          launchDriver(worker, driver)
          waitingDrivers -= driver
@@ -755,12 +755,13 @@ private[spark] class Master(
   }
 }

-private[spark] object Master {
+private[spark] object Master extends Logging {
   val systemName = "sparkMaster"
   private val actorName = "Master"
   val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r

   def main(argStrings: Array[String]) {
+    SignalLogger.register(log)
     val conf = new SparkConf
     val args = new MasterArguments(argStrings, conf)
     val (actorSystem, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort, conf)
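
The driver-scheduling loop now walks a snapshot, List(waitingDrivers: _*), because the loop body removes entries from waitingDrivers as drivers are launched; mutating a mutable buffer while iterating it directly can skip elements. A small self-contained illustration of the copy-then-iterate idiom (toy data, not the Master's state):

    import scala.collection.mutable.ArrayBuffer

    val waiting = ArrayBuffer(1, 2, 3, 4)

    // Iterate over an immutable snapshot so removing from `waiting` inside the loop is safe.
    for (item <- List(waiting: _*)) {
      if (item % 2 == 0) {
        waiting -= item   // mutates the original buffer, not the sequence being iterated
      }
    }
    assert(waiting == ArrayBuffer(1, 3))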

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 3 additions & 2 deletions
@@ -34,7 +34,7 @@ import org.apache.spark.deploy.DeployMessages._
 import org.apache.spark.deploy.master.{DriverState, Master}
 import org.apache.spark.deploy.worker.ui.WorkerWebUI
 import org.apache.spark.metrics.MetricsSystem
-import org.apache.spark.util.{AkkaUtils, Utils}
+import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils}

 /**
  * @param masterUrls Each url should look like spark://host:port.
@@ -365,8 +365,9 @@ private[spark] class Worker(
   }
 }

-private[spark] object Worker {
+private[spark] object Worker extends Logging {
   def main(argStrings: Array[String]) {
+    SignalLogger.register(log)
     val args = new WorkerArguments(argStrings)
     val (actorSystem, _) = startSystemAndActor(args.host, args.port, args.webUiPort, args.cores,
       args.memory, args.masters, args.workDir)
