
Commit 28fde53

Davies Liu committed
Merge branch 'master' of github.com:apache/spark into python_tests
2 parents 945a2b5 + 3ae37b9 commit 28fde53

File tree

28 files changed: +733 -401 lines changed


core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 296 additions & 209 deletions
Large diffs are not rendered by default.

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 15 additions & 18 deletions
@@ -21,7 +21,7 @@ import java.io.File
 import java.lang.management.ManagementFactory
 import java.net.URL
 import java.nio.ByteBuffer
-import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.{ConcurrentHashMap, Executors, TimeUnit}

 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ArrayBuffer, HashMap}
@@ -60,8 +60,6 @@ private[spark] class Executor(

   private val conf = env.conf

-  @volatile private var isStopped = false
-
   // No ip or host:port - just hostname
   Utils.checkHost(executorHostname, "Expected executed slave to be a hostname")
   // must not have port specified.
@@ -114,6 +112,10 @@ private[spark] class Executor(
   // Maintains the list of running tasks.
   private val runningTasks = new ConcurrentHashMap[Long, TaskRunner]

+  // Executor for the heartbeat task.
+  private val heartbeater = Executors.newSingleThreadScheduledExecutor(
+    Utils.namedThreadFactory("driver-heartbeater"))
+
   startDriverHeartbeater()

   def launchTask(
@@ -138,7 +140,8 @@ private[spark] class Executor(
   def stop(): Unit = {
     env.metricsSystem.report()
     env.rpcEnv.stop(executorEndpoint)
-    isStopped = true
+    heartbeater.shutdown()
+    heartbeater.awaitTermination(10, TimeUnit.SECONDS)
     threadPool.shutdown()
     if (!isLocal) {
       env.stop()
@@ -432,23 +435,17 @@ private[spark] class Executor(
   }

   /**
-   * Starts a thread to report heartbeat and partial metrics for active tasks to driver.
-   * This thread stops running when the executor is stopped.
+   * Schedules a task to report heartbeat and partial metrics for active tasks to driver.
    */
   private def startDriverHeartbeater(): Unit = {
     val intervalMs = conf.getTimeAsMs("spark.executor.heartbeatInterval", "10s")
-    val thread = new Thread() {
-      override def run() {
-        // Sleep a random interval so the heartbeats don't end up in sync
-        Thread.sleep(intervalMs + (math.random * intervalMs).asInstanceOf[Int])
-        while (!isStopped) {
-          reportHeartBeat()
-          Thread.sleep(intervalMs)
-        }
-      }
+
+    // Wait a random interval so the heartbeats don't end up in sync
+    val initialDelay = intervalMs + (math.random * intervalMs).asInstanceOf[Int]
+
+    val heartbeatTask = new Runnable() {
+      override def run(): Unit = Utils.logUncaughtExceptions(reportHeartBeat())
     }
-    thread.setDaemon(true)
-    thread.setName("driver-heartbeater")
-    thread.start()
+    heartbeater.scheduleAtFixedRate(heartbeatTask, initialDelay, intervalMs, TimeUnit.MILLISECONDS)
   }
 }
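
The Executor hunks above replace a hand-rolled daemon thread (an isStopped flag plus a Thread.sleep loop) with a single-threaded ScheduledExecutorService, which lets stop() shut the heartbeater down deterministically via shutdown()/awaitTermination(). Below is a minimal standalone sketch of the same java.util.concurrent pattern; HeartbeatSketch, the shortened 1s interval, and the println stand-in for reportHeartBeat() are illustrative assumptions, not Spark code.

import java.util.concurrent.{Executors, ThreadFactory, TimeUnit}

object HeartbeatSketch {
  def main(args: Array[String]): Unit = {
    // Interval shortened from Spark's 10s default so the demo finishes quickly
    val intervalMs = 1000L

    // Named daemon thread, playing the role of Utils.namedThreadFactory("driver-heartbeater")
    val factory = new ThreadFactory {
      override def newThread(r: Runnable): Thread = {
        val t = new Thread(r, "driver-heartbeater")
        t.setDaemon(true)
        t
      }
    }
    val heartbeater = Executors.newSingleThreadScheduledExecutor(factory)

    // Randomized initial delay so heartbeats from many executors don't end up in sync
    val initialDelay = intervalMs + (math.random * intervalMs).toLong

    val heartbeatTask = new Runnable {
      // Catch exceptions: a task that throws would silently cancel all future scheduled runs,
      // which is why the real code wraps reportHeartBeat() in Utils.logUncaughtExceptions
      override def run(): Unit =
        try println("heartbeat") catch { case e: Exception => e.printStackTrace() }
    }
    heartbeater.scheduleAtFixedRate(heartbeatTask, initialDelay, intervalMs, TimeUnit.MILLISECONDS)

    Thread.sleep(5000)

    // Mirrors the new Executor.stop(): stop scheduling, then wait briefly for in-flight work
    heartbeater.shutdown()
    heartbeater.awaitTermination(10, TimeUnit.SECONDS)
  }
}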

core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala

Lines changed: 4 additions & 3 deletions
@@ -86,11 +86,11 @@ private[nio] class ConnectionManager(
     conf.get("spark.network.timeout", "120s"))

   // Get the thread counts from the Spark Configuration.
-  // 
+  //
   // Even though the ThreadPoolExecutor constructor takes both a minimum and maximum value,
   // we only query for the minimum value because we are using LinkedBlockingDeque.
-  // 
-  // The JavaDoc for ThreadPoolExecutor points out that when using a LinkedBlockingDeque (which is 
+  //
+  // The JavaDoc for ThreadPoolExecutor points out that when using a LinkedBlockingDeque (which is
   // an unbounded queue) no more than corePoolSize threads will ever be created, so only the "min"
   // parameter is necessary.
   private val handlerThreadCount = conf.getInt("spark.core.connection.handler.threads.min", 20)
@@ -989,6 +989,7 @@ private[nio] class ConnectionManager(

   def stop() {
     ackTimeoutMonitor.stop()
+    selector.wakeup()
     selectorThread.interrupt()
     selectorThread.join()
     selector.close()
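
The comment kept in this hunk explains why ConnectionManager only reads "min" thread counts: when a ThreadPoolExecutor is backed by an unbounded queue, it never grows past corePoolSize, so a separate maximum is meaningless. A small standalone sketch of that JDK behaviour, with made-up pool sizes (CorePoolSizeSketch is illustrative, not Spark code):

import java.util.concurrent.{LinkedBlockingDeque, ThreadPoolExecutor, TimeUnit}

object CorePoolSizeSketch {
  def main(args: Array[String]): Unit = {
    // Unbounded work queue: the pool never creates more than corePoolSize (2) threads,
    // so the maximumPoolSize argument (8) is effectively ignored.
    val pool = new ThreadPoolExecutor(
      2, 8, 60L, TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable]())

    (1 to 20).foreach { _ =>
      pool.execute(new Runnable {
        override def run(): Unit = Thread.sleep(100)
      })
    }

    pool.shutdown()
    pool.awaitTermination(10, TimeUnit.SECONDS)
    println(s"largest pool size: ${pool.getLargestPoolSize}")  // prints 2, not 8
  }
}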

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 3 additions & 5 deletions
@@ -142,11 +142,10 @@ private[spark] class TaskSchedulerImpl(

     if (!isLocal && conf.getBoolean("spark.speculation", false)) {
       logInfo("Starting speculative execution thread")
-      import sc.env.actorSystem.dispatcher
       sc.env.actorSystem.scheduler.schedule(SPECULATION_INTERVAL_MS milliseconds,
             SPECULATION_INTERVAL_MS milliseconds) {
         Utils.tryOrStopSparkContext(sc) { checkSpeculatableTasks() }
-      }
+      }(sc.env.actorSystem.dispatcher)
     }
   }

@@ -394,7 +393,7 @@ private[spark] class TaskSchedulerImpl(

   def error(message: String) {
     synchronized {
-      if (activeTaskSets.size > 0) {
+      if (activeTaskSets.nonEmpty) {
         // Have each task set throw a SparkException with the error
         for ((taskSetId, manager) <- activeTaskSets) {
           try {
@@ -407,8 +406,7 @@ private[spark] class TaskSchedulerImpl(
         // No task sets are active but we still got an error. Just exit since this
         // must mean the error is during registration.
         // It might be good to do something smarter here in the future.
-        logError("Exiting due to error from cluster scheduler: " + message)
-        System.exit(1)
+        throw new SparkException(s"Exiting due to error from cluster scheduler: $message")
       }
     }
   }

core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala

Lines changed: 6 additions & 3 deletions
@@ -118,9 +118,12 @@ private[spark] class SparkDeploySchedulerBackend(
     notifyContext()
     if (!stopping) {
       logError("Application has been killed. Reason: " + reason)
-      scheduler.error(reason)
-      // Ensure the application terminates, as we can no longer run jobs.
-      sc.stop()
+      try {
+        scheduler.error(reason)
+      } finally {
+        // Ensure the application terminates, as we can no longer run jobs.
+        sc.stop()
+      }
     }
   }

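Read together with the TaskSchedulerImpl hunk above (error() now throws a SparkException instead of calling System.exit(1)), this try/finally guarantees that sc.stop() still runs when scheduler.error(reason) throws. A minimal sketch of that control flow, using hypothetical stand-ins for the scheduler and the context:

object StopOnErrorSketch {
  // Stand-in for TaskSchedulerImpl.error after this commit: it throws instead of exiting
  def schedulerError(reason: String): Unit =
    throw new RuntimeException(s"Exiting due to error from cluster scheduler: $reason")

  // Stand-in for SparkContext.stop()
  def stopContext(): Unit = println("SparkContext stopped")

  def main(args: Array[String]): Unit = {
    try {
      try {
        schedulerError("Application has been killed")
      } finally {
        stopContext()  // runs even though schedulerError threw, mirroring the diff above
      }
    } catch {
      case e: RuntimeException => println(s"propagated: ${e.getMessage}")
    }
  }
}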

core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala

Lines changed: 13 additions & 6 deletions
@@ -20,12 +20,12 @@ package org.apache.spark.scheduler.local
 import java.nio.ByteBuffer
 import java.util.concurrent.{Executors, TimeUnit}

-import org.apache.spark.rpc.{ThreadSafeRpcEndpoint, RpcEndpointRef, RpcEnv}
-import org.apache.spark.util.Utils
-import org.apache.spark.{Logging, SparkContext, SparkEnv, TaskState}
+import org.apache.spark.{Logging, SparkConf, SparkContext, SparkEnv, TaskState}
 import org.apache.spark.TaskState.TaskState
 import org.apache.spark.executor.{Executor, ExecutorBackend}
+import org.apache.spark.rpc.{ThreadSafeRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv}
 import org.apache.spark.scheduler.{SchedulerBackend, TaskSchedulerImpl, WorkerOffer}
+import org.apache.spark.util.Utils

 private case class ReviveOffers()

@@ -71,11 +71,15 @@ private[spark] class LocalEndpoint(

     case KillTask(taskId, interruptThread) =>
       executor.killTask(taskId, interruptThread)
+  }

+  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
     case StopExecutor =>
       executor.stop()
+      context.reply(true)
   }

+
   def reviveOffers() {
     val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores))
     val tasks = scheduler.resourceOffers(offers).flatten
@@ -104,8 +108,11 @@ private[spark] class LocalEndpoint(
  * master all run in the same JVM. It sits behind a TaskSchedulerImpl and handles launching tasks
  * on a single Executor (created by the LocalBackend) running locally.
  */
-private[spark] class LocalBackend(scheduler: TaskSchedulerImpl, val totalCores: Int)
-  extends SchedulerBackend with ExecutorBackend {
+private[spark] class LocalBackend(
+    conf: SparkConf,
+    scheduler: TaskSchedulerImpl,
+    val totalCores: Int)
+  extends SchedulerBackend with ExecutorBackend with Logging {

   private val appId = "local-" + System.currentTimeMillis
   var localEndpoint: RpcEndpointRef = null
@@ -116,7 +123,7 @@ private[spark] class LocalBackend(scheduler: TaskSchedulerImpl, val totalCores:
   }

   override def stop() {
-    localEndpoint.send(StopExecutor)
+    localEndpoint.sendWithReply(StopExecutor)
   }

   override def reviveOffers() {
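
The LocalBackend hunks move StopExecutor handling into receiveAndReply and switch stop() from send to sendWithReply, so shutting down blocks until the executor has actually stopped instead of racing ahead. The sketch below illustrates the fire-and-forget versus request-reply distinction with plain Scala futures; StopAckSketch is a hypothetical illustration of the idea, not the Spark RPC API.

import scala.concurrent.{Await, Future, Promise}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

object StopAckSketch {
  def main(args: Array[String]): Unit = {
    val stopped = Promise[Boolean]()

    // Stand-in for the endpoint's receiveAndReply branch: do the work, then acknowledge
    Future {
      println("executor.stop()")  // the side effect the caller must not race past
      stopped.success(true)       // analogous to context.reply(true)
    }

    // A fire-and-forget send would return immediately; waiting on the acknowledgement
    // (what sendWithReply provides) guarantees the stop has completed before we proceed.
    val ack = Await.result(stopped.future, 10.seconds)
    println(s"acknowledged: $ack")
  }
}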

core/src/main/scala/org/apache/spark/util/ActorLogReceive.scala

Lines changed: 7 additions & 1 deletion
@@ -43,7 +43,13 @@ private[spark] trait ActorLogReceive {

     private val _receiveWithLogging = receiveWithLogging

-    override def isDefinedAt(o: Any): Boolean = _receiveWithLogging.isDefinedAt(o)
+    override def isDefinedAt(o: Any): Boolean = {
+      val handled = _receiveWithLogging.isDefinedAt(o)
+      if (!handled) {
+        log.debug(s"Received unexpected actor system event: $o")
+      }
+      handled
+    }

     override def apply(o: Any): Unit = {
       if (log.isDebugEnabled) {
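
This hunk makes isDefinedAt log any message that the actor's receiveWithLogging does not cover, so unexpected actor-system events surface at debug level instead of disappearing silently. A self-contained sketch of the same PartialFunction technique, with println standing in for log.debug (names are illustrative, not Spark code):

object LoggingReceiveSketch {
  // Wraps a handler so that messages it does not cover are reported, mirroring the
  // isDefinedAt override added above.
  def withUnhandledLogging[A](handler: PartialFunction[A, Unit]): PartialFunction[A, Unit] =
    new PartialFunction[A, Unit] {
      override def isDefinedAt(o: A): Boolean = {
        val handled = handler.isDefinedAt(o)
        if (!handled) println(s"Received unexpected actor system event: $o")
        handled
      }
      override def apply(o: A): Unit = handler(o)
    }

  def main(args: Array[String]): Unit = {
    val receive = withUnhandledLogging[Any] { case "ping" => println("pong") }
    if (receive.isDefinedAt("ping")) receive("ping")  // handled: prints "pong"
    receive.isDefinedAt(42)                           // unhandled: logs the event
  }
}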

core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala

Lines changed: 0 additions & 6 deletions
@@ -56,19 +56,13 @@ class ExecutorAllocationManagerSuite extends FunSuite with LocalSparkContext wit
     // Min < 0
     val conf1 = conf.clone().set("spark.dynamicAllocation.minExecutors", "-1")
     intercept[SparkException] { contexts += new SparkContext(conf1) }
-    SparkEnv.get.stop()
-    SparkContext.clearActiveContext()

     // Max < 0
     val conf2 = conf.clone().set("spark.dynamicAllocation.maxExecutors", "-1")
     intercept[SparkException] { contexts += new SparkContext(conf2) }
-    SparkEnv.get.stop()
-    SparkContext.clearActiveContext()

     // Both min and max, but min > max
     intercept[SparkException] { createSparkContext(2, 1) }
-    SparkEnv.get.stop()
-    SparkContext.clearActiveContext()

     // Both min and max, and min == max
     val sc1 = createSparkContext(1, 1)

examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ object DirectKafkaWordCount {
        | <brokers> is a list of one or more Kafka brokers
        | <topics> is a list of one or more kafka topics to consume from
        |
-        """".stripMargin)
+        """.stripMargin)
       System.exit(1)
     }


external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala

Lines changed: 1 addition & 2 deletions
@@ -23,10 +23,9 @@ import org.apache.spark.{Logging, Partition, SparkContext, SparkException, TaskC
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.NextIterator

-import java.util.Properties
 import kafka.api.{FetchRequestBuilder, FetchResponse}
 import kafka.common.{ErrorMapping, TopicAndPartition}
-import kafka.consumer.{ConsumerConfig, SimpleConsumer}
+import kafka.consumer.SimpleConsumer
 import kafka.message.{MessageAndMetadata, MessageAndOffset}
 import kafka.serializer.Decoder
 import kafka.utils.VerifiableProperties

mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ import java.util.UUID
 private[ml] trait Identifiable extends Serializable {

   /**
-   * A unique id for the object. The default implementation concatenates the class name, "-", and 8
+   * A unique id for the object. The default implementation concatenates the class name, "_", and 8
    * random hex chars.
    */
   private[ml] val uid: String =
mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala

Lines changed: 10 additions & 5 deletions
@@ -227,15 +227,16 @@ object Vectors {
    * @param elements vector elements in (index, value) pairs.
    */
   def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
-    require(size > 0)
+    require(size > 0, "The size of the requested sparse vector must be greater than 0.")

     val (indices, values) = elements.sortBy(_._1).unzip
     var prev = -1
     indices.foreach { i =>
       require(prev < i, s"Found duplicate indices: $i.")
       prev = i
     }
-    require(prev < size)
+    require(prev < size, s"You may not write an element to index $prev because the declared " +
+      s"size of your vector is $size")

     new SparseVector(size, indices.toArray, values.toArray)
   }
@@ -309,7 +310,8 @@ object Vectors {
    * @return norm in L^p^ space.
    */
   def norm(vector: Vector, p: Double): Double = {
-    require(p >= 1.0)
+    require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
+      s"You specified p=$p.")
     val values = vector match {
       case DenseVector(vs) => vs
       case SparseVector(n, ids, vs) => vs
@@ -360,7 +362,8 @@ object Vectors {
    * @return squared distance between two Vectors.
    */
   def sqdist(v1: Vector, v2: Vector): Double = {
-    require(v1.size == v2.size, "vector dimension mismatch")
+    require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
+      s"=${v2.size}.")
     var squaredDistance = 0.0
     (v1, v2) match {
       case (v1: SparseVector, v2: SparseVector) =>
@@ -518,7 +521,9 @@ class SparseVector(
     val indices: Array[Int],
     val values: Array[Double]) extends Vector {

-  require(indices.length == values.length)
+  require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
+    s" indices match the dimension of the values. You provided ${indices.size} indices and " +
+    s" ${values.size} values.")

   override def toString: String =
     "(%s,%s,%s)".format(size, indices.mkString("[", ",", "]"), values.mkString("[", ",", "]"))

mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala

Lines changed: 3 additions & 6 deletions
@@ -17,16 +17,13 @@

 package org.apache.spark.ml.param

+import org.apache.spark.ml.param.shared.{HasInputCol, HasMaxIter}
+
 /** A subclass of Params for testing. */
-class TestParams extends Params {
+class TestParams extends Params with HasMaxIter with HasInputCol {

-  val maxIter = new IntParam(this, "maxIter", "max number of iterations")
   def setMaxIter(value: Int): this.type = { set(maxIter, value); this }
-  def getMaxIter: Int = getOrDefault(maxIter)
-
-  val inputCol = new Param[String](this, "inputCol", "input column name")
   def setInputCol(value: String): this.type = { set(inputCol, value); this }
-  def getInputCol: String = getOrDefault(inputCol)

   setDefault(maxIter -> 10)


python/pyspark/ml/classification.py

Lines changed: 2 additions & 1 deletion
@@ -59,6 +59,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
                  maxIter=100, regParam=0.1)
         """
         super(LogisticRegression, self).__init__()
+        self._setDefault(maxIter=100, regParam=0.1)
         kwargs = self.__init__._input_kwargs
         self.setParams(**kwargs)

@@ -71,7 +72,7 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
         Sets params for logistic regression.
         """
         kwargs = self.setParams._input_kwargs
-        return self._set_params(**kwargs)
+        return self._set(**kwargs)

     def _create_model(self, java_model):
         return LogisticRegressionModel(java_model)
