
Commit 9434b50

Author: Ilya Ganelin
Merge remote-tracking branch 'upstream/master' into SPARK-5845
2 parents: db8647e + 7c7d2d5

309 files changed: +9028 -3679 lines


LICENSE

Lines changed: 16 additions & 0 deletions
@@ -771,6 +771,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
+========================================================================
+For TestTimSort (core/src/test/java/org/apache/spark/util/collection/TestTimSort.java):
+========================================================================
+Copyright (C) 2015 Stijn de Gouw
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 
 ========================================================================
 For LimitedInputStream

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent</artifactId>
+    <artifactId>spark-parent_2.10</artifactId>
     <version>1.3.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent</artifactId>
+    <artifactId>spark-parent_2.10</artifactId>
     <version>1.3.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

conf/metrics.properties.template

Lines changed: 9 additions & 0 deletions
@@ -122,6 +122,15 @@
 
 #worker.sink.csv.unit=minutes
 
+# Enable Slf4jSink for all instances by class name
+#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
+
+# Polling period for Slf4JSink
+#*.sink.sl4j.period=1
+
+#*.sink.sl4j.unit=minutes
+
+
 # Enable jvm source for instance master, worker, driver and executor
 #master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
 
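
The entries above are added commented out; to actually emit metrics through Slf4jSink, the sink keys have to be uncommented and Spark pointed at the properties file. A minimal sketch in Scala, assuming a hypothetical file path and the standard spark.metrics.conf setting:

import org.apache.spark.{SparkConf, SparkContext}

object MetricsConfSketch {
  def main(args: Array[String]): Unit = {
    // Sketch only: /tmp/metrics.properties is a placeholder path; that file would hold the
    // uncommented sink entries from the template above, e.g.
    //   *.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("slf4j-metrics-sketch")
      .set("spark.metrics.conf", "/tmp/metrics.properties")
    val sc = new SparkContext(conf)
    sc.parallelize(1 to 10).count()   // do a little work so some metrics get reported
    sc.stop()
  }
}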

core/pom.xml

Lines changed: 7 additions & 1 deletion
@@ -20,7 +20,7 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent</artifactId>
+    <artifactId>spark-parent_2.10</artifactId>
     <version>1.3.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
@@ -319,6 +319,12 @@
       <artifactId>selenium-java</artifactId>
       <scope>test</scope>
     </dependency>
+    <!-- Added for selenium: -->
+    <dependency>
+      <groupId>xml-apis</groupId>
+      <artifactId>xml-apis</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-all</artifactId>

core/src/main/java/org/apache/spark/util/collection/TimSort.java

Lines changed: 4 additions & 5 deletions
@@ -425,15 +425,14 @@ private void pushRun(int runBase, int runLen) {
     private void mergeCollapse() {
       while (stackSize > 1) {
         int n = stackSize - 2;
-        if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) {
+        if ( (n >= 1 && runLen[n-1] <= runLen[n] + runLen[n+1])
+          || (n >= 2 && runLen[n-2] <= runLen[n] + runLen[n-1])) {
           if (runLen[n - 1] < runLen[n + 1])
             n--;
-          mergeAt(n);
-        } else if (runLen[n] <= runLen[n + 1]) {
-          mergeAt(n);
-        } else {
+        } else if (runLen[n] > runLen[n + 1]) {
           break; // Invariant is established
         }
+        mergeAt(n);
       }
     }
 
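
This is the mergeCollapse fix for the TimSort invariant violation reported by Stijn de Gouw (hence the TestTimSort license entry above): the old check only looked one run deep, so the stack of pending runs could end up with lengths where an earlier run is no longer larger than the sum of the two after it. A small Scala sketch, not part of the commit, of the invariant the fixed code is meant to re-establish:

object TimSortInvariantSketch {
  // Sketch only: the invariant TimSort's mergeCollapse is meant to keep on the stack of
  // pending run lengths (names here are illustrative, not Spark or JDK APIs):
  //   runLen(i - 1) > runLen(i)  and  runLen(i - 2) > runLen(i - 1) + runLen(i)
  def satisfiesTimSortInvariant(runLen: IndexedSeq[Int]): Boolean = {
    val pairsOk = (1 until runLen.length).forall(i => runLen(i - 1) > runLen(i))
    val triplesOk = (2 until runLen.length).forall(i => runLen(i - 2) > runLen(i - 1) + runLen(i))
    pairsOk && triplesOk
  }
}

// Example: Vector(120, 80, 25) satisfies both rules, while Vector(120, 80, 45) breaks the
// second one (120 <= 80 + 45), so the fixed mergeCollapse would merge before pushing more runs.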

core/src/main/scala/org/apache/spark/Accumulators.scala

Lines changed: 34 additions & 8 deletions
@@ -23,6 +23,7 @@ import java.lang.ThreadLocal
 
 import scala.collection.generic.Growable
 import scala.collection.mutable.Map
+import scala.ref.WeakReference
 import scala.reflect.ClassTag
 
 import org.apache.spark.serializer.JavaSerializer
@@ -279,13 +280,24 @@ object AccumulatorParam {
 
 // TODO: The multi-thread support in accumulators is kind of lame; check
 // if there's a more intuitive way of doing it right
-private[spark] object Accumulators {
-  // TODO: Use soft references? => need to make readObject work properly then
-  val originals = Map[Long, Accumulable[_, _]]()
-  val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
+private[spark] object Accumulators extends Logging {
+  /**
+   * This global map holds the original accumulator objects that are created on the driver.
+   * It keeps weak references to these objects so that accumulators can be garbage-collected
+   * once the RDDs and user-code that reference them are cleaned up.
+   */
+  val originals = Map[Long, WeakReference[Accumulable[_, _]]]()
+
+  /**
+   * This thread-local map holds per-task copies of accumulators; it is used to collect the set
+   * of accumulator updates to send back to the driver when tasks complete. After tasks complete,
+   * this map is cleared by `Accumulators.clear()` (see Executor.scala).
+   */
+  private val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
     override protected def initialValue() = Map[Long, Accumulable[_, _]]()
   }
-  var lastId: Long = 0
+
+  private var lastId: Long = 0
 
   def newId(): Long = synchronized {
     lastId += 1
@@ -294,7 +306,7 @@ private[spark] object Accumulators {
 
   def register(a: Accumulable[_, _], original: Boolean): Unit = synchronized {
     if (original) {
-      originals(a.id) = a
+      originals(a.id) = new WeakReference[Accumulable[_, _]](a)
     } else {
       localAccums.get()(a.id) = a
     }
@@ -303,7 +315,13 @@ private[spark] object Accumulators {
   // Clear the local (non-original) accumulators for the current thread
   def clear() {
     synchronized {
-      localAccums.get.clear
+      localAccums.get.clear()
+    }
+  }
+
+  def remove(accId: Long) {
+    synchronized {
+      originals.remove(accId)
     }
   }
 
@@ -320,7 +338,15 @@ private[spark] object Accumulators {
   def add(values: Map[Long, Any]): Unit = synchronized {
     for ((id, value) <- values) {
       if (originals.contains(id)) {
-        originals(id).asInstanceOf[Accumulable[Any, Any]] ++= value
+        // Since we are now storing weak references, we must check whether the underlying data
+        // is valid.
+        originals(id).get match {
+          case Some(accum) => accum.asInstanceOf[Accumulable[Any, Any]] ++= value
+          case None =>
+            throw new IllegalAccessError("Attempted to access garbage collected Accumulator.")
+        }
+      } else {
+        logWarning(s"Ignoring accumulator update for unknown accumulator id $id")
       }
     }
   }
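
Because originals now holds WeakReference values, a lookup can observe an accumulator that has already been garbage-collected, which is why add matches on WeakReference.get. A self-contained Scala sketch of that pattern (illustrative names only, not Spark code):

import scala.collection.mutable
import scala.ref.WeakReference

// Sketch: a registry that, like Accumulators.originals above, holds only weak
// references so registered values stay eligible for garbage collection.
object WeakRegistry {
  private val entries = mutable.Map[Long, WeakReference[AnyRef]]()

  def register(id: Long, value: AnyRef): Unit = synchronized {
    entries(id) = new WeakReference[AnyRef](value)
  }

  def lookup(id: Long): Option[AnyRef] = synchronized {
    // WeakReference.get returns None once the referent has been collected,
    // mirroring the match on originals(id).get in the diff above.
    entries.get(id).flatMap(_.get)
  }

  def remove(id: Long): Unit = synchronized {
    entries.remove(id)
  }
}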

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 47 additions & 13 deletions
@@ -32,6 +32,7 @@ private sealed trait CleanupTask
 private case class CleanRDD(rddId: Int) extends CleanupTask
 private case class CleanShuffle(shuffleId: Int) extends CleanupTask
 private case class CleanBroadcast(broadcastId: Long) extends CleanupTask
+private case class CleanAccum(accId: Long) extends CleanupTask
 
 /**
  * A WeakReference associated with a CleanupTask.
@@ -104,16 +105,30 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
     cleaningThread.start()
   }
 
-  /** Stop the cleaner. */
+  /**
+   * Stop the cleaning thread and wait until the thread has finished running its current task.
+   */
   def stop() {
     stopped = true
+    // Interrupt the cleaning thread, but wait until the current task has finished before
+    // doing so. This guards against the race condition where a cleaning thread may
+    // potentially clean similarly named variables created by a different SparkContext,
+    // resulting in otherwise inexplicable block-not-found exceptions (SPARK-6132).
+    synchronized {
+      cleaningThread.interrupt()
+    }
+    cleaningThread.join()
   }
 
   /** Register a RDD for cleanup when it is garbage collected. */
   def registerRDDForCleanup(rdd: RDD[_]) {
     registerForCleanup(rdd, CleanRDD(rdd.id))
   }
 
+  def registerAccumulatorForCleanup(a: Accumulable[_, _]): Unit = {
+    registerForCleanup(a, CleanAccum(a.id))
+  }
+
   /** Register a ShuffleDependency for cleanup when it is garbage collected. */
   def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) {
     registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId))
@@ -135,19 +150,25 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
       try {
         val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT))
           .map(_.asInstanceOf[CleanupTaskWeakReference])
-        reference.map(_.task).foreach { task =>
-          logDebug("Got cleaning task " + task)
-          referenceBuffer -= reference.get
-          task match {
-            case CleanRDD(rddId) =>
-              doCleanupRDD(rddId, blocking = blockOnCleanupTasks)
-            case CleanShuffle(shuffleId) =>
-              doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks)
-            case CleanBroadcast(broadcastId) =>
-              doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks)
+        // Synchronize here to avoid being interrupted on stop()
+        synchronized {
+          reference.map(_.task).foreach { task =>
+            logDebug("Got cleaning task " + task)
+            referenceBuffer -= reference.get
+            task match {
+              case CleanRDD(rddId) =>
+                doCleanupRDD(rddId, blocking = blockOnCleanupTasks)
+              case CleanShuffle(shuffleId) =>
+                doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks)
+              case CleanBroadcast(broadcastId) =>
+                doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks)
+              case CleanAccum(accId) =>
+                doCleanupAccum(accId, blocking = blockOnCleanupTasks)
+            }
          }
        }
       } catch {
+        case ie: InterruptedException if stopped => // ignore
         case e: Exception => logError("Error in cleaning thread", e)
       }
     }
@@ -181,15 +202,27 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   /** Perform broadcast cleanup. */
   def doCleanupBroadcast(broadcastId: Long, blocking: Boolean) {
     try {
-      logDebug("Cleaning broadcast " + broadcastId)
+      logDebug(s"Cleaning broadcast $broadcastId")
       broadcastManager.unbroadcast(broadcastId, true, blocking)
       listeners.foreach(_.broadcastCleaned(broadcastId))
-      logInfo("Cleaned broadcast " + broadcastId)
+      logDebug(s"Cleaned broadcast $broadcastId")
     } catch {
       case e: Exception => logError("Error cleaning broadcast " + broadcastId, e)
     }
   }
 
+  /** Perform accumulator cleanup. */
+  def doCleanupAccum(accId: Long, blocking: Boolean) {
+    try {
+      logDebug("Cleaning accumulator " + accId)
+      Accumulators.remove(accId)
+      listeners.foreach(_.accumCleaned(accId))
+      logInfo("Cleaned accumulator " + accId)
+    } catch {
+      case e: Exception => logError("Error cleaning accumulator " + accId, e)
+    }
+  }
+
   private def blockManagerMaster = sc.env.blockManager.master
   private def broadcastManager = sc.env.broadcastManager
   private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
@@ -206,4 +239,5 @@ private[spark] trait CleanerListener {
   def rddCleaned(rddId: Int)
   def shuffleCleaned(shuffleId: Int)
   def broadcastCleaned(broadcastId: Long)
+  def accumCleaned(accId: Long)
 }
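
ContextCleaner dispatches these doCleanup* calls from weak references that land on a reference queue once the registered objects are garbage-collected. A minimal sketch of that queue-polling pattern in plain Scala (illustrative, not the Spark implementation):

import java.lang.ref.{ReferenceQueue, WeakReference}

// Sketch: associate a cleanup task with an object via a weak reference, then poll
// the reference queue to run the task once the object has been garbage-collected.
final class CleanupRef[A <: AnyRef](
    referent: A,
    val task: () => Unit,
    queue: ReferenceQueue[A]) extends WeakReference[A](referent, queue)

object CleanupLoopSketch {
  def pollOnce[A <: AnyRef](queue: ReferenceQueue[A], timeoutMs: Long): Unit = {
    // remove() blocks for up to timeoutMs and returns null if nothing was enqueued.
    val ref = queue.remove(timeoutMs)
    ref match {
      case r: CleanupRef[_] => r.task()   // run the registered cleanup
      case _ =>                           // timeout or unrelated reference: nothing to do
    }
  }
}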

core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala

Lines changed: 59 additions & 6 deletions
@@ -17,33 +17,86 @@
 
 package org.apache.spark
 
-import akka.actor.Actor
+import scala.concurrent.duration._
+import scala.collection.mutable
+
+import akka.actor.{Actor, Cancellable}
+
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.storage.BlockManagerId
-import org.apache.spark.scheduler.TaskScheduler
+import org.apache.spark.scheduler.{SlaveLost, TaskScheduler}
 import org.apache.spark.util.ActorLogReceive
 
 /**
  * A heartbeat from executors to the driver. This is a shared message used by several internal
- * components to convey liveness or execution information for in-progress tasks.
+ * components to convey liveness or execution information for in-progress tasks. It will also
+ * expire the hosts that have not heartbeated for more than spark.network.timeout.
  */
 private[spark] case class Heartbeat(
     executorId: String,
     taskMetrics: Array[(Long, TaskMetrics)], // taskId -> TaskMetrics
     blockManagerId: BlockManagerId)
 
+private[spark] case object ExpireDeadHosts
+
 private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean)
 
 /**
  * Lives in the driver to receive heartbeats from executors..
  */
-private[spark] class HeartbeatReceiver(scheduler: TaskScheduler)
+private[spark] class HeartbeatReceiver(sc: SparkContext, scheduler: TaskScheduler)
   extends Actor with ActorLogReceive with Logging {
 
+  // executor ID -> timestamp of when the last heartbeat from this executor was received
+  private val executorLastSeen = new mutable.HashMap[String, Long]
+
+  private val executorTimeoutMs = sc.conf.getLong("spark.network.timeout",
+    sc.conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", 120)) * 1000
+
+  private val checkTimeoutIntervalMs = sc.conf.getLong("spark.network.timeoutInterval",
+    sc.conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60)) * 1000
+
+  private var timeoutCheckingTask: Cancellable = null
+
+  override def preStart(): Unit = {
+    import context.dispatcher
+    timeoutCheckingTask = context.system.scheduler.schedule(0.seconds,
+      checkTimeoutIntervalMs.milliseconds, self, ExpireDeadHosts)
+    super.preStart()
+  }
+
   override def receiveWithLogging = {
     case Heartbeat(executorId, taskMetrics, blockManagerId) =>
-      val response = HeartbeatResponse(
-        !scheduler.executorHeartbeatReceived(executorId, taskMetrics, blockManagerId))
+      val unknownExecutor = !scheduler.executorHeartbeatReceived(
+        executorId, taskMetrics, blockManagerId)
+      val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
+      executorLastSeen(executorId) = System.currentTimeMillis()
       sender ! response
+    case ExpireDeadHosts =>
+      expireDeadHosts()
+  }
+
+  private def expireDeadHosts(): Unit = {
+    logTrace("Checking for hosts with no recent heartbeats in HeartbeatReceiver.")
+    val now = System.currentTimeMillis()
+    for ((executorId, lastSeenMs) <- executorLastSeen) {
+      if (now - lastSeenMs > executorTimeoutMs) {
+        logWarning(s"Removing executor $executorId with no recent heartbeats: " +
+          s"${now - lastSeenMs} ms exceeds timeout $executorTimeoutMs ms")
+        scheduler.executorLost(executorId, SlaveLost("Executor heartbeat " +
          "timed out after ${now - lastSeenMs} ms"))
+        if (sc.supportDynamicAllocation) {
+          sc.killExecutor(executorId)
+        }
+        executorLastSeen.remove(executorId)
+      }
+    }
+  }
+
+  override def postStop(): Unit = {
+    if (timeoutCheckingTask != null) {
+      timeoutCheckingTask.cancel()
+    }
+    super.postStop()
   }
 }
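
The ExpireDeadHosts handling amounts to keeping a last-seen timestamp per executor and sweeping out entries older than the configured timeout. A stripped-down Scala sketch of that bookkeeping, with hypothetical names and no Akka or SparkContext involved:

import scala.collection.mutable

// Sketch: last-seen tracking as in executorLastSeen above, with an expiry sweep that
// returns the IDs whose most recent heartbeat is older than timeoutMs.
final class HeartbeatTracker(timeoutMs: Long) {
  private val lastSeen = mutable.HashMap[String, Long]()

  def recordHeartbeat(executorId: String, nowMs: Long = System.currentTimeMillis()): Unit =
    lastSeen(executorId) = nowMs

  def expire(nowMs: Long = System.currentTimeMillis()): Seq[String] = {
    val expired = lastSeen.collect {
      case (id, seen) if nowMs - seen > timeoutMs => id
    }.toSeq
    expired.foreach(lastSeen.remove)   // drop expired entries, as expireDeadHosts does
    expired
  }
}

// Usage sketch: val tracker = new HeartbeatTracker(timeoutMs = 120000L)
// tracker.recordHeartbeat("executor-1"); later, tracker.expire() yields the timed-out IDs.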
