102 changes: 95 additions & 7 deletions core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
@@ -69,13 +69,14 @@ private[spark] class AppStatusListener(

// Keep track of live entities, so that task metrics can be efficiently updated (without
// causing too many writes to the underlying store, and other expensive operations).
private val liveStages = new ConcurrentHashMap[(Int, Int), LiveStage]()
private val liveJobs = new HashMap[Int, LiveJob]()
private val liveExecutors = new HashMap[String, LiveExecutor]()
private val deadExecutors = new HashMap[String, LiveExecutor]()
private val liveTasks = new HashMap[Long, LiveTask]()
private val liveRDDs = new HashMap[Int, LiveRDD]()
private val pools = new HashMap[String, SchedulerPool]()
// variables are visible for tests.
private[spark] val liveStages = new ConcurrentHashMap[(Int, Int), LiveStage]()
private[spark] val liveJobs = new HashMap[Int, LiveJob]()
private[spark] val liveExecutors = new HashMap[String, LiveExecutor]()
private[spark] val deadExecutors = new HashMap[String, LiveExecutor]()
private[spark] val liveTasks = new HashMap[Long, LiveTask]()
private[spark] val liveRDDs = new HashMap[Int, LiveRDD]()
private[spark] val pools = new HashMap[String, SchedulerPool]()

private val SQL_EXECUTION_ID_KEY = "spark.sql.execution.id"
// Keep the active executor count as a separate variable to avoid having to do synchronization
@@ -103,6 +104,87 @@ private[spark] class AppStatusListener(
}
}

// visible for tests
private[spark] def recoverLiveEntities(): Unit = {
if (!live) {
Member Author:
This may need to cooperate with SPARK-28594's config.

Contributor:
I still think it should run by default to guarantee that AppStatusListener and the KVStore stay in sync, but let's wait for other voices.

Contributor (@HeartSaVioR, Oct 7, 2019):
Hmm... I've found that we don't recover AppStatusSource either, since it is not passed when live == false. If possible I'd add an assertion that the KVStore is empty when live == true, but that depends on whether it's feasible. If not, we may have to live with it.

Member Author:
> If possible I'd add an assertion that the KVStore is empty when live == true, but that depends on whether it's feasible.

I think our current usage of a live (true) AppStatusListener guarantees an empty KVStore at the initialization step, so I don't understand what "depends on whether it's feasible" means. Am I missing something?

Also, it seems we don't have an isEmpty API for KVStore; otherwise I could add an assertion as a compromise between us.

Contributor:
> I think our current usage of a live (true) AppStatusListener guarantees an empty KVStore at the initialization step.

That's a "context" we might eventually break, and as a side effect it would break here too. I'm in favor of defensive programming: if there are preconditions, they should be documented somewhere or asserted. But I agree we don't have an isEmpty API for KVStore - let's leave it as it is.
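For illustration only, the precondition discussed above could be approximated at initialization without an isEmpty API by counting one of the tracked entity classes (a hypothetical sketch, not part of the PR):

// Sketch: approximate "the KVStore is empty" via count() on a representative
// entity class, since KVStore exposes no isEmpty. Hypothetical, not in this PR.
if (live) {
  assert(kvstore.count(classOf[JobDataWrapper]) == 0L,
    "A live AppStatusListener expects an empty KVStore at initialization")
}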

kvstore.view(classOf[JobDataWrapper])
.asScala.filter(_.info.status == JobExecutionStatus.RUNNING)
Contributor:
A safer condition would be either UNKNOWN or RUNNING, but I guess it shouldn't be UNKNOWN, so this seems OK.

Member Author:
Yeah, I didn't see any place where we set the job status to UNKNOWN.

.map(_.toLiveJob).foreach(job => liveJobs.put(job.jobId, job))
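
For illustration, the more defensive variant floated in the thread above would also recover UNKNOWN jobs; this is a sketch only and was not adopted, since the status is never set to UNKNOWN:

// Hypothetical variant of the recovery filter that also picks up UNKNOWN jobs.
kvstore.view(classOf[JobDataWrapper]).asScala
  .filter { j =>
    j.info.status == JobExecutionStatus.RUNNING ||
      j.info.status == JobExecutionStatus.UNKNOWN
  }
  .map(_.toLiveJob).foreach(job => liveJobs.put(job.jobId, job))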

kvstore.view(classOf[ExecutorSummaryWrapper]).asScala.filter(_.info.isActive)
.map(_.toLiveExecutor).foreach(exec => liveExecutors.put(exec.executorId, exec))

kvstore.view(classOf[ExecutorSummaryWrapper]).asScala.filter(!_.info.isActive)
.map(_.toLiveExecutor).foreach(exec => deadExecutors.put(exec.executorId, exec))

kvstore.view(classOf[StageDataWrapper]).asScala
.filter { stageData =>
stageData.info.status == v1.StageStatus.PENDING ||
Contributor:
Logically, any stage with activeTasks > 0 is kept in liveStages regardless of its status:

// [SPARK-24415] Wait for all tasks to finish before removing stage from live list
val removeStage =
stage.activeTasks == 0 &&
(v1.StageStatus.COMPLETE.equals(stage.status) ||
v1.StageStatus.FAILED.equals(stage.status))
if (removeStage) {
update(stage, now, last = true)
} else {
maybeUpdate(stage, now)
}

// Remove stage only if there are no active tasks remaining
val removeStage = stage.activeTasks == 0
update(stage, now, last = removeStage)
if (removeStage) {
liveStages.remove((event.stageInfo.stageId, event.stageInfo.attemptNumber))
}

... except when the status is SKIPPED:

// Check if there are any pending stages that match this job; mark those as skipped.
val it = liveStages.entrySet.iterator()
while (it.hasNext()) {
val e = it.next()
if (job.stageIds.contains(e.getKey()._1)) {
val stage = e.getValue()
if (v1.StageStatus.PENDING.equals(stage.status)) {
stage.status = v1.StageStatus.SKIPPED
job.skippedStages += stage.info.stageId
job.skippedTasks += stage.info.numTasks
job.activeStages -= 1
pools.get(stage.schedulingPool).foreach { pool =>
pool.stageIds = pool.stageIds - stage.info.stageId
update(pool, now)
}
it.remove()
update(stage, now, last = true)
}
}
}

so the condition is actually more complicated than that.

Member Author:
Good catch! I've attached another possible condition for this:

(stageData.info.numActiveTasks > 0 && stageData.info.status != v1.StageStatus.SKIPPED)

stageData.info.status == v1.StageStatus.ACTIVE ||
(stageData.info.numActiveTasks > 0 && stageData.info.status != v1.StageStatus.SKIPPED)
}.map { stageData =>
val stageId = stageData.info.stageId
val jobs = liveJobs.values.filter(_.stageIds.contains(stageId)).toSeq
stageData.toLiveStage(jobs)
}.foreach { stage =>
Contributor:
nit: the indentation of the foreach block looks off.

val stageId = stage.info.stageId
val stageAttempt = stage.info.attemptNumber()
liveStages.put((stageId, stageAttempt), stage)

kvstore.view(classOf[ExecutorStageSummaryWrapper])
.index("stage")
.first(Array(stageId, stageAttempt))
.last(Array(stageId, stageAttempt))
.asScala
.map(_.toLiveExecutorStageSummary)
.foreach { esummary =>
stage.executorSummaries.put(esummary.executorId, esummary)
if (esummary.isBlacklisted) {
stage.blackListedExecutors += esummary.executorId
liveExecutors.get(esummary.executorId).foreach(_.isBlacklisted = true)
liveExecutors.get(esummary.executorId).foreach(_.blacklistedInStages += stageId)
}
}

Contributor:
nit: two consecutive empty lines are unnecessary - one is enough.

kvstore.view(classOf[TaskDataWrapper])
.parent(Array(stageId, stageAttempt))
.index(TaskIndexNames.STATUS)
.first(TaskState.RUNNING.toString)
.last(TaskState.RUNNING.toString)
.asScala
.map(_.toLiveTask)
.foreach { task =>
liveTasks.put(task.info.taskId, task)
stage.activeTasksPerExecutor(task.info.executorId) += 1
}

stage.savedTasks.addAndGet(kvstore.count(classOf[TaskDataWrapper]).intValue())
}

kvstore.view(classOf[RDDStorageInfoWrapper]).asScala
Contributor (@HeartSaVioR, Oct 4, 2019):
nit: same here, but just a suggestion (the indentation is not dramatically changed).

.foreach { rddWrapper =>
val liveRdd = rddWrapper.toLiveRDD(liveExecutors)
liveRDDs.put(liveRdd.info.id, liveRdd)
}

kvstore.view(classOf[PoolData]).asScala.foreach { poolData =>
Contributor:
nit: same here.

val schedulerPool = poolData.toSchedulerPool
pools.put(schedulerPool.name, schedulerPool)
}
}
}

// used for tests only
private[spark] def clearLiveEntities(): Unit = {
liveStages.clear()
liveJobs.clear()
liveExecutors.clear()
deadExecutors.clear()
liveTasks.clear()
liveRDDs.clear()
pools.clear()
}

override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
case SparkListenerLogStart(version) => sparkVersion = version
case _ =>
@@ -877,6 +959,12 @@ private[spark] class AppStatusListener(
}
}

// used in tests only
private[spark] def flush(): Unit = {
val now = System.nanoTime()
flush(update(_, now))
}

/** Go through all `LiveEntity`s and use `entityFlushFunc(entity)` to flush them. */
private def flush(entityFlushFunc: LiveEntity => Unit): Unit = {
liveStages.values.asScala.foreach { stage =>
51 changes: 29 additions & 22 deletions core/src/main/scala/org/apache/spark/status/LiveEntity.scala
@@ -59,21 +59,23 @@ private[spark] abstract class LiveEntity {

}

private class LiveJob(
private[spark] class LiveJob(
val jobId: Int,
name: String,
val name: String,
val submissionTime: Option[Date],
val stageIds: Seq[Int],
jobGroup: Option[String],
numTasks: Int,
sqlExecutionId: Option[Long]) extends LiveEntity {
val jobGroup: Option[String],
val numTasks: Int,
val sqlExecutionId: Option[Long]) extends LiveEntity {

var activeTasks = 0
var completedTasks = 0
var failedTasks = 0

// Holds both the stage ID and the task index, packed into a single long value.
val completedIndices = new OpenHashSet[Long]()
// will only be set when recover LiveJob is needed.
Contributor:
Could we add completedIndices and completedStages to a new KVStore entity class, and restore LiveJob from both JobDataWrapper and the new class? I guess you would like to avoid modifying JobDataWrapper, but I'm worried that a restored instance's state would differ from the original's, so I'd like to explore that option here.

Contributor:
I don't mind adding these to JobDataWrapper as well - it could cause backward-compatibility problems, though. It sounds like a tradeoff between "possibly inaccurate" and "no backward compatibility".

Member Author:
I think adding completedIndices and completedStages to JobDataWrapper would be OK, since JobDataWrapper is not under the api.v1 package?

And I think correctness is always the first thing we need to take care of.

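For illustration, the companion entity suggested above might look roughly like the following; the class name, fields, and index are assumptions rather than part of the PR:

import com.fasterxml.jackson.annotation.JsonIgnore

import org.apache.spark.util.kvstore.KVIndex

// Hypothetical KVStore entity persisting the exact per-job recovery state, so a
// restored LiveJob would not need the numCompletedIndices/numCompletedStages offsets.
private[spark] class JobRecoveryDataWrapper(
    val jobId: Int,
    val completedIndices: Set[Long],
    val completedStages: Set[Int]) {

  @JsonIgnore @KVIndex
  private def id: Int = jobId
}
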
var numCompletedIndices = 0

var killedTasks = 0
var killedSummary: Map[String, Int] = Map()
@@ -85,6 +87,8 @@ private class LiveJob(
var completionTime: Option[Date] = None

var completedStages: Set[Int] = Set()
// will only be set when recover LiveJob is needed.
var numCompletedStages = 0
var activeStages = 0
var failedStages = 0

@@ -104,9 +108,9 @@ private class LiveJob(
skippedTasks,
failedTasks,
killedTasks,
completedIndices.size,
completedIndices.size + numCompletedIndices,
activeStages,
completedStages.size,
completedStages.size + numCompletedStages,
skippedStages.size,
failedStages,
killedSummary)
@@ -115,7 +119,7 @@

}

private class LiveTask(
private[spark] class LiveTask(
var info: TaskInfo,
stageId: Int,
stageAttemptId: Int,
@@ -229,7 +233,7 @@ private class LiveTask(

}

private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveEntity {
private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extends LiveEntity {

var hostPort: String = null
var host: String = null
@@ -272,7 +276,7 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
def hasMemoryInfo: Boolean = totalOnHeap >= 0L

// peak values for executor level metrics
val peakExecutorMetrics = new ExecutorMetrics()
var peakExecutorMetrics = new ExecutorMetrics()

def hostname: String = if (host != null) host else hostPort.split(":")(0)

@@ -316,10 +320,10 @@ private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveE
}
}

private class LiveExecutorStageSummary(
private[spark] class LiveExecutorStageSummary(
stageId: Int,
attemptId: Int,
executorId: String) extends LiveEntity {
val executorId: String) extends LiveEntity {

import LiveEntityHelpers._

@@ -353,7 +357,7 @@ private class LiveExecutorStageSummary(

}

private class LiveStage extends LiveEntity {
private[spark] class LiveStage extends LiveEntity {

import LiveEntityHelpers._

@@ -370,6 +374,8 @@ private class LiveStage extends LiveEntity {
var completedTasks = 0
var failedTasks = 0
val completedIndices = new OpenHashSet[Int]()
// will only be set when recover LiveStage is needed.
Contributor:
Same here: could we add completedIndices to a new KVStore entity class and restore LiveStage from both StageDataWrapper and the new class? Adding it to StageDataWrapper is also fine with me.

var numCompletedIndices = 0

var killedTasks = 0
var killedSummary: Map[String, Int] = Map()
@@ -405,7 +411,7 @@ private class LiveStage extends LiveEntity {
numCompleteTasks = completedTasks,
numFailedTasks = failedTasks,
numKilledTasks = killedTasks,
numCompletedIndices = completedIndices.size,
numCompletedIndices = completedIndices.size + numCompletedIndices,

submissionTime = info.submissionTime.map(new Date(_)),
firstTaskLaunchedTime =
@@ -464,7 +470,7 @@
* used by the partition in the executors, and thus may differ from the storage level requested
* by the application.
*/
private class LiveRDDPartition(val blockName: String, rddLevel: StorageLevel) {
private[spark] class LiveRDDPartition(val blockName: String, rddLevel: StorageLevel) {

import LiveEntityHelpers._

@@ -496,7 +502,7 @@ private class LiveRDDPartition(val blockName: String, rddLevel: StorageLevel) {

}

private class LiveRDDDistribution(exec: LiveExecutor) {
private[spark] class LiveRDDDistribution(exec: LiveExecutor) {

import LiveEntityHelpers._

@@ -513,6 +519,7 @@ private class LiveRDDDistribution(exec: LiveExecutor) {
def toApi(): v1.RDDDataDistribution = {
if (lastUpdate == null) {
lastUpdate = new v1.RDDDataDistribution(
executorId,
weakIntern(exec.hostPort),
memoryUsed,
exec.maxMemory - exec.memoryUsed,
@@ -535,18 +542,18 @@
* RDDs, this covers the case where an early stage is run on the unpersisted RDD, and a later stage
* it started after the RDD is marked for caching.
*/
private class LiveRDD(val info: RDDInfo, storageLevel: StorageLevel) extends LiveEntity {
private[spark] class LiveRDD(val info: RDDInfo, storageLevel: StorageLevel) extends LiveEntity {

import LiveEntityHelpers._

var memoryUsed = 0L
var diskUsed = 0L

private val levelDescription = weakIntern(storageLevel.description)
private val partitions = new HashMap[String, LiveRDDPartition]()
private val partitionSeq = new RDDPartitionSeq()
private[spark] val partitions = new HashMap[String, LiveRDDPartition]()
private[spark] val partitionSeq = new RDDPartitionSeq()

private val distributions = new HashMap[String, LiveRDDDistribution]()
private[spark] val distributions = new HashMap[String, LiveRDDDistribution]()

def partition(blockName: String): LiveRDDPartition = {
partitions.getOrElseUpdate(blockName, {
@@ -600,7 +607,7 @@ private class LiveRDD(val info: RDDInfo, storageLevel: StorageLevel) extends Liv

}

private class SchedulerPool(name: String) extends LiveEntity {
private[spark] class SchedulerPool(val name: String) extends LiveEntity {

var stageIds = Set[Int]()

@@ -750,7 +757,7 @@ private object LiveEntityHelpers {
* Internally, the sequence is mutable, and elements can modify the data they expose. Additions and
* removals are O(1). It is not safe to do multiple writes concurrently.
*/
private class RDDPartitionSeq extends Seq[v1.RDDPartitionInfo] {
private[spark] class RDDPartitionSeq extends Seq[v1.RDDPartitionInfo] {

@volatile private var _head: LiveRDDPartition = null
@volatile private var _tail: LiveRDDPartition = null
28 changes: 26 additions & 2 deletions core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -31,6 +31,8 @@ import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.ExecutorMetrics
import org.apache.spark.metrics.ExecutorMetricType
import org.apache.spark.resource.ResourceInformation
import org.apache.spark.status.{LiveExecutor, LiveRDDDistribution, LiveRDDPartition}
import org.apache.spark.storage.StorageLevel

case class ApplicationInfo private[spark](
id: String,
@@ -181,6 +183,7 @@ class RDDStorageInfo private[spark](
val partitions: Option[Seq[RDDPartitionInfo]])

class RDDDataDistribution private[spark](
val executorId: String,
Contributor:
Are we changing the model in api/v1? The change seems needed, but we also need to check that it doesn't break the existing one.

Contributor:
The API versioning requirements are listed here: https://spark.apache.org/docs/latest/monitoring.html#api-versioning-policy

> New fields may be added to existing endpoints

so it's OK to add new fields.

But if we're adding things that really don't make sense as part of the API, then maybe we should just store a different object instead.

Member Author:
Hi @squito, thanks for providing the API versioning requirements. That's really helpful.

val address: String,
val memoryUsed: Long,
val memoryRemaining: Long,
@@ -192,14 +195,35 @@ class RDDDataDistribution private[spark](
@JsonDeserialize(contentAs = classOf[JLong])
val onHeapMemoryRemaining: Option[Long],
@JsonDeserialize(contentAs = classOf[JLong])
val offHeapMemoryRemaining: Option[Long])
val offHeapMemoryRemaining: Option[Long]) {

private[spark] def toLiveRDDDistribution(executors: scala.collection.Map[String, LiveExecutor])
: LiveRDDDistribution = {
Contributor:
These added methods should probably be private[spark].

Member Author:
Is this necessary? It seems we already have the private[spark] scope on RDDDataDistribution and the other objects in storeTypes that have toLiveXXX methods, so those objects can't be constructed outside Spark and the toLiveXXX methods aren't accessible either.

Contributor:
The classes themselves are public. The private[spark] is applied only to the constructor (that's why it sits in an unusual position; that's the Scala way of scoping the constructor).

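A minimal, self-contained illustration of the constructor-scoping point above (hypothetical example class, not Spark code):

package org.apache.spark.example  // any package under org.apache.spark, for illustration

// The class itself is public; only the constructor is restricted to org.apache.spark,
// which is why the modifier sits in that unusual position before the parameter list.
class PublicModel private[spark](val field: String) {
  def visibleEverywhere: String = field                  // public member
  private[spark] def visibleOnlyInSpark: String = field  // spark-private member
}
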
val exec = executors.get(executorId).get
val liveRDDDistribution = new LiveRDDDistribution(exec)
liveRDDDistribution.memoryUsed = memoryUsed
liveRDDDistribution.diskUsed = diskUsed
liveRDDDistribution.onHeapUsed = onHeapMemoryUsed.getOrElse(0)
liveRDDDistribution.offHeapUsed = offHeapMemoryUsed.getOrElse(0)
liveRDDDistribution.lastUpdate = this
liveRDDDistribution
}
}

class RDDPartitionInfo private[spark](
val blockName: String,
val storageLevel: String,
val memoryUsed: Long,
val diskUsed: Long,
val executors: Seq[String])
val executors: Seq[String]) {

def toLiveRDDPartition: LiveRDDPartition = {
val liveRDDPartition = new LiveRDDPartition(blockName,
StorageLevel.fromDescription(storageLevel))
liveRDDPartition.value = this
liveRDDPartition
}
}

class StageData private[spark](
val status: StageStatus,