
Commit 11e4adc

committed
@holdenk's comments around comments and naming
1 parent dfc124c commit 11e4adc

3 files changed: 13 additions & 3 deletions


core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 2 additions & 0 deletions

@@ -294,6 +294,8 @@ private[spark] class CoarseGrainedExecutorBackend(
       override def run(): Unit = {
         var lastTaskRunningTime = System.nanoTime()
         val sleep_time = 1000 // 1s
+        // This config is internal and only used by unit tests to force an executor
+        // to hang around for longer when decommissioned.
         val initialSleepMillis = env.conf.getInt(
           "spark.executor.decommission.initial.sleep.millis", sleep_time)
         if (initialSleepMillis > 0) {
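As an aside, a minimal sketch of how a test might use this internal knob; the config key and its getInt default come from the hunk above, while the SparkConf setup around it is an assumption:

import org.apache.spark.SparkConf

// Hypothetical test configuration: keep a decommissioned executor alive for
// ~30 seconds so the test has a window to observe it before it exits.
// A value <= 0 skips the initial sleep entirely.
val conf = new SparkConf()
  .set("spark.executor.decommission.initial.sleep.millis", "30000")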

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 9 additions & 3 deletions

@@ -1847,7 +1847,13 @@ private[spark] class DAGScheduler(
         fileLost = true,
         hostToUnregisterOutputs = hostToUnregisterOutputs,
         maybeEpoch = Some(task.epoch),
-        ignoreShuffleVersion = isHostDecommissioned)
+        // shuffleFileLostEpoch is ignored when a host is decommissioned because some
+        // decommissioned executors on that host might have been removed before this fetch
+        // failure and might have bumped up the shuffleFileLostEpoch. We ignore that, and
+        // proceed with unconditional removal of shuffle outputs from all executors on that
+        // host, including from those that we still haven't confirmed as lost due to heartbeat
+        // delays.
+        ignoreShuffleFileLostEpoch = isHostDecommissioned)
       }
     }

@@ -2014,7 +2020,7 @@ private[spark] class DAGScheduler(
       fileLost: Boolean,
       hostToUnregisterOutputs: Option[String],
       maybeEpoch: Option[Long] = None,
-      ignoreShuffleVersion: Boolean = false): Unit = {
+      ignoreShuffleFileLostEpoch: Boolean = false): Unit = {
     val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch)
     logDebug(s"Considering removal of executor $execId; " +
       s"fileLost: $fileLost, currentEpoch: $currentEpoch")

@@ -2025,7 +2031,7 @@ private[spark] class DAGScheduler(
       clearCacheLocs()
     }
     if (fileLost) {
-      val remove = if (ignoreShuffleVersion) {
+      val remove = if (ignoreShuffleFileLostEpoch) {
         true
       } else if (!shuffleFileLostEpoch.contains(execId) ||
           shuffleFileLostEpoch(execId) < currentEpoch) {
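For context, here is a minimal standalone sketch, a toy model rather than the actual DAGScheduler internals, of the epoch gating that the renamed ignoreShuffleFileLostEpoch flag bypasses: shuffle outputs for an executor are removed at most once per epoch, unless the caller asks to skip the check, as the host-decommission path above does.

import scala.collection.mutable

// Toy model: epoch of the last shuffle-file-lost event handled per executor.
val shuffleFileLostEpoch = mutable.HashMap[String, Long]()

def shouldRemoveShuffleOutputs(
    execId: String,
    currentEpoch: Long,
    ignoreShuffleFileLostEpoch: Boolean): Boolean = {
  if (ignoreShuffleFileLostEpoch) {
    // Host decommission: remove unconditionally, even if a decommissioned
    // executor on the host already bumped the recorded epoch.
    true
  } else if (!shuffleFileLostEpoch.contains(execId) ||
      shuffleFileLostEpoch(execId) < currentEpoch) {
    // First loss event seen for this executor at this epoch: record it and remove.
    shuffleFileLostEpoch(execId) = currentEpoch
    true
  } else {
    // Already handled at this epoch; avoid removing outputs twice.
    false
  }
}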

core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala

Lines changed: 2 additions & 0 deletions

@@ -236,6 +236,8 @@ class DecommissionWorkerSuite
     val jobResult = sc.parallelize(1 to 2, 2).mapPartitionsWithIndex((_, _) => {
       val executorId = SparkEnv.get.executorId
       val context = TaskContext.get()
+      // Only sleep in the first attempt to create the required window for decommissioning.
+      // Subsequent attempts don't need the delay; skipping it speeds up the test.
       if (context.attemptNumber() == 0 && context.stageAttemptNumber() == 0) {
         val sleepTimeSeconds = if (executorId == executorToDecom) 10 else 1
         Thread.sleep(sleepTimeSeconds * 1000L)
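A tiny Spark-free sketch of the attempt-gated delay shown above; the helper and its parameters are hypothetical, while the check and sleep durations mirror the test: only the first task attempt of the first stage attempt pays the sleep that opens the decommission window.

// Hypothetical helper mirroring the test logic above.
def maybeSleep(taskAttempt: Int, stageAttempt: Int, isDecomTarget: Boolean): Unit = {
  if (taskAttempt == 0 && stageAttempt == 0) {
    val sleepTimeSeconds = if (isDecomTarget) 10 else 1
    Thread.sleep(sleepTimeSeconds * 1000L)
  }
}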
