Commit 77d3656

kayousterhout authored and markhamstra committed
[SPARK-979] Randomize order of offers.
This commit randomizes the order of resource offers to avoid scheduling all tasks on the same small set of machines. This is a much simpler solution to SPARK-979 than alteryx#7. Author: Kay Ousterhout <kayousterhout@gmail.com> Closes alteryx#27 from kayousterhout/randomize and squashes the following commits: 435d817 [Kay Ousterhout] [SPARK-979] Randomize order of offers. Conflicts: core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala
1 parent 2e15cda commit 77d3656
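The heart of the patch is a one-line shuffle of the incoming resource offers before the assignment loop runs. A minimal, self-contained sketch of the idea (the WorkerOffer case class here is a simplified stand-in for Spark's real offer type, which carries more bookkeeping):

    import scala.util.Random

    // Simplified stand-in for Spark's WorkerOffer: one executor's free capacity.
    case class WorkerOffer(executorId: String, host: String, cores: Int)

    object ShuffleOffersSketch {
      def main(args: Array[String]): Unit = {
        val offers = Seq(
          WorkerOffer("executor0", "host0", 1),
          WorkerOffer("executor1", "host1", 1),
          WorkerOffer("executor2", "host2", 1))

        // Walking offers in a fixed order places every small job on the first
        // workers in the list; shuffling first spreads tasks across the cluster
        // over repeated scheduling rounds.
        val shuffledOffers = Random.shuffle(offers)
        println(shuffledOffers.map(_.executorId).mkString(", "))
      }
    }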

4 files changed, +76 -42 lines changed

core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala

Lines changed: 9 additions & 6 deletions
@@ -24,6 +24,7 @@ import java.util.{TimerTask, Timer}
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashMap
 import scala.collection.mutable.HashSet
+import scala.util.Random
 
 import org.apache.spark._
 import org.apache.spark.TaskState.TaskState
@@ -217,9 +218,11 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
       }
     }
 
-    // Build a list of tasks to assign to each worker
-    val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores))
-    val availableCpus = offers.map(o => o.cores).toArray
+    // Randomly shuffle offers to avoid always placing tasks on the same set of workers.
+    val shuffledOffers = Random.shuffle(offers)
+    // Build a list of tasks to assign to each worker.
+    val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores))
+    val availableCpus = shuffledOffers.map(o => o.cores).toArray
     val sortedTaskSets = rootPool.getSortedTaskSetQueue()
     for (taskSet <- sortedTaskSets) {
       logDebug("parentName: %s, name: %s, runningTasks: %s".format(
@@ -232,9 +235,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
     for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) {
       do {
         launchedTask = false
-        for (i <- 0 until offers.size) {
-          val execId = offers(i).executorId
-          val host = offers(i).host
+        for (i <- 0 until shuffledOffers.size) {
+          val execId = shuffledOffers(i).executorId
+          val host = shuffledOffers(i).host
           for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) {
             tasks(i) += task
             val tid = task.taskId
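One property the change above relies on: scala.util.Random.shuffle does not mutate its argument but returns a freshly shuffled copy, so tasks, availableCpus, and the offer read at index i all refer to the same permutation for the whole scheduling round. A quick illustration:

    import scala.util.Random

    val offers = Seq("executor0", "executor1", "executor2")
    val shuffled = Random.shuffle(offers)
    // `offers` keeps its original order; only `shuffled` is permuted.
    println(offers)   // List(executor0, executor1, executor2)
    println(shuffled) // e.g. List(executor2, executor0, executor1)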

core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala

Lines changed: 43 additions & 13 deletions
@@ -27,6 +27,13 @@ import scala.collection.mutable.ArrayBuffer
 
 import java.util.Properties
 
+class FakeSchedulerBackend extends SchedulerBackend {
+  def start() {}
+  def stop() {}
+  def reviveOffers() {}
+  def defaultParallelism() = 1
+}
+
 class FakeTaskSetManager(
     initPriority: Int,
     initStageId: Int,
@@ -106,7 +113,8 @@ class FakeTaskSetManager(
 
 class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging {
 
-  def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, taskSet: TaskSet): FakeTaskSetManager = {
+  def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler,
+      taskSet: TaskSet): FakeTaskSetManager = {
     new FakeTaskSetManager(priority, stage, numTasks, cs , taskSet)
   }
 
@@ -134,10 +142,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
   test("FIFO Scheduler Test") {
     sc = new SparkContext("local", "ClusterSchedulerSuite")
     val clusterScheduler = new ClusterScheduler(sc)
-    var tasks = ArrayBuffer[Task[_]]()
-    val task = new FakeTask(0)
-    tasks += task
-    val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
+    val taskSet = FakeTask.createTaskSet(1)
 
     val rootPool = new Pool("", SchedulingMode.FIFO, 0, 0)
     val schedulableBuilder = new FIFOSchedulableBuilder(rootPool)
@@ -161,10 +166,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
   test("Fair Scheduler Test") {
     sc = new SparkContext("local", "ClusterSchedulerSuite")
     val clusterScheduler = new ClusterScheduler(sc)
-    var tasks = ArrayBuffer[Task[_]]()
-    val task = new FakeTask(0)
-    tasks += task
-    val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
+    val taskSet = FakeTask.createTaskSet(1)
 
     val xmlPath = getClass.getClassLoader.getResource("fairscheduler.xml").getFile()
     System.setProperty("spark.scheduler.allocation.file", xmlPath)
@@ -218,10 +220,7 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
   test("Nested Pool Test") {
     sc = new SparkContext("local", "ClusterSchedulerSuite")
     val clusterScheduler = new ClusterScheduler(sc)
-    var tasks = ArrayBuffer[Task[_]]()
-    val task = new FakeTask(0)
-    tasks += task
-    val taskSet = new TaskSet(tasks.toArray,0,0,0,null)
+    val taskSet = FakeTask.createTaskSet(1)
 
     val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
     val pool0 = new Pool("0", SchedulingMode.FAIR, 3, 1)
@@ -264,4 +263,35 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging
     checkTaskSetId(rootPool, 6)
     checkTaskSetId(rootPool, 2)
   }
+
+  test("Scheduler does not always schedule tasks on the same workers") {
+    sc = new SparkContext("local", "ClusterSchedulerSuite")
+    val taskScheduler = new ClusterScheduler(sc)
+    taskScheduler.initialize(new FakeSchedulerBackend)
+    // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks.
+    var dagScheduler = new DAGScheduler(taskScheduler) {
+      override def taskStarted(task: Task[_], taskInfo: TaskInfo) {}
+      override def executorGained(execId: String, host: String) {}
+    }
+
+    val numFreeCores = 1
+    val workerOffers = Seq(new WorkerOffer("executor0", "host0", numFreeCores),
+      new WorkerOffer("executor1", "host1", numFreeCores))
+    // Repeatedly try to schedule a 1-task job, and make sure that it doesn't always
+    // get scheduled on the same executor. While there is a chance this test will fail
+    // because the task randomly gets placed on the first executor all 1000 times, the
+    // probability of that happening is 2^-1000 (so sufficiently small to be considered
+    // negligible).
+    val numTrials = 1000
+    val selectedExecutorIds = 1.to(numTrials).map { _ =>
+      val taskSet = FakeTask.createTaskSet(1)
+      taskScheduler.submitTasks(taskSet)
+      val taskDescriptions = taskScheduler.resourceOffers(workerOffers).flatten
+      assert(1 === taskDescriptions.length)
+      taskDescriptions(0).executorId
+    }
+    var count = selectedExecutorIds.count(_ == workerOffers(0).executorId)
+    assert(count > 0)
+    assert(count < numTrials)
+  }
 }
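The flakiness bound quoted in the new test's comment can be sanity-checked directly: with two equal offers, a uniform shuffle picks a given executor first with probability 1/2, so all 1000 independent trials landing on that specific executor has probability 2^-1000. A throwaway check of the arithmetic (not part of the patch):

    val numTrials = 1000
    // Probability that every trial picks one specific executor out of two.
    val pAllSame = math.pow(0.5, numTrials)
    println(f"P(all $numTrials%d trials agree) = $pAllSame%.3e")
    // Prints roughly 9.333e-302 -- negligible, as the comment claims.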

core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala

Lines changed: 7 additions & 22 deletions
@@ -87,7 +87,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("TaskSet with no preferences") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
-    val taskSet = createTaskSet(1)
+    val taskSet = FakeTask.createTaskSet(1)
     val manager = new ClusterTaskSetManager(sched, taskSet)
 
     // Offer a host with no CPUs
@@ -113,7 +113,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("multiple offers with no preferences") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
-    val taskSet = createTaskSet(3)
+    val taskSet = FakeTask.createTaskSet(3)
     val manager = new ClusterTaskSetManager(sched, taskSet)
 
     // First three offers should all find tasks
@@ -144,7 +144,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("basic delay scheduling") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
-    val taskSet = createTaskSet(4,
+    val taskSet = FakeTask.createTaskSet(4,
       Seq(TaskLocation("host1", "exec1")),
       Seq(TaskLocation("host2", "exec2")),
       Seq(TaskLocation("host1"), TaskLocation("host2", "exec2")),
@@ -189,7 +189,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc,
       ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3"))
-    val taskSet = createTaskSet(5,
+    val taskSet = FakeTask.createTaskSet(5,
       Seq(TaskLocation("host1")),
       Seq(TaskLocation("host2")),
       Seq(TaskLocation("host2")),
@@ -228,7 +228,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("delay scheduling with failed hosts") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
-    val taskSet = createTaskSet(3,
+    val taskSet = FakeTask.createTaskSet(3,
       Seq(TaskLocation("host1")),
       Seq(TaskLocation("host2")),
       Seq(TaskLocation("host3"))
@@ -260,7 +260,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("task result lost") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
-    val taskSet = createTaskSet(1)
+    val taskSet = FakeTask.createTaskSet(1)
     val clock = new FakeClock
     val manager = new ClusterTaskSetManager(sched, taskSet, clock)
 
@@ -277,7 +277,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
   test("repeated failures lead to task set abortion") {
     sc = new SparkContext("local", "test")
     val sched = new FakeClusterScheduler(sc, ("exec1", "host1"))
-    val taskSet = createTaskSet(1)
+    val taskSet = FakeTask.createTaskSet(1)
     val clock = new FakeClock
     val manager = new ClusterTaskSetManager(sched, taskSet, clock)
 
@@ -297,21 +297,6 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging
     }
   }
 
-
-  /**
-   * Utility method to create a TaskSet, potentially setting a particular sequence of preferred
-   * locations for each task (given as varargs) if this sequence is not empty.
-   */
-  def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = {
-    if (prefLocs.size != 0 && prefLocs.size != numTasks) {
-      throw new IllegalArgumentException("Wrong number of task locations")
-    }
-    val tasks = Array.tabulate[Task[_]](numTasks) { i =>
-      new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil)
-    }
-    new TaskSet(tasks, 0, 0, 0, null)
-  }
-
   def createTaskResult(id: Int): DirectTaskResult[Int] = {
     new DirectTaskResult[Int](id, mutable.Map.empty, new TaskMetrics)
   }

core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala

Lines changed: 17 additions & 1 deletion
@@ -18,10 +18,26 @@
 package org.apache.spark.scheduler.cluster
 
 import org.apache.spark.TaskContext
-import org.apache.spark.scheduler.{TaskLocation, Task}
+import org.apache.spark.scheduler.{TaskLocation, Task, TaskSet}
 
 class FakeTask(stageId: Int, prefLocs: Seq[TaskLocation] = Nil) extends Task[Int](stageId, 0) {
   override def runTask(context: TaskContext): Int = 0
 
   override def preferredLocations: Seq[TaskLocation] = prefLocs
 }
+
+object FakeTask {
+  /**
+   * Utility method to create a TaskSet, potentially setting a particular sequence of preferred
+   * locations for each task (given as varargs) if this sequence is not empty.
+   */
+  def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = {
+    if (prefLocs.size != 0 && prefLocs.size != numTasks) {
+      throw new IllegalArgumentException("Wrong number of task locations")
+    }
+    val tasks = Array.tabulate[Task[_]](numTasks) { i =>
+      new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil)
+    }
+    new TaskSet(tasks, 0, 0, 0, null)
+  }
+}
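For reference, a hypothetical call to the relocated helper, mirroring how the suites above use it; when the varargs list is non-empty it must supply exactly one preferred-location Seq per task:

    import org.apache.spark.scheduler.TaskLocation

    // Two tasks, each pinned to its own preferred locations.
    val taskSet = FakeTask.createTaskSet(2,
      Seq(TaskLocation("host1", "exec1")),
      Seq(TaskLocation("host2")))

    // No varargs: every task gets an empty preferred-location list.
    val simpleSet = FakeTask.createTaskSet(1)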
