@@ -417,6 +417,53 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
     }
   }
 
+  test("Running tasks should be killed after first fetch failure") {
+    val rescheduleDelay = 300L
+    val conf = new SparkConf().
+      set("spark.scheduler.executorTaskBlacklistTime", rescheduleDelay.toString).
+      // don't wait to jump locality levels in this test
+      set("spark.locality.wait", "0")
+
+    val killedTasks = new ArrayBuffer[Long]
+    sc = new SparkContext("local", "test", conf)
+    // two executors on same host, one on different.
+    val sched = new FakeTaskScheduler(sc, ("exec1", "host1"),
+      ("exec1.1", "host1"), ("exec2", "host2"))
+    sched.initialize(new FakeSchedulerBackend() {
+      override def killTask(
+          taskId: Long,
+          executorId: String,
+          interruptThread: Boolean,
+          reason: String): Unit = {
+        killedTasks += taskId
+      }
+    })
+    // affinity to exec1 on host1 - which we will fail.
+    val taskSet = FakeTask.createTaskSet(4)
+    val clock = new ManualClock
+    clock.advance(1)
+    val manager = new TaskSetManager(sched, taskSet, 4, None, clock)
+
+    val offerResult1 = manager.resourceOffer("exec1", "host1", ANY)
+    assert(offerResult1.isDefined, "Expect resource offer to return a task")
+
+    assert(offerResult1.get.index === 0)
+    assert(offerResult1.get.executorId === "exec1")
+
+    val offerResult2 = manager.resourceOffer("exec2", "host2", ANY)
+    assert(offerResult2.isDefined, "Expect resource offer to return a task")
+
+    assert(offerResult2.get.index === 1)
+    assert(offerResult2.get.executorId === "exec2")
+    // At this point, we have 2 tasks running and 2 pending. The first fetch failure should
+    // kill the other running task; the pending tasks are never launched, so no kill is issued.
+    assert(killedTasks.isEmpty)
+    manager.handleFailedTask(offerResult1.get.taskId, TaskState.FINISHED,
+      FetchFailed(BlockManagerId("exec-host2", "host2", 12345), 0, 0, 0, "ignored"))
+    assert(killedTasks.size === 1)
+    assert(killedTasks(0) === offerResult2.get.taskId)
+  }
+
   test("executors should be blacklisted after task failure, in spite of locality preferences") {
     val rescheduleDelay = 300L
     val conf = new SparkConf().
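Taken together, the new assertions pin down the behavior this change introduces: when one task in a set fails with `FetchFailed`, the manager asks the scheduler backend to kill the set's *other* running attempts, and only those. The sketch below restates that contract in isolation; it is a minimal illustration under assumed names (`Backend`, `MiniTaskSet`, `launch`, and `onFetchFailed` are all invented here), not the actual `TaskSetManager` implementation.

```scala
import scala.collection.mutable

// Hypothetical stand-in for the slice of the scheduler backend the test fakes.
trait Backend {
  def killTask(taskId: Long, executorId: String,
      interruptThread: Boolean, reason: String): Unit
}

// Minimal task-set bookkeeping: taskId -> executorId for running attempts.
class MiniTaskSet(backend: Backend) {
  private val runningTasks = mutable.Map.empty[Long, String]

  def launch(taskId: Long, executorId: String): Unit =
    runningTasks(taskId) = executorId

  // On a fetch failure, the failed attempt is already finished, and pending
  // tasks were never launched, so only the *other* running attempts are
  // killed through the backend.
  def onFetchFailed(failedTaskId: Long): Unit = {
    runningTasks -= failedTaskId
    for ((taskId, execId) <- runningTasks) {
      backend.killTask(taskId, execId, interruptThread = false,
        reason = "another attempt hit a fetch failure")
    }
    runningTasks.clear()
  }
}
```

Under those assumptions, two running attempts plus one fetch failure produce exactly one `killTask` call, mirroring `assert(killedTasks.size === 1)` in the test above.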
@@ -1107,6 +1154,13 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
       set(config.BLACKLIST_ENABLED, true)
     sc = new SparkContext("local", "test", conf)
     sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2"))
+    sched.initialize(new FakeSchedulerBackend() {
+      override def killTask(
+          taskId: Long,
+          executorId: String,
+          interruptThread: Boolean,
+          reason: String): Unit = {}
+    })
     val taskSet = FakeTask.createTaskSet(4)
     val tsm = new TaskSetManager(sched, taskSet, 4)
     // we need a spy so we can attach our mock blacklist
// we need a spy so we can attach our mock blacklist
0 commit comments