
Commit 5e3615d

fenzhu authored and GitHub Enterprise committed
[CARMEL-7589][CARMEL-1376][CARMEL-4216] Limit the max numbers of tasks that one stage could generate (apache#216)
1 parent 4c027c5 commit 5e3615d

File tree: 2 files changed, +30 −2 lines changed


core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 17 additions & 1 deletion
@@ -41,7 +41,7 @@ import org.apache.spark.errors.SparkCoreErrors
 import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config
-import org.apache.spark.internal.config.{JOB_GROUP_MAX_SHUFFLE_SIZE, RDD_CACHE_VISIBILITY_TRACKING_ENABLED, REMOVE_EXECUTOR_ON_FETCH_FAILURE, TASK_SUBMISSION_ASYNC}
+import org.apache.spark.internal.config.{JOB_GROUP_MAX_SHUFFLE_SIZE, RDD_CACHE_VISIBILITY_TRACKING_ENABLED, RDD_MAX_PARTITIONS, REMOVE_EXECUTOR_ON_FETCH_FAILURE, TASK_SUBMISSION_ASYNC}
 import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY
 import org.apache.spark.network.shuffle.{BlockStoreClient, MergeFinalizerListener}
 import org.apache.spark.network.shuffle.protocol.MergeStatuses
@@ -225,6 +225,8 @@ private[spark] class DAGScheduler(
   /** If enabled, FetchFailed will not cause stage retry, in order to surface the problem. */
   private val disallowStageRetryForTest = sc.getConf.get(TEST_NO_STAGE_RETRY)
 
+  private val MAX_PARTITIONS_IN_STAGE = sc.getConf.get(RDD_MAX_PARTITIONS)
+
   private val removeExecutorOnFetchFailure = sc.getConf.get(REMOVE_EXECUTOR_ON_FETCH_FAILURE)
 
   private val shouldMergeResourceProfiles = sc.getConf.get(config.RESOURCE_PROFILE_MERGE_CONFLICTS)
@@ -674,6 +676,10 @@ private[spark] class DAGScheduler(
     checkBarrierStageWithNumSlots(rdd, resourceProfile)
     checkBarrierStageWithRDDChainPattern(rdd, rdd.getNumPartitions)
     val numTasks = rdd.partitions.length
+    if (numTasks > MAX_PARTITIONS_IN_STAGE) {
+      throw new SparkException(s"RDD Partitions have reached the max limitation " +
+        s"$MAX_PARTITIONS_IN_STAGE, increase ${RDD_MAX_PARTITIONS.key} to work around.")
+    }
     val parents = getOrCreateParentStages(shuffleDeps, jobId)
     val id = nextStageId.getAndIncrement()
     val stage = new ShuffleMapStage(
@@ -809,6 +815,12 @@ private[spark] class DAGScheduler(
     checkBarrierStageWithRDDChainPattern(rdd, partitions.toSet.size)
     val parents = getOrCreateParentStages(shuffleDeps, jobId)
     val id = nextStageId.getAndIncrement()
+    // Use `partitions.length` instead of `rdd.partitions.length` to
+    // skip SELECT * table LIMIT operation
+    if (partitions.length > MAX_PARTITIONS_IN_STAGE) {
+      throw new SparkException(s"RDD Partitions have reached the max limitation " +
+        s"$MAX_PARTITIONS_IN_STAGE, increase ${RDD_MAX_PARTITIONS.key} to work around.")
+    }
     val stage = new ResultStage(id, rdd, func, partitions, parents, jobId,
       callSite, resourceProfile.id, resultSpillContext)
     stageIdToStage(id) = stage
@@ -1324,6 +1336,10 @@ private[spark] class DAGScheduler(
     if (rdd.partitions.length == 0) {
       throw SparkCoreErrors.cannotRunSubmitMapStageOnZeroPartitionRDDError()
     }
+    if (rdd.partitions.length > MAX_PARTITIONS_IN_STAGE) {
+      throw new SparkException(s"RDD Partitions have reached the max limitation " +
+        s"$MAX_PARTITIONS_IN_STAGE, increase ${RDD_MAX_PARTITIONS.key} to work around.")
+    }
 
     // SPARK-23626: `RDD.getPartitions()` can be slow, so we eagerly compute
     // `.partitions` on every RDD in the DAG to ensure that `getPartitions()`
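
Note: the change imports RDD_MAX_PARTITIONS from org.apache.spark.internal.config, but the entry's definition is not part of this diff. A minimal sketch of what such a ConfigEntry could look like in the internal config package object, with the key name, doc string, and default all being assumptions (only the constant name and its Int comparison are confirmed by the commit):

  // Hypothetical sketch only; the real entry lives elsewhere in this fork's
  // org.apache.spark.internal.config package object and is not shown here.
  private[spark] val RDD_MAX_PARTITIONS =
    ConfigBuilder("spark.rdd.maxPartitions") // assumed key name
      .doc("Maximum number of partitions (tasks) a single stage may contain " +
        "before the job is aborted with a SparkException.")
      .intConf
      .createWithDefault(Int.MaxValue) // assumed default: effectively unbounded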

core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala

Lines changed: 13 additions & 1 deletion
@@ -41,7 +41,7 @@ import org.apache.spark._
 import org.apache.spark.broadcast.BroadcastManager
 import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
 import org.apache.spark.internal.config
-import org.apache.spark.internal.config.{SCHEDULER_ANALYTICS_TASK_SCHEDULER, TASK_SUBMISSION_ASYNC, Tests}
+import org.apache.spark.internal.config.{RDD_MAX_PARTITIONS, SCHEDULER_ANALYTICS_TASK_SCHEDULER, TASK_SUBMISSION_ASYNC, Tests}
 import org.apache.spark.network.shuffle.ExternalBlockStoreClient
 import org.apache.spark.rdd.{DeterministicLevel, RDD}
 import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder, TaskResourceProfile, TaskResourceRequests}
@@ -981,6 +981,18 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti
     assertDataStructuresEmpty()
   }
 
+  test("test job abort if the task reached the limitation") {
+    conf.set(RDD_MAX_PARTITIONS.key, "3")
+    val e = intercept[SparkException] {
+      val rdd = sc.parallelize(1 to 10, 4)
+      rdd.collect() === Array(1, 2)
+    }.getMessage
+    assert(e.contains("RDD Partitions have reached the max limitation"))
+    // Max partition restriction should skip LIMIT operation
+    val rdd = sc.parallelize(1 to 10, 4)
+    assert(rdd.take(2) === Array(1, 2))
+  }
+
   private val shuffleFileLossTests = Seq(
     ("executor process lost with shuffle service", ExecutorProcessLost("", None), true, false),
     ("worker lost with shuffle service", ExecutorProcessLost("", Some("hostA")), true, true),
