WeichenXu123
diff --git a/‎core/src/main/resources/error/error-classes.json
+22-5 b/‎core/src/main/resources/error/error-classes.json
+22-5
diff --git a/‎sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java
+22-1 b/‎sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java
+22-1
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+1-1 b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+1-1
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+5-4 b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+5-4
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+12-2 b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+12-2
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+13-1 b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+13-1
diff --git a/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala
+5 b/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala
+5
diff --git a/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala
+3-1 b/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala
+3-1
diff --git a/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala
+4-2 b/‎sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableCatalog.scala
+4-2
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+6-2 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+6-2
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala
+1-1 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala
+1-1
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala
+20-10 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala
+20-10
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewInRebalancePartitions.scala
+2-1 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewInRebalancePartitions.scala
+2-1
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
+2 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
+2
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala
+11-2 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala
+11-2
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
+3-2 b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala
+3-2
@@ -1063,6 +1063,28 @@
     ],
     "sqlState" : "42903"
   },
+  "INVALID_WRITE_DISTRIBUTION" : {
+    "message" : [
+      "The requested write distribution is invalid."
+    ],
+    "subClass" : {
+      "PARTITION_NUM_AND_SIZE" : {
+        "message" : [
+          "The partition number and advisory partition size can't be specified at the same time."
+        ]
+      },
+      "PARTITION_NUM_WITH_UNSPECIFIED_DISTRIBUTION" : {
+        "message" : [
+          "The number of partitions can't be specified with unspecified distribution."
+        ]
+      },
+      "PARTITION_SIZE_WITH_UNSPECIFIED_DISTRIBUTION" : {
+        "message" : [
+          "The advisory partition size can't be specified with unspecified distribution."
+        ]
+      }
+    }
+  },
   "LOCATION_ALREADY_EXISTS" : {
     "message" : [
       "Cannot name the managed table as <identifier>, as its associated location <location> already exists. Please pick a different table name, or remove the existing location first."
@@ -2931,11 +2953,6 @@
       "Unsupported data type <dataType>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1178" : {
-    "message" : [
-      "The number of partitions can't be specified with unspecified distribution. Invalid writer requirements detected."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1181" : {
     "message" : [
       "Stream-stream join without equality predicate is not supported."
 
@@ -66,12 +66,33 @@ public interface RequiresDistributionAndOrdering extends Write {
    * <p>
    * Note that Spark doesn't support the number of partitions on {@link UnspecifiedDistribution},
    * the query will fail if the number of partitions are provided but the distribution is
-   * unspecified.
+   * unspecified. Data sources may either request a particular number of partitions or
+   * a preferred partition size via {@link #advisoryPartitionSizeInBytes}, not both.
    *
    * @return the required number of partitions, any value less than 1 mean no requirement.
    */
   default int requiredNumPartitions() { return 0; }
 
+  /**
+   * Returns the advisory (not guaranteed) shuffle partition size in bytes for this write.
+   * <p>
+   * Implementations may override this to indicate the preferred partition size in shuffles
+   * performed to satisfy the requested distribution. Note that Spark doesn't support setting
+   * the advisory partition size for {@link UnspecifiedDistribution}; the query will fail if
+   * the advisory partition size is set but the distribution is unspecified. Data sources may
+   * either request a particular number of partitions via {@link #requiredNumPartitions()} or
+   * a preferred partition size, not both.
+   * <p>
+   * Data sources should be careful with large advisory sizes, as they will impact the write
+   * parallelism and may degrade the overall job performance.
+   * <p>
+   * Note that this value acts only as guidance: Spark does not guarantee that the actual and
+   * advisory shuffle partition sizes will match. It is ignored if adaptive execution is disabled.
+   *
+   * @return the advisory partition size, any value less than 1 means no preference.
+   */
+  default long advisoryPartitionSizeInBytes() { return 0; }
+
   /**
    * Returns the ordering required by this write.
    * <p>
 
@@ -1743,7 +1743,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
       // table `t` even if there is a Project node between the table scan node and Sort node.
       // We also need to propagate the missing attributes from the descendant node to the current
       // node, and project them way at the end via an extra Project.
-      case r @ RepartitionByExpression(partitionExprs, child, _)
+      case r @ RepartitionByExpression(partitionExprs, child, _, _)
         if !r.resolved || r.missingInput.nonEmpty =>
         val resolvedNoOuter = partitionExprs.map(resolveExpressionByPlanChildren(_, r))
         val (newPartitionExprs, newChild) = resolveExprsAndAddMissingAttrs(resolvedNoOuter, child)
 
@@ -1202,15 +1202,16 @@ object CollapseRepartition extends Rule[LogicalPlan] {
     }
     // Case 2: When a RepartitionByExpression has a child of global Sort, Repartition or
     // RepartitionByExpression we can remove the child.
-    case r @ RepartitionByExpression(_, child @ (Sort(_, true, _) | _: RepartitionOperation), _) =>
+    case r @ RepartitionByExpression(
+        _, child @ (Sort(_, true, _) | _: RepartitionOperation), _, _) =>
       r.withNewChildren(child.children)
     // Case 3: When a RebalancePartitions has a child of local or global Sort, Repartition or
     // RepartitionByExpression we can remove the child.
-    case r @ RebalancePartitions(_, child @ (_: Sort | _: RepartitionOperation), _) =>
+    case r @ RebalancePartitions(_, child @ (_: Sort | _: RepartitionOperation), _, _) =>
       r.withNewChildren(child.children)
     // Case 4: When a RebalancePartitions has a child of RebalancePartitions we can remove the
     // child.
-    case r @ RebalancePartitions(_, child: RebalancePartitions, _) =>
+    case r @ RebalancePartitions(_, child: RebalancePartitions, _, _) =>
       r.withNewChildren(child.children)
   }
 }
@@ -1222,7 +1223,7 @@ object CollapseRepartition extends Rule[LogicalPlan] {
 object OptimizeRepartition extends Rule[LogicalPlan] {
   override def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
     _.containsPattern(REPARTITION_OPERATION), ruleId) {
-    case r @ RepartitionByExpression(partitionExpressions, _, numPartitions)
+    case r @ RepartitionByExpression(partitionExpressions, _, numPartitions, _)
       if partitionExpressions.nonEmpty && partitionExpressions.forall(_.foldable) &&
         numPartitions.isEmpty =>
       r.copy(optNumPartitions = Some(1))
 
@@ -1790,6 +1790,8 @@ trait HasPartitionExpressions extends SQLConfHelper {
 
   def optNumPartitions: Option[Int]
 
+  def optAdvisoryPartitionSize: Option[Long]
+
   protected def partitioning: Partitioning = if (partitionExpressions.isEmpty) {
     RoundRobinPartitioning(numPartitions)
   } else {
@@ -1820,7 +1822,11 @@ trait HasPartitionExpressions extends SQLConfHelper {
 case class RepartitionByExpression(
     partitionExpressions: Seq[Expression],
     child: LogicalPlan,
-    optNumPartitions: Option[Int]) extends RepartitionOperation with HasPartitionExpressions {
+    optNumPartitions: Option[Int],
+    optAdvisoryPartitionSize: Option[Long] = None)
+  extends RepartitionOperation with HasPartitionExpressions {
+
+  require(optNumPartitions.isEmpty || optAdvisoryPartitionSize.isEmpty)
 
   override val partitioning: Partitioning = {
     if (numPartitions == 1) {
@@ -1857,7 +1863,11 @@ object RepartitionByExpression {
 case class RebalancePartitions(
     partitionExpressions: Seq[Expression],
     child: LogicalPlan,
-    optNumPartitions: Option[Int] = None) extends UnaryNode with HasPartitionExpressions {
+    optNumPartitions: Option[Int] = None,
+    optAdvisoryPartitionSize: Option[Long] = None) extends UnaryNode with HasPartitionExpressions {
+
+  require(optNumPartitions.isEmpty || optAdvisoryPartitionSize.isEmpty)
+
   override def maxRows: Option[Long] = child.maxRows
   override def output: Seq[Attribute] = child.output
   override val nodePatterns: Seq[TreePattern] = Seq(REBALANCE_PARTITIONS)
 
@@ -1803,7 +1803,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
 
   def numberOfPartitionsNotAllowedWithUnspecifiedDistributionError(): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1178",
+      errorClass = "INVALID_WRITE_DISTRIBUTION.PARTITION_NUM_WITH_UNSPECIFIED_DISTRIBUTION",
+      messageParameters = Map.empty)
+  }
+
+  def partitionSizeNotAllowedWithUnspecifiedDistributionError(): Throwable = {
+    new AnalysisException(
+      errorClass = "INVALID_WRITE_DISTRIBUTION.PARTITION_SIZE_WITH_UNSPECIFIED_DISTRIBUTION",
+      messageParameters = Map.empty)
+  }
+
+  def numberAndSizeOfPartitionsNotAllowedTogether(): Throwable = {
+    new AnalysisException(
+      errorClass = "INVALID_WRITE_DISTRIBUTION.PARTITION_NUM_AND_SIZE",
       messageParameters = Map.empty)
   }
 
 
@@ -55,6 +55,7 @@ abstract class InMemoryBaseTable(
     val distribution: Distribution = Distributions.unspecified(),
     val ordering: Array[SortOrder] = Array.empty,
     val numPartitions: Option[Int] = None,
+    val advisoryPartitionSize: Option[Long] = None,
     val isDistributionStrictlyRequired: Boolean = true,
     val numRowsPerSplit: Int = Int.MaxValue)
   extends Table with SupportsRead with SupportsWrite with SupportsMetadataColumns {
@@ -450,6 +451,10 @@ abstract class InMemoryBaseTable(
         numPartitions.getOrElse(0)
       }
 
+      override def advisoryPartitionSizeInBytes(): Long = {
+        advisoryPartitionSize.getOrElse(0)
+      }
+
       override def toBatch: BatchWrite = writer
 
       override def toStreaming: StreamingWrite = streamingWriter match {
 
@@ -39,10 +39,12 @@ class InMemoryTable(
     distribution: Distribution = Distributions.unspecified(),
     ordering: Array[SortOrder] = Array.empty,
     numPartitions: Option[Int] = None,
+    advisoryPartitionSize: Option[Long] = None,
     isDistributionStrictlyRequired: Boolean = true,
     override val numRowsPerSplit: Int = Int.MaxValue)
   extends InMemoryBaseTable(name, schema, partitioning, properties, distribution,
-    ordering, numPartitions, isDistributionStrictlyRequired, numRowsPerSplit) with SupportsDelete {
+    ordering, numPartitions, advisoryPartitionSize, isDistributionStrictlyRequired,
+    numRowsPerSplit) with SupportsDelete {
 
   override def canDeleteWhere(filters: Array[Filter]): Boolean = {
     InMemoryTable.supportsFilters(filters)
 
@@ -91,7 +91,7 @@ class BasicInMemoryTableCatalog extends TableCatalog {
       partitions: Array[Transform],
       properties: util.Map[String, String]): Table = {
     createTable(ident, schema, partitions, properties, Distributions.unspecified(),
-      Array.empty, None)
+      Array.empty, None, None)
   }
 
   override def createTable(
@@ -111,6 +111,7 @@ class BasicInMemoryTableCatalog extends TableCatalog {
       distribution: Distribution,
       ordering: Array[SortOrder],
       requiredNumPartitions: Option[Int],
+      advisoryPartitionSize: Option[Long],
       distributionStrictlyRequired: Boolean = true,
       numRowsPerSplit: Int = Int.MaxValue): Table = {
     if (tables.containsKey(ident)) {
@@ -121,7 +122,8 @@ class BasicInMemoryTableCatalog extends TableCatalog {
 
     val tableName = s"$name.${ident.quoted}"
     val table = new InMemoryTable(tableName, schema, partitions, properties, distribution,
-      ordering, requiredNumPartitions, distributionStrictlyRequired, numRowsPerSplit)
+      ordering, requiredNumPartitions, advisoryPartitionSize, distributionStrictlyRequired,
+      numRowsPerSplit)
     tables.put(ident, table)
     namespaces.putIfAbsent(ident.namespace.toList, Map())
     table
 
@@ -893,14 +893,18 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         } else {
           REPARTITION_BY_NUM
         }
-        exchange.ShuffleExchangeExec(r.partitioning, planLater(r.child), shuffleOrigin) :: Nil
+        exchange.ShuffleExchangeExec(
+          r.partitioning, planLater(r.child),
+          shuffleOrigin, r.optAdvisoryPartitionSize) :: Nil
       case r: logical.RebalancePartitions =>
         val shuffleOrigin = if (r.partitionExpressions.isEmpty) {
           REBALANCE_PARTITIONS_BY_NONE
         } else {
           REBALANCE_PARTITIONS_BY_COL
         }
-        exchange.ShuffleExchangeExec(r.partitioning, planLater(r.child), shuffleOrigin) :: Nil
+        exchange.ShuffleExchangeExec(
+          r.partitioning, planLater(r.child),
+          shuffleOrigin, r.optAdvisoryPartitionSize) :: Nil
       case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil
       case r: LogicalRDD =>
         RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil
 
@@ -30,7 +30,7 @@ object AQEUtils {
     // Project/Filter/LocalSort/CollectMetrics.
     // Note: we only care about `HashPartitioning` as `EnsureRequirements` can only optimize out
     // user-specified repartition with `HashPartitioning`.
-    case ShuffleExchangeExec(h: HashPartitioning, _, shuffleOrigin)
+    case ShuffleExchangeExec(h: HashPartitioning, _, shuffleOrigin, _)
         if shuffleOrigin == REPARTITION_BY_COL || shuffleOrigin == REPARTITION_BY_NUM =>
       val numPartitions = if (shuffleOrigin == REPARTITION_BY_NUM) {
         Some(h.numPartitions)
 
@@ -64,16 +64,6 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe
         1
       }
     }
-    val advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
-    val minPartitionSize = if (Utils.isTesting) {
-      // In the tests, we usually set the target size to a very small value that is even smaller
-      // than the default value of the min partition size. Here we also adjust the min partition
-      // size to be not larger than 20% of the target size, so that the tests don't need to set
-      // both configs all the time to check the coalescing behavior.
-      conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_SIZE).min(advisoryTargetSize / 5)
-    } else {
-      conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_SIZE)
-    }
 
     // Sub-plans under the Union operator can be coalesced independently, so we can divide them
     // into independent "coalesce groups", and all shuffle stages within each group have to be
@@ -100,6 +90,17 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe
     val specsMap = mutable.HashMap.empty[Int, Seq[ShufflePartitionSpec]]
     // Coalesce partitions for each coalesce group independently.
     coalesceGroups.zip(minNumPartitionsByGroup).foreach { case (shuffleStages, minNumPartitions) =>
+      val advisoryTargetSize = advisoryPartitionSize(shuffleStages)
+      val minPartitionSize = if (Utils.isTesting) {
+        // In the tests, we usually set the target size to a very small value that is even smaller
+        // than the default value of the min partition size. Here we also adjust the min partition
+        // size to be not larger than 20% of the target size, so that the tests don't need to set
+        // both configs all the time to check the coalescing behavior.
+        conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_SIZE).min(advisoryTargetSize / 5)
+      } else {
+        conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_SIZE)
+      }
+
       val newPartitionSpecs = ShufflePartitionsUtil.coalescePartitions(
         shuffleStages.map(_.shuffleStage.mapStats),
         shuffleStages.map(_.partitionSpecs),
@@ -121,6 +122,15 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe
     }
   }
 
+  private def advisoryPartitionSize(shuffleStages: Seq[ShuffleStageInfo]): Long = {
+    val advisorySizes = shuffleStages.flatMap(_.shuffleStage.advisoryPartitionSize).toSet
+    if (advisorySizes.size == 1) {
+      advisorySizes.head
+    } else {
+      conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
+    }
+  }
+
   /**
    * Gather all coalesce-able groups such that the shuffle stages in each child of a Union operator
    * are in their independent groups if:
 
@@ -69,7 +69,8 @@ object OptimizeSkewInRebalancePartitions extends AQEShuffleReadRule {
   }
 
   private def tryOptimizeSkewedPartitions(shuffle: ShuffleQueryStageExec): SparkPlan = {
-    val advisorySize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
+    val defaultAdvisorySize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
+    val advisorySize = shuffle.advisoryPartitionSize.getOrElse(defaultAdvisorySize)
     val mapStats = shuffle.mapStats
     if (mapStats.isEmpty ||
       mapStats.get.bytesByPartitionId.forall(_ <= advisorySize)) {
 
@@ -180,6 +180,8 @@ case class ShuffleQueryStageExec(
       throw new IllegalStateException(s"wrong plan for shuffle stage:\n ${plan.treeString}")
   }
 
+  @transient val advisoryPartitionSize: Option[Long] = shuffle.advisoryPartitionSize
+
   @transient private lazy val shuffleFuture = shuffle.submitShuffleJob
 
   override protected def doMaterialize(): Future[Any] = shuffleFuture
 
@@ -36,6 +36,7 @@ object DistributionAndOrderingUtils {
       funCatalogOpt: Option[FunctionCatalog]): LogicalPlan = write match {
     case write: RequiresDistributionAndOrdering =>
       val numPartitions = write.requiredNumPartitions()
+      val partitionSize = write.advisoryPartitionSizeInBytes()
 
       val distribution = write.requiredDistribution match {
         case d: OrderedDistribution =>
@@ -49,17 +50,25 @@ object DistributionAndOrderingUtils {
 
       val queryWithDistribution = if (distribution.nonEmpty) {
         val optNumPartitions = if (numPartitions > 0) Some(numPartitions) else None
+        val optPartitionSize = if (partitionSize > 0) Some(partitionSize) else None
+
+        if (optNumPartitions.isDefined && optPartitionSize.isDefined) {
+          throw QueryCompilationErrors.numberAndSizeOfPartitionsNotAllowedTogether()
+        }
+
         // the conversion to catalyst expressions above produces SortOrder expressions
         // for OrderedDistribution and generic expressions for ClusteredDistribution
         // this allows RebalancePartitions/RepartitionByExpression to pick either
         // range or hash partitioning
         if (write.distributionStrictlyRequired()) {
-          RepartitionByExpression(distribution, query, optNumPartitions)
+          RepartitionByExpression(distribution, query, optNumPartitions, optPartitionSize)
         } else {
-          RebalancePartitions(distribution, query, optNumPartitions)
+          RebalancePartitions(distribution, query, optNumPartitions, optPartitionSize)
         }
       } else if (numPartitions > 0) {
         throw QueryCompilationErrors.numberOfPartitionsNotAllowedWithUnspecifiedDistributionError()
+      } else if (partitionSize > 0) {
+        throw QueryCompilationErrors.partitionSizeNotAllowedWithUnspecifiedDistributionError()
       } else {
         query
       }
 
@@ -188,7 +188,8 @@ case class EnsureRequirements(
             }
 
             child match {
-              case ShuffleExchangeExec(_, c, so) => ShuffleExchangeExec(newPartitioning, c, so)
+              case ShuffleExchangeExec(_, c, so, ps) =>
+                ShuffleExchangeExec(newPartitioning, c, so, ps)
               case _ => ShuffleExchangeExec(newPartitioning, child)
             }
           }
@@ -578,7 +579,7 @@ case class EnsureRequirements(
 
   def apply(plan: SparkPlan): SparkPlan = {
     val newPlan = plan.transformUp {
-      case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin)
+      case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin, _)
           if optimizeOutRepartition &&
             (shuffleOrigin == REPARTITION_BY_COL || shuffleOrigin == REPARTITION_BY_NUM) =>
         def hasSemanticEqualPartitioning(partitioning: Partitioning): Boolean = {
Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,8 @@ object OptimizeSkewInRebalancePartitions extends AQEShuffleReadRule {`
`69`	`69`	`}`
`70`	`70`
`71`	`71`	`private def tryOptimizeSkewedPartitions(shuffle: ShuffleQueryStageExec): SparkPlan = {`
`72`		`- val advisorySize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)`
	`72`	`+ val defaultAdvisorySize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)`
	`73`	`+ val advisorySize = shuffle.advisoryPartitionSize.getOrElse(defaultAdvisorySize)`
`73`	`74`	`val mapStats = shuffle.mapStats`
`74`	`75`	`if (mapStats.isEmpty \|\|`
`75`	`76`	`mapStats.get.bytesByPartitionId.forall(_ <= advisorySize)) {`
Original file line number	Diff line number	Diff line change
`@@ -180,6 +180,8 @@ case class ShuffleQueryStageExec(`
`180`	`180`	`throw new IllegalStateException(s"wrong plan for shuffle stage:\n ${plan.treeString}")`
`181`	`181`	`}`
`182`	`182`
	`183`	`+ @transient val advisoryPartitionSize: Option[Long] = shuffle.advisoryPartitionSize`
	`184`	`+`
`183`	`185`	`@transient private lazy val shuffleFuture = shuffle.submitShuffleJob`
`184`	`186`
`185`	`187`	`override protected def doMaterialize(): Future[Any] = shuffleFuture`
Original file line number	Diff line number	Diff line change
`@@ -188,7 +188,8 @@ case class EnsureRequirements(`
`188`	`188`	`}`
`189`	`189`
`190`	`190`	`child match {`
`191`		`- case ShuffleExchangeExec(_, c, so) => ShuffleExchangeExec(newPartitioning, c, so)`
	`191`	`+ case ShuffleExchangeExec(_, c, so, ps) =>`
	`192`	`+ ShuffleExchangeExec(newPartitioning, c, so, ps)`
`192`	`193`	`case _ => ShuffleExchangeExec(newPartitioning, child)`
`193`	`194`	`}`
`194`	`195`	`}`
`@@ -578,7 +579,7 @@ case class EnsureRequirements(`
`578`	`579`
`579`	`580`	`def apply(plan: SparkPlan): SparkPlan = {`
`580`	`581`	`val newPlan = plan.transformUp {`
`581`		`- case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin)`
	`582`	`+ case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin, _)`
`582`	`583`	`if optimizeOutRepartition &&`
`583`	`584`	`(shuffleOrigin == REPARTITION_BY_COL \|\| shuffleOrigin == REPARTITION_BY_NUM) =>`
`584`	`585`	`def hasSemanticEqualPartitioning(partitioning: Partitioning): Boolean = {`