
 package org.apache.spark.sql.execution.adaptive.rule

-import scala.collection.mutable.ArrayBuffer
 import scala.concurrent.duration.Duration

 import org.apache.spark.MapOutputStatistics
+import org.apache.spark.SparkException
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.adaptive.{QueryFragmentExec, ShuffleQueryFragmentExec}
+import org.apache.spark.sql.execution.adaptive.{QueryFragmentExec, ReusedQueryFragmentExec, ShuffleQueryFragmentExec}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.util.ThreadUtils

@@ -61,7 +61,9 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] {
       ThreadUtils.awaitResult(metricsFuture, Duration.Zero)
     }

-    val allFragmentLeaves = plan.collectLeaves().forall(_.isInstanceOf[QueryFragmentExec])
+    val allFragmentLeaves = plan.collectLeaves().forall { node =>
+      node.isInstanceOf[QueryFragmentExec] || node.isInstanceOf[ReusedQueryFragmentExec]
+    }

     if (allFragmentLeaves) {
       // ShuffleQueryFragment gives null mapOutputStatistics when the input RDD has 0 partitions,
@@ -76,6 +78,8 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] {
         // number of output partitions.
         case fragment: ShuffleQueryFragmentExec =>
           CoalescedShuffleReaderExec(fragment, partitionStartIndices)
+        case r @ ReusedQueryFragmentExec(fragment: ShuffleQueryFragmentExec, output) =>
+          CoalescedShuffleReaderExec(r, partitionStartIndices)
       }
     } else {
       plan
@@ -152,7 +156,9 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] {
         partitionStartIndices += i
         // reset postShuffleInputSize.
         postShuffleInputSize = nextShuffleInputSize
-      } else postShuffleInputSize += nextShuffleInputSize
+      } else {
+        postShuffleInputSize += nextShuffleInputSize
+      }

       i += 1
     }
@@ -162,7 +168,7 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] {
 }

 case class CoalescedShuffleReaderExec(
-    child: ShuffleQueryFragmentExec,
+    child: SparkPlan,
     partitionStartIndices: Array[Int]) extends UnaryExecNode {

   override def output: Seq[Attribute] = child.output
@@ -175,7 +181,13 @@ case class CoalescedShuffleReaderExec(

   override protected def doExecute(): RDD[InternalRow] = {
     if (cachedShuffleRDD == null) {
-      cachedShuffleRDD = child.plan.createShuffledRDD(Some(partitionStartIndices))
+      cachedShuffleRDD = child match {
+        case fragment: ShuffleQueryFragmentExec =>
+          fragment.plan.createShuffledRDD(Some(partitionStartIndices))
+        case ReusedQueryFragmentExec(fragment: ShuffleQueryFragmentExec, _) =>
+          fragment.plan.createShuffledRDD(Some(partitionStartIndices))
+        case _ => throw new SparkException("Invalid child for CoalescedShuffleReaderExec")
+      }
     }
     cachedShuffleRDD
   }
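
For context on the `-152,7 +156,9` hunk: the loop it touches packs adjacent post-shuffle partitions into one coalesced partition until a target size would be exceeded. Below is a minimal standalone sketch of that estimation, assuming a single Array[Long] of partition sizes and a targetSize parameter; the actual rule aggregates sizes across all MapOutputStatistics and reads the target from SQLConf.

import scala.collection.mutable.ArrayBuffer

// Simplified illustration of the partition-coalescing loop; not the patched file itself.
def estimatePartitionStartIndicesSketch(partitionSizes: Array[Long], targetSize: Long): Array[Int] = {
  val partitionStartIndices = ArrayBuffer[Int](0) // the first coalesced partition starts at index 0
  var postShuffleInputSize = 0L
  var i = 0
  while (i < partitionSizes.length) {
    val nextShuffleInputSize = partitionSizes(i)
    // Once adding the next pre-shuffle partition would exceed the target,
    // close the current coalesced partition and start a new one at index i.
    if (i > 0 && postShuffleInputSize + nextShuffleInputSize > targetSize) {
      partitionStartIndices += i
      // reset postShuffleInputSize for the new coalesced partition.
      postShuffleInputSize = nextShuffleInputSize
    } else {
      postShuffleInputSize += nextShuffleInputSize
    }
    i += 1
  }
  partitionStartIndices.toArray
}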