move ReuseMap to catalyst util, add docs

peter-toth · peter-toth · commit fe78a316df3d · 2020-06-29T14:02:44.000+02:00
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ReuseMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ReuseMap.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.util
+
+import scala.collection.mutable.Map
+
+import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Map of canonicalized plans that can be used to find reuse possibilities.
+ *
+ * To avoid costly canonicalization of a plan:
+ * - we use its schema first to check if it can be replaced with a reused one at all
+ * - we insert it into the map of canonicalized plans only when at least 2 have the same schema
+ */
+class ReuseMap[T <: QueryPlan[_]] {
+  // Keyed by schema. Each entry holds the first plan seen with that schema and a map from
+  // canonicalized plan to its representative plan, filled only on a later schema match.
+  // scalastyle:off structural.type
+  private val map = Map[StructType, (T, Map[T2 forSome { type T2 >: T }, T])]()
+  // scalastyle:on structural.type
+
+  /**
+   * Returns the plan already registered with the same canonicalized form as `plan`.
+   * When there is no such plan, `plan` itself is registered and returned.
+   *
+   * @param plan the input plan
+   * @return the previously registered equivalent plan, or `plan` itself
+   */
+  def lookup(plan: T): T = {
+    val (schemaOwner, byCanonicalForm) = map.getOrElseUpdate(plan.schema, plan -> Map())
+    if (schemaOwner eq plan) {
+      // `plan` is the first plan recorded for its schema: no canonicalization is needed.
+      plan
+    } else {
+      // Another plan shares this schema: canonicalize the first plan once, on demand.
+      if (byCanonicalForm.isEmpty) {
+        byCanonicalForm.update(schemaOwner.canonicalized, schemaOwner)
+      }
+      byCanonicalForm.getOrElseUpdate(plan.canonicalized, plan)
+    }
+  }
+
+  /**
+   * Looks `plan` up with [[lookup]]. When a different but equivalent plan is already
+   * registered, returns `f` applied to it; otherwise `plan` is registered and returned as is.
+   *
+   * @param plan the input plan
+   * @param f the function to apply to the registered equivalent plan
+   * @return `f` applied to the matching plan, or the input plan
+   */
+  def addOrElse[T2 >: T](plan: T, f: T => T2): T2 = {
+    val reused = lookup(plan)
+    if (reused ne plan) f(reused) else plan
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/reuse/ReuseExchangeAndSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/reuse/ReuseExchangeAndSubquery.scala
@@ -17,51 +17,18 @@
 
 package org.apache.spark.sql.execution.reuse
 
-import scala.collection.mutable.Map
-import scala.language.existentials
-
-import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, ReusedSubqueryExec, SparkPlan}
 import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.ReuseMap
 
 /**
  * Find out duplicated exchanges and subqueries in the whole spark plan including subqueries, then
  * use the same exhange or subquery for all the references.
  */
 case class ReuseExchangeAndSubquery(conf: SQLConf) extends Rule[SparkPlan] {
 
-  private class ReuseMap[T <: QueryPlan[_]] {
-    // To avoid costly canonicalization of an exchange or a subquery:
-    // - we use its schema first to check if it can be replaced to a reused one at all
-    // - we insert it into the map of canonicalized plans only when at least 2 have the same schema
-    private val map = Map[StructType, (T, Map[T2 forSome { type T2 >: T }, T])]()
-
-    def lookup(plan: T): T = {
-      val (firstSameSchemaPlan, sameResultPlans) = map.getOrElseUpdate(plan.schema, plan -> Map())
-      if (firstSameSchemaPlan.ne(plan)) {
-        if (sameResultPlans.isEmpty) {
-          sameResultPlans +=
-            firstSameSchemaPlan.canonicalized -> firstSameSchemaPlan
-        }
-        sameResultPlans.getOrElseUpdate(plan.canonicalized, plan)
-      } else {
-        plan
-      }
-    }
-
-    def addOrElse[T2 >: T](plan: T, f: T => T2): T2 = {
-      val found = lookup(plan)
-      if (found eq plan) {
-        plan
-      } else {
-        f(found)
-      }
-    }
-  }
-
   def apply(plan: SparkPlan): SparkPlan = {
     if (conf.exchangeReuseEnabled || conf.subqueryReuseEnabled) {
       val exchanges = new ReuseMap[Exchange]()