[SPARK-26576][SQL] Broadcast hint not applied to partitioned table

jzhuge · rdblue · commit 4ffba0b4f026 · 2019-01-11T14:28:55.000-08:00
Make sure the broadcast hint is applied to partitioned tables. Since the issue exists in branches 2.0 to 2.4, but not in master, I created this PR for branch-2.4. Tests: (1) a new unit test in PruneFileSourcePartitionsSuite; (2) the unit test suites touched by SPARK-14581: JoinOptimizationSuite, FilterPushdownSuite, ColumnPruningSuite, and PruneFiltersSuite. cc: cloud-fan davies rxin. Closes apache#23507 from jzhuge/SPARK-26576. Authored-by: John Zhuge <jzhuge@apache.org> Signed-off-by: gatorsmile <gatorsmile@gmail.com> (cherry picked from commit b9eb0e8) Conflicts: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala, sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
@@ -69,9 +69,6 @@ object PhysicalOperation extends PredicateHelper {
         val substitutedCondition = substitute(aliases)(condition)
         (fields, filters ++ splitConjunctivePredicates(substitutedCondition), other, aliases)
 
-      case BroadcastHint(child) =>
-        collectProjectsAndFilters(child)
-
       case other =>
         (None, Nil, other, Map.empty)
     }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala
@@ -17,14 +17,19 @@
 
 package org.apache.spark.sql.hive.execution
 
+import org.scalatest.Matchers._
+
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
+import org.apache.spark.sql.functions.broadcast
 import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.StructType
 
@@ -66,4 +71,16 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te
       }
     }
   }
+
+  test("SPARK-26576 Broadcast hint not applied to partitioned table") { // hint => broadcast join
+    withTable("tbl") {
+      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { // -1: auto-broadcast off
+        spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
+        val df = spark.table("tbl") // the table written above, partitioned by p
+        val qe = df.join(broadcast(df), "p").queryExecution // self-join; right side hinted
+        qe.optimizedPlan.collect { case _: ResolvedHint => } should have size 1 // hint kept
+        qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1
+      }
+    }
+  }
 }

Original file line number	Diff line number	Diff line change
`@@ -69,9 +69,6 @@ object PhysicalOperation extends PredicateHelper {`
`69`	`69`	`val substitutedCondition = substitute(aliases)(condition)`
`70`	`70`	`(fields, filters ++ splitConjunctivePredicates(substitutedCondition), other, aliases)`
`71`	`71`
`72`		`- case BroadcastHint(child) =>`
`73`		`- collectProjectsAndFilters(child)`
`74`		`-`
`75`	`72`	`case other =>`
`76`	`73`	`(None, Nil, other, Map.empty)`
`77`	`74`	`}`