Skip to content

Commit 4ffba0b

Browse files
jzhuge authored and rdblue committed
[SPARK-26576][SQL] Broadcast hint not applied to partitioned table
Make sure the broadcast hint is applied to partitioned tables.

Since the issue exists in branches 2.0 to 2.4, but not in master, I created this PR for branch-2.4.

- A new unit test in PruneFileSourcePartitionsSuite
- Unit test suites touched by SPARK-14581: JoinOptimizationSuite, FilterPushdownSuite, ColumnPruningSuite, and PruneFiltersSuite

cloud-fan davies rxin

Closes apache#23507 from jzhuge/SPARK-26576.

Authored-by: John Zhuge <jzhuge@apache.org>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
(cherry picked from commit b9eb0e8)

Conflicts:
  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala
1 parent 01d899d commit 4ffba0b

File tree

2 files changed: +18 -4 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,6 @@ object PhysicalOperation extends PredicateHelper {
6969
val substitutedCondition = substitute(aliases)(condition)
7070
(fields, filters ++ splitConjunctivePredicates(substitutedCondition), other, aliases)
7171

72-
case BroadcastHint(child) =>
73-
collectProjectsAndFilters(child)
74-
7572
case other =>
7673
(None, Nil, other, Map.empty)
7774
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,19 @@
1717

1818
package org.apache.spark.sql.hive.execution
1919

20+
import org.scalatest.Matchers._
21+
2022
import org.apache.spark.sql.QueryTest
2123
import org.apache.spark.sql.catalyst.dsl.expressions._
2224
import org.apache.spark.sql.catalyst.dsl.plans._
23-
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
25+
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint}
2426
import org.apache.spark.sql.catalyst.rules.RuleExecutor
2527
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
2628
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
29+
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
30+
import org.apache.spark.sql.functions.broadcast
2731
import org.apache.spark.sql.hive.test.TestHiveSingleton
32+
import org.apache.spark.sql.internal.SQLConf
2833
import org.apache.spark.sql.test.SQLTestUtils
2934
import org.apache.spark.sql.types.StructType
3035

@@ -66,4 +71,16 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te
6671
}
6772
}
6873
}
74+
75+
// Regression test for SPARK-26576.
// Writes a 10-row table "tbl" partitioned by column "p" (id % 3), then self-joins it on "p"
// with the right side wrapped in broadcast(...). AUTO_BROADCASTJOIN_THRESHOLD is set to "-1"
// for the duration of the test (per the Spark SQL docs, -1 disables size-based automatic
// broadcasting), so a broadcast join should only be planned because of the explicit hint.
// Asserts that the optimized logical plan contains exactly one ResolvedHint node and that the
// physical plan contains exactly one BroadcastHashJoinExec.
test("SPARK-26576 Broadcast hint not applied to partitioned table") {
76+
withTable("tbl") {
77+
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
78+
spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
79+
val df = spark.table("tbl")
80+
val qe = df.join(broadcast(df), "p").queryExecution
81+
qe.optimizedPlan.collect { case _: ResolvedHint => } should have size 1
82+
qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1
83+
}
84+
}
85+
}
6986
}

0 commit comments

Comments
 (0)