Skip to content

Commit 2eaa7cb

Browse files
wangyum authored and GitHub Enterprise committed
[HADP-55621] Avoid apply bloom filter pruning on the build bloom filter side (apache#653)
1 parent f7f8ec1 commit 2eaa7cb

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RuntimeFilterPruning.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -360,7 +360,7 @@ object RuntimeFilterPruning
360360
private def hasDynamicPruningSubquery(plan: LogicalPlan): Boolean = {
361361
plan match {
362362
case Filter(_: DynamicPruningSubquery, _) => true
363-
case _ => false
363+
case _ => plan.children.exists(hasDynamicPruningSubquery)
364364
}
365365
}
366366

sql/core/src/test/scala/org/apache/spark/sql/DynamicBloomFilterJoinPruningSuite.scala

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp}
2222
import org.scalatest.GivenWhenThen
2323

2424
import org.apache.spark.sql.catalyst.expressions.{BloomFilterMightContain, CodegenObjectFactoryMode, DynamicPruningExpression, Expression, Literal}
25+
import org.apache.spark.sql.catalyst.plans.logical.Filter
2526
import org.apache.spark.sql.execution._
2627
import org.apache.spark.sql.execution.adaptive._
2728
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
@@ -356,4 +357,33 @@ class DynamicBloomFilterPruningSuiteAEOn extends DynamicBloomFilterPruningSuiteB
356357
}
357358
}
358359
}
360+
361+
test("HADP-55621: Avoid apply bloom filter pruning on the build bloom filter side") {
362+
withTable("t1", "t2", "t3") {
363+
sql(
364+
"""
365+
|CREATE TABLE t1 using parquet AS
366+
|SELECT id, split(concat(id, ','), ',') AS s FROM range(1000)
367+
|""".stripMargin)
368+
sql(
369+
"""
370+
|CREATE TABLE t2 using parquet AS
371+
|SELECT id FROM range(10000000)
372+
|""".stripMargin)
373+
withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "12mb") {
374+
val df = sql(
375+
"""
376+
|SELECT * FROM (
377+
| SELECT id, tmp.listings display_id FROM t1 LATERAL VIEW explode(s) tmp AS listings
378+
|)
379+
|WHERE id IN (SELECT id FROM t2) AND display_id IN (SELECT id FROM t2)
380+
|""".stripMargin)
381+
val containsBloomFilter = df.queryExecution.optimizedPlan.exists {
382+
case f: Filter => f.condition.exists(_.isInstanceOf[BloomFilterMightContain])
383+
case _ => false
384+
}
385+
assert(containsBloomFilter)
386+
}
387+
}
388+
}
359389
}

0 commit comments

Comments
 (0)