Skip to content

Commit 4b19f49

Browse files
wangyum authored and dongjoon-hyun committed
[SPARK-33845][SQL] Remove unnecessary if when trueValue and falseValue are foldable boolean types
### What changes were proposed in this pull request?

Improve `SimplifyConditionals`. Simplify `If(cond, TrueLiteral, FalseLiteral)` to `cond`. Simplify `If(cond, FalseLiteral, TrueLiteral)` to `Not(cond)`.

The use case is:

```sql
create table t1 using parquet as select id from range(10);
select if (id > 2, false, true) from t1;
```

Before this pr:

```
== Physical Plan ==
*(1) Project [if ((id#1L > 2)) false else true AS (IF((id > CAST(2 AS BIGINT)), false, true))#2]
+- *(1) ColumnarToRow
   +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:bigint>
```

After this pr:

```
== Physical Plan ==
*(1) Project [(id#1L <= 2) AS (IF((id > CAST(2 AS BIGINT)), false, true))#2]
+- *(1) ColumnarToRow
   +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:bigint>
```

### Why are the changes needed?

Improve query performance.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes apache#30849 from wangyum/SPARK-33798-2.

Authored-by: Yuming Wang <yumwang@ebay.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
1 parent b4bea1a commit 4b19f49

File tree

4 files changed

+39
-17
lines changed

4 files changed

+39
-17
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -475,6 +475,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
475475
case If(TrueLiteral, trueValue, _) => trueValue
476476
case If(FalseLiteral, _, falseValue) => falseValue
477477
case If(Literal(null, _), _, falseValue) => falseValue
478+
case If(cond, TrueLiteral, FalseLiteral) => cond
479+
case If(cond, FalseLiteral, TrueLiteral) => Not(cond)
478480
case If(cond, trueValue, falseValue)
479481
if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue
480482
case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class PushFoldableIntoBranchesSuite
5353

5454
test("Push down EqualTo through If") {
5555
assertEquivalent(EqualTo(ifExp, Literal(4)), FalseLiteral)
56-
assertEquivalent(EqualTo(ifExp, Literal(3)), If(a, FalseLiteral, TrueLiteral))
56+
assertEquivalent(EqualTo(ifExp, Literal(3)), Not(a))
5757

5858
// Push down at most one not foldable expressions.
5959
assertEquivalent(
@@ -67,7 +67,7 @@ class PushFoldableIntoBranchesSuite
6767
val nonDeterministic = If(LessThan(Rand(1), Literal(0.5)), Literal(1), Literal(2))
6868
assert(!nonDeterministic.deterministic)
6969
assertEquivalent(EqualTo(nonDeterministic, Literal(2)),
70-
If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, TrueLiteral))
70+
GreaterThanOrEqual(Rand(1), Literal(0.5)))
7171
assertEquivalent(EqualTo(nonDeterministic, Literal(3)),
7272
If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, FalseLiteral))
7373

@@ -102,8 +102,7 @@ class PushFoldableIntoBranchesSuite
102102
assertEquivalent(Remainder(ifExp, Literal(4)), If(a, Literal(2), Literal(3)))
103103
assertEquivalent(Divide(If(a, Literal(2.0), Literal(3.0)), Literal(1.0)),
104104
If(a, Literal(2.0), Literal(3.0)))
105-
assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral),
106-
If(a, FalseLiteral, TrueLiteral))
105+
assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), Not(a))
107106
assertEquivalent(Or(If(a, FalseLiteral, TrueLiteral), TrueLiteral), TrueLiteral)
108107
}
109108

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException
2121
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
2222
import org.apache.spark.sql.catalyst.dsl.expressions._
2323
import org.apache.spark.sql.catalyst.dsl.plans._
24-
import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable}
24+
import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, LessThanOrEqual, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable}
2525
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
2626
import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
2727
import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable}
@@ -236,12 +236,13 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest {
236236
Literal(2) === nestedCaseWhen,
237237
TrueLiteral,
238238
FalseLiteral)
239-
val branches = Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue)
240-
val condition = CaseWhen(branches)
241-
testFilter(originalCond = condition, expectedCond = condition)
242-
testJoin(originalCond = condition, expectedCond = condition)
243-
testDelete(originalCond = condition, expectedCond = condition)
244-
testUpdate(originalCond = condition, expectedCond = condition)
239+
val condition = CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue))
240+
val expectedCond =
241+
CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> (Literal(2) === nestedCaseWhen)))
242+
testFilter(originalCond = condition, expectedCond = expectedCond)
243+
testJoin(originalCond = condition, expectedCond = expectedCond)
244+
testDelete(originalCond = condition, expectedCond = expectedCond)
245+
testUpdate(originalCond = condition, expectedCond = expectedCond)
245246
}
246247

247248
test("inability to replace null in non-boolean branches of If inside another If") {
@@ -252,10 +253,14 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest {
252253
Literal(3)),
253254
TrueLiteral,
254255
FalseLiteral)
255-
testFilter(originalCond = condition, expectedCond = condition)
256-
testJoin(originalCond = condition, expectedCond = condition)
257-
testDelete(originalCond = condition, expectedCond = condition)
258-
testUpdate(originalCond = condition, expectedCond = condition)
256+
val expectedCond = Literal(5) > If(
257+
UnresolvedAttribute("i") === Literal(15),
258+
Literal(null, IntegerType),
259+
Literal(3))
260+
testFilter(originalCond = condition, expectedCond = expectedCond)
261+
testJoin(originalCond = condition, expectedCond = expectedCond)
262+
testDelete(originalCond = condition, expectedCond = expectedCond)
263+
testUpdate(originalCond = condition, expectedCond = expectedCond)
259264
}
260265

261266
test("replace null in If used as a join condition") {
@@ -405,9 +410,9 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest {
405410
val lambda1 = LambdaFunction(
406411
function = If(cond, Literal(null, BooleanType), TrueLiteral),
407412
arguments = lambdaArgs)
408-
// the optimized lambda body is: if(arg > 0, false, true)
413+
// the optimized lambda body is: if(arg > 0, false, true) => arg <= 0
409414
val lambda2 = LambdaFunction(
410-
function = If(cond, FalseLiteral, TrueLiteral),
415+
function = LessThanOrEqual(condArg, Literal(0)),
411416
arguments = lambdaArgs)
412417
testProjection(
413418
originalExpr = createExpr(argument, lambda1) as 'x,

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,20 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P
199199
If(Factorial(5) > 100L, b, nullLiteral).eval(EmptyRow))
200200
}
201201
}
202+
203+
test("SPARK-33845: remove unnecessary if when the outputs are boolean type") {
204+
assertEquivalent(
205+
If(IsNotNull(UnresolvedAttribute("a")), TrueLiteral, FalseLiteral),
206+
IsNotNull(UnresolvedAttribute("a")))
207+
assertEquivalent(
208+
If(IsNotNull(UnresolvedAttribute("a")), FalseLiteral, TrueLiteral),
209+
IsNull(UnresolvedAttribute("a")))
210+
211+
assertEquivalent(
212+
If(GreaterThan(Rand(0), UnresolvedAttribute("a")), TrueLiteral, FalseLiteral),
213+
GreaterThan(Rand(0), UnresolvedAttribute("a")))
214+
assertEquivalent(
215+
If(GreaterThan(Rand(0), UnresolvedAttribute("a")), FalseLiteral, TrueLiteral),
216+
LessThanOrEqual(Rand(0), UnresolvedAttribute("a")))
217+
}
202218
}

0 commit comments

Comments
 (0)