Skip to content

Commit a3d2954

Browse files
ulysses-youHyukjinKwon
authored andcommitted
[SPARK-33421][SQL] Support Greatest and Least in Expression Canonicalize
### What changes were proposed in this pull request? Add `Greatest` and `Least` check in `Canonicalize`. ### Why are the changes needed? The children of both `Greatest` and `Least` are order Irrelevant. Let's say we have `greatest(1, 2)` and `greatest(2, 1)`. We can get the same canonicalized expression in this case. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30330 from ulysses-you/SPARK-33421. Authored-by: ulysses <youxiduo@weidian.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
1 parent a288716 commit a3d2954

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ object Canonicalize {
109109
// order the list in the In operator
110110
case In(value, list) if list.length > 1 => In(value, list.sortBy(_.hashCode()))
111111

112+
case g: Greatest =>
113+
val newChildren = orderCommutative(g, { case Greatest(children) => children })
114+
Greatest(newChildren)
115+
case l: Least =>
116+
val newChildren = orderCommutative(l, { case Least(children) => children })
117+
Least(newChildren)
118+
112119
case _ => e
113120
}
114121
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,32 @@ class CanonicalizeSuite extends SparkFunSuite {
142142
}
143143
}
144144
}
145+
146+
test("SPARK-33421: Support Greatest and Least in Expression Canonicalize") {
147+
Seq(Least(_), Greatest(_)).foreach { f =>
148+
// test deterministic expr
149+
val expr1 = f(Seq(Literal(1), Literal(2), Literal(3)))
150+
val expr2 = f(Seq(Literal(3), Literal(1), Literal(2)))
151+
val expr3 = f(Seq(Literal(1), Literal(1), Literal(1)))
152+
assert(expr1.canonicalized == expr2.canonicalized)
153+
assert(expr1.canonicalized != expr3.canonicalized)
154+
assert(expr2.canonicalized != expr3.canonicalized)
155+
156+
// test non-deterministic expr
157+
val randExpr1 = f(Seq(Literal(1), rand(1)))
158+
val randExpr2 = f(Seq(rand(1), Literal(1)))
159+
val randExpr3 = f(Seq(Literal(1), rand(2)))
160+
assert(randExpr1.canonicalized == randExpr2.canonicalized)
161+
assert(randExpr1.canonicalized != randExpr3.canonicalized)
162+
assert(randExpr2.canonicalized != randExpr3.canonicalized)
163+
164+
// test nested expr
165+
val nestedExpr1 = f(Seq(Literal(1), f(Seq(Literal(2), Literal(3)))))
166+
val nestedExpr2 = f(Seq(f(Seq(Literal(2), Literal(3))), Literal(1)))
167+
val nestedExpr3 = f(Seq(f(Seq(Literal(1), Literal(1))), Literal(1)))
168+
assert(nestedExpr1.canonicalized == nestedExpr2.canonicalized)
169+
assert(nestedExpr1.canonicalized != nestedExpr3.canonicalized)
170+
assert(nestedExpr2.canonicalized != nestedExpr3.canonicalized)
171+
}
172+
}
145173
}

0 commit comments

Comments
 (0)