Skip to content

Commit 6594601

Browse files
committed
init
1 parent 1d18096 commit 6594601

File tree

2 files changed

+54
-14
lines changed

2 files changed

+54
-14
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -235,26 +235,39 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] {
235235
* [[InSet (value, HashSet[Literal])]] which is much faster.
236236
*/
237237
object OptimizeIn extends Rule[LogicalPlan] {
238+
def optimizeIn(expr: In, v: Expression, list: Seq[Expression]): Expression = {
239+
val newList = ExpressionSet(list).toSeq
240+
if (newList.length == 1
241+
// TODO: `EqualTo` for structural types are not working. Until SPARK-24443 is addressed,
242+
// TODO: we exclude them in this rule.
243+
&& !v.isInstanceOf[CreateNamedStruct]
244+
&& !newList.head.isInstanceOf[CreateNamedStruct]) {
245+
EqualTo(v, newList.head)
246+
} else if (newList.length > SQLConf.get.optimizerInSetConversionThreshold) {
247+
val hSet = newList.map(e => e.eval(EmptyRow))
248+
InSet(v, HashSet() ++ hSet)
249+
} else if (newList.length < list.length) {
250+
expr.copy(list = newList)
251+
} else { // newList.length == list.length && newList.length > 1
252+
expr
253+
}
254+
}
255+
238256
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
239257
case q: LogicalPlan => q transformExpressionsDown {
240258
case In(v, list) if list.isEmpty =>
241259
// When v is not nullable, the following expression will be optimized
242260
// to FalseLiteral which is tested in OptimizeInSuite.scala
243261
If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType))
244-
case expr @ In(v, list) if expr.inSetConvertible =>
245-
val newList = ExpressionSet(list).toSeq
246-
if (newList.length == 1
247-
// TODO: `EqualTo` for structural types are not working. Until SPARK-24443 is addressed,
248-
// TODO: we exclude them in this rule.
249-
&& !v.isInstanceOf[CreateNamedStruct]
250-
&& !newList.head.isInstanceOf[CreateNamedStruct]) {
251-
EqualTo(v, newList.head)
252-
} else if (newList.length > SQLConf.get.optimizerInSetConversionThreshold) {
253-
val hSet = newList.map(e => e.eval(EmptyRow))
254-
InSet(v, HashSet() ++ hSet)
255-
} else if (newList.length < list.length) {
256-
expr.copy(list = newList)
257-
} else { // newList.length == list.length && newList.length > 1
262+
case expr @ In(v, list) =>
263+
// split list to 2 parts so that we can push down convertible part
264+
val (convertible, nonConvertible) = list.partition(_.isInstanceOf[Literal])
265+
if (convertible.nonEmpty && nonConvertible.isEmpty) {
266+
optimizeIn(expr, v, list)
267+
} else if (convertible.nonEmpty && nonConvertible.nonEmpty) {
268+
val optimizedIn = optimizeIn(In(v, convertible), v, convertible)
269+
And(optimizedIn, In(v, nonConvertible))
270+
} else {
258271
expr
259272
}
260273
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,4 +238,31 @@ class OptimizeInSuite extends PlanTest {
238238

239239
comparePlans(optimized, correctAnswer)
240240
}
241+
242+
test("SPARK-32196: Extract convertible part if In is not convertible") {
243+
val originalQuery1 =
244+
testRelation
245+
.where(In(UnresolvedAttribute("a"), Seq(Literal(1), UnresolvedAttribute("b"))))
246+
.analyze
247+
val optimized1 = Optimize.execute(originalQuery1)
248+
val correctAnswer1 =
249+
testRelation
250+
.where(
251+
And(EqualTo(UnresolvedAttribute("a"), Literal(1)),
252+
In(UnresolvedAttribute("a"), Seq(UnresolvedAttribute("b"))))
253+
)
254+
.analyze
255+
comparePlans(optimized1, correctAnswer1)
256+
257+
val originalQuery2 =
258+
testRelation
259+
.where(In(UnresolvedAttribute("a"), Seq(UnresolvedAttribute("b"))))
260+
.analyze
261+
val optimized2 = Optimize.execute(originalQuery2)
262+
val correctAnswer2 =
263+
testRelation
264+
.where(In(UnresolvedAttribute("a"), Seq(UnresolvedAttribute("b"))))
265+
.analyze
266+
comparePlans(optimized2, correctAnswer2)
267+
}
241268
}

0 commit comments

Comments
 (0)