Skip to content

Commit 121dc96

Browse files
mgaido91 authored and gatorsmile committed
[SPARK-23087][SQL] CheckCartesianProduct too restrictive when condition is false/null
## What changes were proposed in this pull request?

`CheckCartesianProducts` raises an `AnalysisException` even when the join condition is always false/null. In this case we shouldn't raise it, since the result will not be a cartesian product.

## How was this patch tested?

Added a unit test (UT).

Author: Marco Gaido <marcogaido91@gmail.com>

Closes #20333 from mgaido91/SPARK-23087.
1 parent 00d1691 commit 121dc96

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

+7-3
Original file line numberDiff line numberDiff line change
@@ -1108,15 +1108,19 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper {
11081108
*/
11091109
def isCartesianProduct(join: Join): Boolean = {
11101110
val conditions = join.condition.map(splitConjunctivePredicates).getOrElse(Nil)
1111-
!conditions.map(_.references).exists(refs => refs.exists(join.left.outputSet.contains)
1112-
&& refs.exists(join.right.outputSet.contains))
1111+
1112+
conditions match {
1113+
case Seq(Literal.FalseLiteral) | Seq(Literal(null, BooleanType)) => false
1114+
case _ => !conditions.map(_.references).exists(refs =>
1115+
refs.exists(join.left.outputSet.contains) && refs.exists(join.right.outputSet.contains))
1116+
}
11131117
}
11141118

11151119
def apply(plan: LogicalPlan): LogicalPlan =
11161120
if (SQLConf.get.crossJoinEnabled) {
11171121
plan
11181122
} else plan transform {
1119-
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, condition)
1123+
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _)
11201124
if isCartesianProduct(j) =>
11211125
throw new AnalysisException(
11221126
s"""Detected cartesian product for ${j.joinType.sql} join between logical plans

sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala

+14
Original file line numberDiff line numberDiff line change
@@ -274,4 +274,18 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
274274
checkAnswer(innerJoin, Row(1) :: Nil)
275275
}
276276

277+
test("SPARK-23087: don't throw Analysis Exception in CheckCartesianProduct when join condition " +
278+
"is false or null") {
279+
val df = spark.range(10)
280+
val dfNull = spark.range(10).select(lit(null).as("b"))
281+
val planNull = df.join(dfNull, $"id" === $"b", "left").queryExecution.analyzed
282+
283+
spark.sessionState.executePlan(planNull).optimizedPlan
284+
285+
val dfOne = df.select(lit(1).as("a"))
286+
val dfTwo = spark.range(10).select(lit(2).as("b"))
287+
val planFalse = dfOne.join(dfTwo, $"a" === $"b", "left").queryExecution.analyzed
288+
289+
spark.sessionState.executePlan(planFalse).optimizedPlan
290+
}
277291
}

0 commit comments

Comments
 (0)