Skip to content

Commit 9463cf6

Browse files
authored
Fix incorrect NULL IN () optimization (#17092)
* Unit test showing incorrect `NULL IN ()` simplification The optimization does not manifest as a bug in typical queries, because the `ConstantEvaluator` prevents that. However, it still needs to be corrected (or removed), otherwise it's a bug waiting to manifest somewhere. * Fix incorrect `NULL IN ()` optimization
1 parent 3d6541d commit 9463cf6

File tree

2 files changed

+77
-6
lines changed

2 files changed

+77
-6
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,20 +1662,20 @@ impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
16621662
// expr IN () --> false
16631663
// expr NOT IN () --> true
16641664
Expr::InList(InList {
1665-
expr,
1665+
expr: _,
16661666
list,
16671667
negated,
1668-
}) if list.is_empty() && *expr != Expr::Literal(ScalarValue::Null, None) => {
1669-
Transformed::yes(lit(negated))
1670-
}
1668+
}) if list.is_empty() => Transformed::yes(lit(negated)),
16711669

16721670
// null in (x, y, z) --> null
16731671
// null not in (x, y, z) --> null
16741672
Expr::InList(InList {
16751673
expr,
1676-
list: _,
1674+
list,
16771675
negated: _,
1678-
}) if is_null(expr.as_ref()) => Transformed::yes(lit_bool_null()),
1676+
}) if is_null(expr.as_ref()) && !list.is_empty() => {
1677+
Transformed::yes(lit_bool_null())
1678+
}
16791679

16801680
// expr IN ((subquery)) -> expr IN (subquery), see ##5529
16811681
Expr::InList(InList {
@@ -3948,6 +3948,56 @@ mod tests {
39483948
assert_eq!(simplify(expr.clone()), expr);
39493949
}
39503950

3951+
#[test]
3952+
fn simplify_null_in_empty_inlist() {
3953+
// `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
3954+
let expr = in_list(lit_bool_null(), vec![], false);
3955+
assert_eq!(simplify(expr), lit(false));
3956+
3957+
// `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
3958+
let expr = in_list(lit_bool_null(), vec![], true);
3959+
assert_eq!(simplify(expr), lit(true));
3960+
3961+
// `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
3962+
let null_null = || Expr::Literal(ScalarValue::Null, None);
3963+
let expr = in_list(null_null(), vec![], false);
3964+
assert_eq!(simplify(expr), lit(false));
3965+
3966+
// `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
3967+
let expr = in_list(null_null(), vec![], true);
3968+
assert_eq!(simplify(expr), lit(true));
3969+
}
3970+
3971+
#[test]
3972+
fn just_simplifier_simplify_null_in_empty_inlist() {
3973+
let simplify = |expr: Expr| -> Expr {
3974+
let schema = expr_test_schema();
3975+
let execution_props = ExecutionProps::new();
3976+
let info = SimplifyContext::new(&execution_props).with_schema(schema);
3977+
let simplifier = &mut Simplifier::new(&info);
3978+
expr.rewrite(simplifier)
3979+
.expect("Failed to simplify expression")
3980+
.data
3981+
};
3982+
3983+
// `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
3984+
let expr = in_list(lit_bool_null(), vec![], false);
3985+
assert_eq!(simplify(expr), lit(false));
3986+
3987+
// `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
3988+
let expr = in_list(lit_bool_null(), vec![], true);
3989+
assert_eq!(simplify(expr), lit(true));
3990+
3991+
// `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
3992+
let null_null = || Expr::Literal(ScalarValue::Null, None);
3993+
let expr = in_list(null_null(), vec![], false);
3994+
assert_eq!(simplify(expr), lit(false));
3995+
3996+
// `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
3997+
let expr = in_list(null_null(), vec![], true);
3998+
assert_eq!(simplify(expr), lit(true));
3999+
}
4000+
39514001
#[test]
39524002
fn simplify_large_or() {
39534003
let expr = (0..5)

datafusion/sqllogictest/test_files/predicates.slt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,5 +811,26 @@ explain select x from t where x NOT IN (1,2,3,4,5) AND x IN (1,2,3);
811811
logical_plan EmptyRelation
812812
physical_plan EmptyExec
813813

814+
query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression InSubquery\(InSubquery \{ expr: Literal\(Int64\(NULL\), None\), subquery: <subquery>, negated: false \}\)
815+
WITH empty AS (SELECT 10 WHERE false)
816+
SELECT
817+
NULL IN (SELECT * FROM empty), -- should be false, as the right side is empty relation
818+
NULL NOT IN (SELECT * FROM empty) -- should be true, as the right side is empty relation
819+
FROM (SELECT 1) t;
820+
821+
query I
822+
WITH empty AS (SELECT 10 WHERE false)
823+
SELECT * FROM (SELECT 1) t
824+
WHERE NOT (NULL IN (SELECT * FROM empty)); -- all rows should be returned
825+
----
826+
1
827+
828+
query I
829+
WITH empty AS (SELECT 10 WHERE false)
830+
SELECT * FROM (SELECT 1) t
831+
WHERE NULL NOT IN (SELECT * FROM empty); -- all rows should be returned
832+
----
833+
1
834+
814835
statement ok
815836
drop table t;

0 commit comments

Comments
 (0)