-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Closed as not planned
Closed as not planned
Copy link
Labels
bug: Something isn't working
Description
Describe the bug
When using an IN clause containing NULL (e.g., col IN (1, NULL)), ExprSimplifier appears to return incorrect results.
To Reproduce
Reproduce test:
mod tests {
use std::sync::Arc;
use arrow_schema::{DataType, Field, SchemaRef};
use datafusion_common::{Column, DFSchema, ScalarValue};
use datafusion_expr::{BinaryExpr, Cast, Expr, Operator};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
/// Runs `expr` through `ExprSimplifier`: first `simplify`, then `coerce`
/// against a `DFSchema` built from the given Arrow `schema`.
///
/// # Errors
///
/// Propagates any error returned by the `DFSchema` conversion, the simplify
/// pass, or the coerce pass.
pub fn optimize_expr(schema: SchemaRef, expr: Expr) -> datafusion_common::Result<Expr> {
    let schema_ref = Arc::new(DFSchema::try_from(schema.as_ref().clone())?);

    // The physical planner expects expressions that have already gone through
    // both the simplify and the coerce passes.
    let execution_props = ExecutionProps::default();
    let context = SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema_ref));
    let simplifier = ExprSimplifier::new(context);

    simplifier
        .simplify(expr)
        .and_then(|simplified| simplifier.coerce(simplified, &schema_ref))
}
/// Builds `x = 1 OR x = CAST(NULL AS Int32)` over a nullable `Int32` column
/// `x`, then prints the expression before and after `optimize_expr`.
#[test]
fn test() {
    let fields = vec![Field::new("x", DataType::Int32, true)];
    let schema = Arc::new(arrow_schema::Schema::new(fields));

    // The unqualified column `x` is needed once per comparison.
    let column_x = || Box::new(Expr::Column(Column::new(None::<String>, "x")));

    // x = 1
    let eq_one = Expr::BinaryExpr(BinaryExpr::new(
        column_x(),
        Operator::Eq,
        Box::new(Expr::Literal(ScalarValue::Int32(Some(1)), None)),
    ));

    // x = CAST(NULL AS Int32)
    let typed_null = Expr::Cast(Cast::new(
        Box::new(Expr::Literal(ScalarValue::Null, None)),
        DataType::Int32,
    ));
    let eq_null = Expr::BinaryExpr(BinaryExpr::new(
        column_x(),
        Operator::Eq,
        Box::new(typed_null),
    ));

    // x IN (1, NULL), spelled out as `x = 1 OR x = CAST(NULL AS Int32)`
    let expr = Expr::BinaryExpr(BinaryExpr::new(
        Box::new(eq_one),
        Operator::Or,
        Box::new(eq_null),
    ));
    println!("expr: {:#?}", expr);

    let logical_expr = optimize_expr(Arc::clone(&schema), expr).unwrap();
    println!("logical_expr: {:#?}", logical_expr);
}
}

The output:
expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Cast(
Cast {
expr: Literal(
NULL,
None,
),
data_type: Int32,
},
),
},
),
},
)
logical_expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: Literal(
Boolean(NULL),
None,
),
},
)
For comparison, the output with DataFusion 49.0.2 is:
logical_expr: BinaryExpr(
BinaryExpr {
left: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(1),
None,
),
},
),
op: Or,
right: BinaryExpr(
BinaryExpr {
left: Column(
Column {
relation: None,
name: "x",
},
),
op: Eq,
right: Literal(
Int32(NULL),
None,
),
},
),
},
)
Expected behavior
No response
Additional context
No response
Metadata
Metadata
Assignees
Labels
bug: Something isn't working