Skip to content

Commit 7d87976

Browse files
authored
Avoid the need to rewrite expressions when evaluating logical case nullability (#18849)
## Which issue does this PR close? - None, follow up to PR #17813 ## Rationale for this change In #17813, the when expressions are rewritten using `GuaranteesRewrite` before evaluating their bounds. This PR avoids the rewrite cost by inlining the special case of `GuaranteesRewrite` with a single `Null` guarantee into the `PredicateBounds::evaluate_bounds` logic. ## What changes are included in this PR? Inlines guarantee rewriting into predicate bounds evaluation. ## Are these changes tested? Covered by existing tests ## Are there any user-facing changes? No
1 parent e50b939 commit 7d87976

File tree

2 files changed

+21
-31
lines changed

2 files changed

+21
-31
lines changed

datafusion/expr/src/expr_schema.rs

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ use crate::expr::{
2121
InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
2222
WindowFunctionParams,
2323
};
24-
use crate::expr_rewriter::rewrite_with_guarantees;
2524
use crate::type_coercion::functions::{
2625
data_types_with_scalar_udf, fields_with_aggregate_udf, fields_with_window_udf,
2726
};
@@ -34,7 +33,6 @@ use datafusion_common::{
3433
not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema,
3534
Result, ScalarValue, Spans, TableReference,
3635
};
37-
use datafusion_expr_common::interval_arithmetic::NullableInterval;
3836
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
3937
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
4038
use std::sync::Arc;
@@ -307,29 +305,9 @@ impl ExprSchemable for Expr {
307305
// For branches with a nullable 'then' expression, try to determine
308306
// if the 'then' expression is ever reachable in the situation where
309307
// it would evaluate to null.
310-
311-
// First, derive a variant of the 'when' expression, where all occurrences
312-
// of the 'then' expression have been replaced by 'NULL'.
313-
let certainly_null_expr = unwrap_certainly_null_expr(t).clone();
314-
let certainly_null_type =
315-
match certainly_null_expr.get_type(input_schema) {
316-
Err(e) => return Some(Err(e)),
317-
Ok(datatype) => datatype,
318-
};
319-
let null_interval = NullableInterval::Null {
320-
datatype: certainly_null_type,
321-
};
322-
let guarantees = vec![(certainly_null_expr, null_interval)];
323-
let when_with_null =
324-
match rewrite_with_guarantees(*w.clone(), &guarantees) {
325-
Err(e) => return Some(Err(e)),
326-
Ok(e) => e.data,
327-
};
328-
329-
// Next, determine the bounds of the derived 'when' expression to see if it
330-
// can ever evaluate to true.
331308
let bounds = match predicate_bounds::evaluate_bounds(
332-
&when_with_null,
309+
w,
310+
Some(unwrap_certainly_null_expr(t)),
333311
input_schema,
334312
) {
335313
Err(e) => return Some(Err(e)),

datafusion/expr/src/predicate_bounds.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,19 @@ use datafusion_expr_common::operator::Operator;
4848
///
4949
pub(super) fn evaluate_bounds(
5050
predicate: &Expr,
51+
certainly_null_expr: Option<&Expr>,
5152
input_schema: &dyn ExprSchema,
5253
) -> Result<NullableInterval> {
53-
let evaluator = PredicateBoundsEvaluator { input_schema };
54+
let evaluator = PredicateBoundsEvaluator {
55+
input_schema,
56+
certainly_null_expr,
57+
};
5458
evaluator.evaluate_bounds(predicate)
5559
}
5660

5761
struct PredicateBoundsEvaluator<'a> {
5862
input_schema: &'a dyn ExprSchema,
63+
certainly_null_expr: Option<&'a Expr>,
5964
}
6065

6166
impl PredicateBoundsEvaluator<'_> {
@@ -165,6 +170,13 @@ impl PredicateBoundsEvaluator<'_> {
165170
return NullableInterval::FALSE;
166171
}
167172

173+
// Check if the expression is the `certainly_null_expr` that was passed in.
174+
if let Some(certainly_null_expr) = &self.certainly_null_expr {
175+
if expr.eq(certainly_null_expr) {
176+
return NullableInterval::TRUE;
177+
}
178+
}
179+
168180
// `expr` is nullable, so our default answer for `is null` is going to be `{ TRUE, FALSE }`.
169181
// Try to see if we can narrow it down to just one option.
170182
match expr {
@@ -237,7 +249,7 @@ mod tests {
237249

238250
fn eval_bounds(predicate: &Expr) -> Result<NullableInterval> {
239251
let schema = DFSchema::try_from(Schema::empty())?;
240-
evaluate_bounds(predicate, &schema)
252+
evaluate_bounds(predicate, None, &schema)
241253
}
242254

243255
#[test]
@@ -467,7 +479,7 @@ mod tests {
467479

468480
for case in cases {
469481
assert_eq!(
470-
evaluate_bounds(&case.0, case.1).unwrap(),
482+
evaluate_bounds(&case.0, None, case.1).unwrap(),
471483
case.2,
472484
"Failed for {}",
473485
case.0
@@ -543,14 +555,14 @@ mod tests {
543555

544556
for case in cases {
545557
assert_eq!(
546-
evaluate_bounds(&case.0, &not_nullable_schema).unwrap(),
558+
evaluate_bounds(&case.0, None, &not_nullable_schema).unwrap(),
547559
case.1,
548560
"Failed for {}",
549561
case.0
550562
);
551563

552564
assert_eq!(
553-
evaluate_bounds(&case.0, &nullable_schema).unwrap(),
565+
evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
554566
NullableInterval::ANY_TRUTH_VALUE,
555567
"Failed for {}",
556568
case.0
@@ -623,14 +635,14 @@ mod tests {
623635

624636
for case in cases {
625637
assert_eq!(
626-
evaluate_bounds(&case.0, &not_nullable_schema).unwrap(),
638+
evaluate_bounds(&case.0, None, &not_nullable_schema).unwrap(),
627639
case.1,
628640
"Failed for {}",
629641
case.0
630642
);
631643

632644
assert_eq!(
633-
evaluate_bounds(&case.0, &nullable_schema).unwrap(),
645+
evaluate_bounds(&case.0, None, &nullable_schema).unwrap(),
634646
NullableInterval::ANY_TRUTH_VALUE,
635647
"Failed for {}",
636648
case.0

0 commit comments

Comments
 (0)