Skip to content

Commit e4a056f

Browse files
authored
Add Expression Simplification API (#1717)
* Add Expression Simplification API * fmt
1 parent 46879f1 commit e4a056f

File tree

4 files changed

+273
-81
lines changed

4 files changed

+273
-81
lines changed

datafusion/src/logical_plan/expr.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020
2121
pub use super::Operator;
2222
use crate::error::{DataFusionError, Result};
23+
use crate::execution::context::ExecutionProps;
2324
use crate::field_util::get_indexed_field;
2425
use crate::logical_plan::{
2526
plan::Aggregate, window_frames, DFField, DFSchema, LogicalPlan,
2627
};
28+
use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier};
2729
use crate::physical_plan::functions::Volatility;
2830
use crate::physical_plan::{
2931
aggregates, expressions::binary_operator_data_type, functions, udf::ScalarUDF,
@@ -971,6 +973,58 @@ impl Expr {
971973
Ok(expr)
972974
}
973975
}
976+
977+
/// Simplifies this [`Expr`] as much as possible, evaluating
978+
/// constants and applying algebraic simplifications
979+
///
980+
/// # Example:
981+
/// `b > 2 AND b > 2`
982+
/// can be rewritten to
983+
/// `b > 2`
984+
///
985+
/// ```
986+
/// use datafusion::logical_plan::*;
987+
/// use datafusion::error::Result;
988+
/// use datafusion::execution::context::ExecutionProps;
989+
///
990+
/// /// Simple implementation that provides `Simplifier` the information it needs
991+
/// #[derive(Default)]
992+
/// struct Info {
993+
/// execution_props: ExecutionProps,
994+
/// };
995+
///
996+
/// impl SimplifyInfo for Info {
997+
/// fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
998+
/// Ok(false)
999+
/// }
1000+
/// fn nullable(&self, expr: &Expr) -> Result<bool> {
1001+
/// Ok(true)
1002+
/// }
1003+
/// fn execution_props(&self) -> &ExecutionProps {
1004+
/// &self.execution_props
1005+
/// }
1006+
/// }
1007+
///
1008+
/// // b > 2
1009+
/// let b_lt_2 = col("b").gt(lit(2));
1010+
///
1011+
/// // (b > 2) OR (b > 2)
1012+
/// let expr = b_lt_2.clone().or(b_lt_2.clone());
1013+
///
1014+
/// // (b > 2) OR (b > 2) --> (b > 2)
1015+
/// let expr = expr.simplify(&Info::default()).unwrap();
1016+
/// assert_eq!(expr, b_lt_2);
1017+
/// ```
1018+
pub fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
1019+
let mut rewriter = Simplifier::new(info);
1020+
let mut const_evaluator = ConstEvaluator::new(info.execution_props());
1021+
1022+
// TODO iterate until no changes are made during rewrite
1023+
// (evaluating constants can enable new simplifications and
1024+
// simplifications can enable new constant evaluation)
1025+
// https://github.com/apache/arrow-datafusion/issues/1160
1026+
self.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
1027+
}
9741028
}
9751029

9761030
impl Not for Expr {
@@ -1092,6 +1146,20 @@ pub trait ExprRewriter: Sized {
10921146
fn mutate(&mut self, expr: Expr) -> Result<Expr>;
10931147
}
10941148

1149+
/// The information necessary to apply algebraic simplification to an
1150+
/// [Expr]. See [SimplifyContext] for one implementation
1151+
pub trait SimplifyInfo {
1152+
/// returns true if this Expr has boolean type
1153+
fn is_boolean_type(&self, expr: &Expr) -> Result<bool>;
1154+
1155+
/// returns true if this expr is nullable (could possibly be NULL)
1156+
fn nullable(&self, expr: &Expr) -> Result<bool>;
1157+
1158+
/// Returns details needed for partial expression evaluation
1159+
fn execution_props(&self) -> &ExecutionProps;
1160+
}
1161+
1162+
/// Helper struct for building [Expr::Case]
10951163
pub struct CaseBuilder {
10961164
expr: Option<Box<Expr>>,
10971165
when_expr: Vec<Expr>,

datafusion/src/logical_plan/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub use expr::{
4747
signum, sin, split_part, sqrt, starts_with, strpos, substr, sum, tan, to_hex,
4848
translate, trim, trunc, unalias, unnormalize_col, unnormalize_cols, upper, when,
4949
Column, Expr, ExprRewriter, ExpressionVisitor, Literal, Recursion, RewriteRecursion,
50+
SimplifyInfo,
5051
};
5152
pub use extension::UserDefinedLogicalNode;
5253
pub use operators::Operator;

0 commit comments

Comments
 (0)