From 930653415c6303b34f856826ba73717b25b9574c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 5 Jun 2022 06:02:37 -0600 Subject: [PATCH] MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686) --- .github/workflows/dev_pr/labeler.yml | 2 +- datafusion/core/src/lib.rs | 5 +- datafusion/core/src/logical_plan/mod.rs | 3 +- datafusion/core/src/optimizer/mod.rs | 28 ------ datafusion/optimizer/Cargo.toml | 1 + .../src/expr_simplifier.rs} | 14 +-- datafusion/optimizer/src/lib.rs | 2 + .../src}/simplify_expressions.rs | 85 ++++++++++++------- 8 files changed, 66 insertions(+), 74 deletions(-) delete mode 100644 datafusion/core/src/optimizer/mod.rs rename datafusion/{core/src/logical_plan/expr_simplier.rs => optimizer/src/expr_simplifier.rs} (88%) rename datafusion/{core/src/optimizer => optimizer/src}/simplify_expressions.rs (96%) diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 3d010355b569..90a4619d4d18 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -38,7 +38,7 @@ physical-expr: - datafusion/physical-expr/**/* optimizer: - - datafusion/core/src/optimizer/**/* + - datafusion/optimizer/**/* core: - datafusion/core/**/* diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 4b47222855d1..df863ebd1e44 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -213,7 +213,6 @@ pub mod datasource; pub mod error; pub mod execution; pub mod logical_plan; -pub mod optimizer; pub mod physical_optimizer; pub mod physical_plan; pub mod prelude; @@ -230,10 +229,10 @@ pub use parquet; pub use datafusion_common as common; pub use datafusion_data_access; pub use datafusion_expr as logical_expr; +pub use datafusion_optimizer as optimizer; pub use datafusion_physical_expr as physical_expr; -pub use datafusion_sql as sql; - pub use datafusion_row as row; +pub use datafusion_sql as sql; #[cfg(feature = "jit")] pub use datafusion_jit as jit; diff --git a/datafusion/core/src/logical_plan/mod.rs b/datafusion/core/src/logical_plan/mod.rs index 0a97314412de..eb6e788d500d 100644 --- a/datafusion/core/src/logical_plan/mod.rs +++ b/datafusion/core/src/logical_plan/mod.rs @@ -22,7 +22,6 @@ //! physical query plans and executed. mod expr; -mod expr_simplier; pub mod plan; mod registry; pub mod window_frames; @@ -36,6 +35,7 @@ pub use datafusion_expr::{ }, ExprSchemable, Operator, }; +pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo}; pub use expr::{ abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan, avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col, @@ -54,7 +54,6 @@ pub use expr_rewriter::{ rewrite_sort_cols_by_aggs, unnormalize_col, unnormalize_cols, ExprRewritable, ExprRewriter, RewriteRecursion, }; -pub use expr_simplier::{ExprSimplifiable, SimplifyInfo}; pub use plan::{provider_as_source, source_as_provider}; pub use plan::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable, diff --git a/datafusion/core/src/optimizer/mod.rs b/datafusion/core/src/optimizer/mod.rs deleted file mode 100644 index cf6412db9ab2..000000000000 --- a/datafusion/core/src/optimizer/mod.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This module contains a query optimizer that operates against a logical plan and applies -//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion". - -#![allow(clippy::module_inception)] -pub mod simplify_expressions; - -pub use datafusion_optimizer::{ - common_subexpr_eliminate, eliminate_filter, eliminate_limit, filter_push_down, - limit_push_down, optimizer, projection_push_down, single_distinct_to_groupby, - subquery_filter_to_join, utils, -}; diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 4d024f4c51b0..1c9d4cc246fd 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -42,5 +42,6 @@ async-trait = "0.1.41" chrono = { version = "0.4", default-features = false } datafusion-common = { path = "../common", version = "8.0.0" } datafusion-expr = { path = "../expr", version = "8.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" } hashbrown = { version = "0.12", features = ["raw"] } log = "^0.4" diff --git a/datafusion/core/src/logical_plan/expr_simplier.rs b/datafusion/optimizer/src/expr_simplifier.rs similarity index 88% rename from datafusion/core/src/logical_plan/expr_simplier.rs rename to datafusion/optimizer/src/expr_simplifier.rs index f5dc727f229f..d71ecdaa201f 100644 --- a/datafusion/core/src/logical_plan/expr_simplier.rs +++ b/datafusion/optimizer/src/expr_simplifier.rs @@ -17,11 +17,10 @@ //! Expression simplifier -use super::Expr; -use super::ExprRewritable; -use crate::execution::context::ExecutionProps; -use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier}; +use crate::simplify_expressions::{ConstEvaluator, Simplifier}; use datafusion_common::Result; +use datafusion_expr::{expr_rewriter::ExprRewritable, Expr}; +use datafusion_physical_expr::execution_props::ExecutionProps; #[allow(rustdoc::private_intra_doc_links)] /// The information necessary to apply algebraic simplification to an @@ -54,9 +53,10 @@ impl ExprSimplifiable for Expr { /// `b > 2` /// /// ``` - /// use datafusion::logical_plan::*; - /// use datafusion::error::Result; - /// use datafusion::execution::context::ExecutionProps; + /// use datafusion_expr::{col, lit, Expr}; + /// use datafusion_common::Result; + /// use datafusion_physical_expr::execution_props::ExecutionProps; + /// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable}; /// /// /// Simple implementation that provides `Simplifier` the information it needs /// #[derive(Default)] diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 9a5130050878..cde35507352b 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -18,10 +18,12 @@ pub mod common_subexpr_eliminate; pub mod eliminate_filter; pub mod eliminate_limit; +pub mod expr_simplifier; pub mod filter_push_down; pub mod limit_push_down; pub mod optimizer; pub mod projection_push_down; +pub mod simplify_expressions; pub mod single_distinct_to_groupby; pub mod subquery_filter_to_join; pub mod utils; diff --git a/datafusion/core/src/optimizer/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs similarity index 96% rename from datafusion/core/src/optimizer/simplify_expressions.rs rename to datafusion/optimizer/src/simplify_expressions.rs index 162da48941c4..9be9bfde83b9 100644 --- a/datafusion/core/src/optimizer/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -17,9 +17,8 @@ //! Simplify expressions optimizer rule -use crate::execution::context::ExecutionProps; -use crate::logical_plan::{ExprSimplifiable, SimplifyInfo}; -use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use crate::expr_simplifier::ExprSimplifiable; +use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule}; use arrow::array::new_null_array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; @@ -30,9 +29,9 @@ use datafusion_expr::{ lit, logical_plan::LogicalPlan, utils::from_plan, - Expr, ExprSchemable, Operator, Volatility, + ColumnarValue, Expr, ExprSchemable, Operator, Volatility, }; -use datafusion_physical_expr::create_physical_expr; +use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps}; /// Provides simplification information based on schema and properties pub(crate) struct SimplifyContext<'a, 'b> { @@ -95,7 +94,7 @@ impl<'a, 'b> SimplifyInfo for SimplifyContext<'a, 'b> { /// `Filter: b > 2` /// #[derive(Default)] -pub(crate) struct SimplifyExpressions {} +pub struct SimplifyExpressions {} /// returns true if `needle` is found in a chain of search_op /// expressions. Such as: (A AND B) AND C @@ -265,13 +264,13 @@ impl SimplifyExpressions { /// --> `a`, which is handled by [`Simplifier`] /// /// ``` -/// # use datafusion::prelude::*; -/// # use datafusion::logical_plan::ExprRewritable; -/// # use datafusion::optimizer::simplify_expressions::ConstEvaluator; -/// # use datafusion::execution::context::ExecutionProps; +/// # use datafusion_expr::{col, lit}; +/// # use datafusion_optimizer::simplify_expressions::ConstEvaluator; +/// # use datafusion_physical_expr::execution_props::ExecutionProps; +/// # use datafusion_expr::expr_rewriter::ExprRewritable; /// -/// let optimizer_config = ExecutionProps::new(); -/// let mut const_evaluator = ConstEvaluator::new(&optimizer_config); +/// let execution_props = ExecutionProps::new(); +/// let mut const_evaluator = ConstEvaluator::new(&execution_props); /// /// // (1 + 2) + a /// let expr = (lit(1) + lit(2)) + col("a"); @@ -295,7 +294,7 @@ pub struct ConstEvaluator<'a> { /// descendants) so this Expr can be evaluated can_evaluate: Vec, - optimizer_config: &'a ExecutionProps, + execution_props: &'a ExecutionProps, input_schema: DFSchema, input_batch: RecordBatch, } @@ -341,8 +340,8 @@ impl<'a> ExprRewriter for ConstEvaluator<'a> { impl<'a> ConstEvaluator<'a> { /// Create a new `ConstantEvaluator`. Session constants (such as /// the time for `now()` are taken from the passed - /// `optimizer_config`. - pub fn new(optimizer_config: &'a ExecutionProps) -> Self { + /// `execution_props`. + pub fn new(execution_props: &'a ExecutionProps) -> Self { let input_schema = DFSchema::empty(); // The dummy column name is unused and doesn't matter as only @@ -357,7 +356,7 @@ impl<'a> ConstEvaluator<'a> { Self { can_evaluate: vec![], - optimizer_config, + execution_props, input_schema, input_batch, } @@ -423,11 +422,11 @@ impl<'a> ConstEvaluator<'a> { &expr, &self.input_schema, &self.input_batch.schema(), - self.optimizer_config, + self.execution_props, )?; let col_val = phys_expr.evaluate(&self.input_batch)?; match col_val { - crate::physical_plan::ColumnarValue::Array(a) => { + ColumnarValue::Array(a) => { if a.len() != 1 { Err(DataFusionError::Execution(format!( "Could not evaluate the expressison, found a result of length {}", @@ -437,7 +436,7 @@ impl<'a> ConstEvaluator<'a> { Ok(ScalarValue::try_from_array(&a, 0)?) } } - crate::physical_plan::ColumnarValue::Scalar(s) => Ok(s), + ColumnarValue::Scalar(s) => Ok(s), } } } @@ -745,22 +744,42 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { } } +/// A macro to assert that one string is contained within another with +/// a nice error message if they are not. +/// +/// Usage: `assert_contains!(actual, expected)` +/// +/// Is a macro so test error +/// messages are on the same line as the failure; +/// +/// Both arguments must be convertable into Strings (Into) +#[macro_export] +macro_rules! assert_contains { + ($ACTUAL: expr, $EXPECTED: expr) => { + let actual_value: String = $ACTUAL.into(); + let expected_value: String = $EXPECTED.into(); + assert!( + actual_value.contains(&expected_value), + "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}", + expected_value, + actual_value + ); + }; +} + #[cfg(test)] mod tests { use super::*; - use crate::assert_contains; - use crate::logical_plan::{call_fn, create_udf}; - use crate::physical_plan::functions::make_scalar_function; - use crate::physical_plan::udf::ScalarUDF; - use crate::test_util::scan_empty; use arrow::array::{ArrayRef, Int32Array}; use chrono::{DateTime, TimeZone, Utc}; use datafusion_common::DFField; + use datafusion_expr::logical_plan::table_scan; use datafusion_expr::{ - and, binary_expr, col, lit, lit_timestamp_nano, + and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano, logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr, - ExprSchemable, + ExprSchemable, ScalarUDF, }; + use datafusion_physical_expr::functions::make_scalar_function; use std::collections::HashMap; use std::sync::Arc; @@ -1192,12 +1211,12 @@ mod tests { expected_expr: Expr, date_time: &DateTime, ) { - let optimizer_config = ExecutionProps { + let execution_props = ExecutionProps { query_execution_start_time: *date_time, var_providers: None, }; - let mut const_evaluator = ConstEvaluator::new(&optimizer_config); + let mut const_evaluator = ConstEvaluator::new(&execution_props); let evaluated_expr = input_expr .clone() .rewrite(&mut const_evaluator) @@ -1220,8 +1239,8 @@ mod tests { fn simplify(expr: Expr) -> Expr { let schema = expr_test_schema(); - let optimizer_config = ExecutionProps::new(); - let info = SimplifyContext::new(vec![&schema], &optimizer_config); + let execution_props = ExecutionProps::new(); + let info = SimplifyContext::new(vec![&schema], &execution_props); expr.simplify(&info).unwrap() } @@ -1522,7 +1541,7 @@ mod tests { Field::new("c", DataType::Boolean, false), Field::new("d", DataType::UInt32, false), ]); - scan_empty(Some("test"), &schema, None) + table_scan(Some("test"), &schema, None) .expect("creating scan") .build() .expect("building plan") @@ -1710,8 +1729,8 @@ mod tests { .aggregate( vec![col("a"), col("c")], vec![ - crate::logical_plan::max(col("b").eq(lit(true))), - crate::logical_plan::min(col("b")), + datafusion_expr::max(col("b").eq(lit(true))), + datafusion_expr::min(col("b")), ], ) .unwrap()