apache · andygrove · Feb 15, 2023 · Jan 31, 2023 · Feb 3, 2023 · Feb 4, 2023
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -38,6 +38,7 @@ datafusion = { version = "18.0.0", features = ["pyarrow", "avro"] }
 datafusion-expr = "18.0.0"
 datafusion-optimizer = "18.0.0"
 datafusion-common = { version = "18.0.0", features = ["pyarrow"] }
+datafusion-sql = "18.0.0"
 datafusion-substrait = "18.0.0"
 uuid = { version = "1.2", features = ["v4"] }
 mimalloc = { version = "*", optional = true, default-features = false }

diff --git a/datafusion/__init__.py b/datafusion/__init__.py
@@ -32,10 +32,14 @@
     SessionContext,
     SessionConfig,
     RuntimeConfig,
-    Expression,
     ScalarUDF,
 )
 
+from .expr import (
+    Expr,
+    TableScan,
+)
+
 __version__ = importlib_metadata.version(__name__)
 
 __all__ = [
@@ -44,11 +48,12 @@
     "SessionContext",
     "SessionConfig",
     "RuntimeConfig",
-    "Expression",
+    "Expr",
     "AggregateUDF",
     "ScalarUDF",
     "column",
     "literal",
+    "TableScan",
 ]
 
 
@@ -71,7 +76,7 @@ def evaluate(self) -> pa.Scalar:
 
 
 def column(value):
-    return Expression.column(value)
+    return Expr.column(value)
 
 
 col = column
@@ -80,7 +85,7 @@ def column(value):
 def literal(value):
     if not isinstance(value, pa.Scalar):
         value = pa.scalar(value)
-    return Expression.literal(value)
+    return Expr.literal(value)
 
 
 lit = literal

diff --git a/datafusion/expr.py b/datafusion/expr.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from ._internal import expr
+
+
+def __getattr__(name):  # PEP 562 hook: runs when `name` is not found here
+    return getattr(expr, name)  # delegate the lookup to `_internal.expr`
diff --git a/datafusion/tests/test_context.py b/datafusion/tests/test_context.py
@@ -172,7 +172,8 @@ def test_dataset_filter_nested_data(ctx):
 
     df = ctx.table("t")
 
-    # This filter will not be pushed down to DatasetExec since it isn't supported
+    # This filter will not be pushed down to DatasetExec since it
+    # isn't supported
     df = df.select(
         column("nested_data")["a"] + column("nested_data")["b"],
         column("nested_data")["a"] - column("nested_data")["b"],

diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
@@ -314,8 +314,8 @@ def test_execution_plan(aggregate_df):
 
     indent = plan.display_indent()
 
-    # indent plan will be different for everyone due to absolute path to filename, so
-    # we just check for some expected content
+    # indent plan will be different for everyone due to absolute path
+    # to filename, so we just check for some expected content
     assert "ProjectionExec:" in indent
     assert "AggregateExec:" in indent
     assert "CoalesceBatchesExec:" in indent

diff --git a/datafusion/tests/test_imports.py b/datafusion/tests/test_imports.py
@@ -22,11 +22,15 @@
     AggregateUDF,
     DataFrame,
     SessionContext,
-    Expression,
     ScalarUDF,
     functions,
 )
 
+from datafusion.expr import (
+    Expr,
+    TableScan,
+)
+
 
 def test_import_datafusion():
     assert datafusion.__name__ == "datafusion"
@@ -39,13 +43,15 @@ def test_datafusion_python_version():
 def test_class_module_is_datafusion():
     for klass in [
         SessionContext,
-        Expression,
         DataFrame,
         ScalarUDF,
         AggregateUDF,
     ]:
         assert klass.__module__ == "datafusion"
 
+    for klass in [Expr, TableScan]:
+        assert klass.__module__ == "datafusion.expr"
+
 
 def test_import_from_functions_submodule():
     from datafusion.functions import abs, sin  # noqa
@@ -62,7 +68,7 @@ def test_classes_are_inheritable():
     class MyExecContext(SessionContext):
         pass
 
-    class MyExpression(Expression):
+    class MyExpression(Expr):
         pass
 
     class MyDataFrame(DataFrame):

diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -27,6 +27,6 @@ API Reference
    api/config
    api/dataframe
    api/execution_context
-   api/expression
+   api/expr
    api/functions
    api/object_store
diff --git a/docs/source/api/expression.rst b/docs/source/api/expression.rst
@@ -18,10 +18,10 @@
 .. _api.expression:
 .. currentmodule:: datafusion
 
-Expression
+Expr
 ==========
 
 .. autosummary::
    :toctree: ../generated/
 
-   Expression
+   Expr
diff --git a/src/dataframe.rs b/src/dataframe.rs
@@ -18,7 +18,7 @@
 use crate::physical_plan::PyExecutionPlan;
 use crate::sql::logical::PyLogicalPlan;
 use crate::utils::wait_for_future;
-use crate::{errors::DataFusionError, expression::PyExpr};
+use crate::{errors::DataFusionError, expr::PyExpr};
 use datafusion::arrow::datatypes::Schema;
 use datafusion::arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType};
 use datafusion::arrow::util::pretty;

diff --git a/src/expression.rs → src/expr.rs b/src/expression.rs → src/expr.rs
@@ -24,8 +24,10 @@ use datafusion_expr::{col, lit, Cast, Expr, GetIndexedField};
 
 use datafusion::scalar::ScalarValue;
 
-/// An PyExpr that can be used on a DataFrame
-#[pyclass(name = "Expression", module = "datafusion", subclass)]
+pub mod table_scan;
+
+/// A PyExpr that can be used on a DataFrame
+#[pyclass(name = "Expr", module = "datafusion.expr", subclass)]
 #[derive(Debug, Clone)]
 pub(crate) struct PyExpr {
     pub(crate) expr: Expr,
@@ -133,3 +135,10 @@ impl PyExpr {
         expr.into()
     }
 }
+
+/// Registers `PyExpr` and `PyTableScan` on the `expr` module `m`, mirroring the layout of the `datafusion-expr` crate: <https://docs.rs/datafusion-expr/latest/datafusion_expr/>
+pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
+    m.add_class::<PyExpr>()?;
+    m.add_class::<table_scan::PyTableScan>()?;
+    Ok(())
+}