Skip to content

chore: remove ScalarUDFImpl::return_type_from_exprs #15130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ impl ScalarUDFImpl for TakeUDF {
&self.signature
}
fn return_type(&self, _args: &[DataType]) -> Result<DataType> {
not_impl_err!("Not called because the return_type_from_exprs is implemented")
not_impl_err!("Not called because the return_type_from_args is implemented")
}

/// This function returns the type of the first or second argument based on
Expand Down
40 changes: 2 additions & 38 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,30 +172,14 @@ impl ScalarUDF {
///
/// # Notes
///
/// If a function implement [`ScalarUDFImpl::return_type_from_exprs`],
/// If a function implement [`ScalarUDFImpl::return_type_from_args`],
/// its [`ScalarUDFImpl::return_type`] should raise an error.
///
/// See [`ScalarUDFImpl::return_type`] for more details.
pub fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
self.inner.return_type(arg_types)
}

/// The datatype this function returns given the input argument input types.
/// This function is used when the input arguments are [`Expr`]s.
///
///
/// See [`ScalarUDFImpl::return_type_from_exprs`] for more details.
#[allow(deprecated)]
pub fn return_type_from_exprs(
&self,
args: &[Expr],
schema: &dyn ExprSchema,
arg_types: &[DataType],
) -> Result<DataType> {
// If the implementation provides a return_type_from_exprs, use it
self.inner.return_type_from_exprs(args, schema, arg_types)
}

/// Return the datatype this function returns given the input argument types.
///
/// See [`ScalarUDFImpl::return_type_from_args`] for more details.
Expand Down Expand Up @@ -351,7 +335,7 @@ pub struct ScalarFunctionArgs<'a> {
pub args: Vec<ColumnarValue>,
/// The number of rows in record batch being evaluated
pub number_rows: usize,
/// The return type of the scalar function returned (from `return_type` or `return_type_from_exprs`)
/// The return type of the scalar function returned (from `return_type` or `return_type_from_args`)
/// when creating the physical expression from the logical expression
pub return_type: &'a DataType,
}
Expand Down Expand Up @@ -540,16 +524,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
/// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType>;

#[deprecated(since = "45.0.0", note = "Use `return_type_from_args` instead")]
fn return_type_from_exprs(
&self,
_args: &[Expr],
_schema: &dyn ExprSchema,
arg_types: &[DataType],
) -> Result<DataType> {
self.return_type(arg_types)
}

/// What type will be returned by this function, given the arguments?
///
/// By default, this function calls [`Self::return_type`] with the
Expand Down Expand Up @@ -889,16 +863,6 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
&self.aliases
}

#[allow(deprecated)]
fn return_type_from_exprs(
&self,
args: &[Expr],
schema: &dyn ExprSchema,
arg_types: &[DataType],
) -> Result<DataType> {
self.inner.return_type_from_exprs(args, schema, arg_types)
}

fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
self.inner.return_type_from_args(args)
}
Expand Down
32 changes: 2 additions & 30 deletions datafusion/functions-nested/src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1001,9 +1001,9 @@ where
mod tests {
use super::array_element_udf;
use arrow::datatypes::{DataType, Field};
use datafusion_common::{Column, DFSchema, ScalarValue};
use datafusion_common::{Column, DFSchema};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{cast, Expr, ExprSchemable};
use datafusion_expr::{Expr, ExprSchemable};
use std::collections::HashMap;

// Regression test for https://github.com/apache/datafusion/issues/13755
Expand Down Expand Up @@ -1037,34 +1037,6 @@ mod tests {
fixed_size_list_type
);

// ScalarUDFImpl::return_type_from_exprs with typed exprs
assert_eq!(
udf.return_type_from_exprs(
&[
cast(Expr::Literal(ScalarValue::Null), array_type.clone()),
cast(Expr::Literal(ScalarValue::Null), index_type.clone()),
],
&schema,
&[array_type.clone(), index_type.clone()]
)
.unwrap(),
fixed_size_list_type
);

// ScalarUDFImpl::return_type_from_exprs with exprs not carrying type
assert_eq!(
udf.return_type_from_exprs(
&[
Expr::Column(Column::new_unqualified("my_array")),
Expr::Column(Column::new_unqualified("my_index")),
],
&schema,
&[array_type.clone(), index_type.clone()]
)
.unwrap(),
fixed_size_list_type
);

// Via ExprSchemable::get_type (e.g. SimplifyInfo)
let udf_expr = Expr::ScalarFunction(ScalarFunction {
func: array_element_udf(),
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/core/union_extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ impl ScalarUDFImpl for UnionExtractFun {
}

fn return_type(&self, _: &[DataType]) -> Result<DataType> {
// should be using return_type_from_exprs and not calling the default implementation
internal_err!("union_extract should return type from exprs")
// should be using return_type_from_args and not calling the default implementation
internal_err!("union_extract should return type from args")
}

fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
Expand Down
4 changes: 0 additions & 4 deletions datafusion/physical-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ mod partitioning;
mod physical_expr;
pub mod planner;
mod scalar_function;
pub mod udf {
#[allow(deprecated)]
pub use crate::scalar_function::create_physical_expr;
}
pub mod statistics;
pub mod utils;
pub mod window;
Expand Down
36 changes: 2 additions & 34 deletions datafusion/physical-expr/src/scalar_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ use crate::PhysicalExpr;

use arrow::array::{Array, RecordBatch};
use arrow::datatypes::{DataType, Schema};
use datafusion_common::{internal_err, DFSchema, Result, ScalarValue};
use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::sort_properties::ExprProperties;
use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf;
use datafusion_expr::{
expr_vec_fmt, ColumnarValue, Expr, ReturnTypeArgs, ScalarFunctionArgs, ScalarUDF,
expr_vec_fmt, ColumnarValue, ReturnTypeArgs, ScalarFunctionArgs, ScalarUDF,
};

/// Physical expression of a scalar function
Expand Down Expand Up @@ -261,35 +261,3 @@ impl PhysicalExpr for ScalarFunctionExpr {
})
}
}

/// Create a physical expression for the UDF.
#[deprecated(since = "45.0.0", note = "use ScalarFunctionExpr::new() instead")]
pub fn create_physical_expr(
fun: &ScalarUDF,
input_phy_exprs: &[Arc<dyn PhysicalExpr>],
input_schema: &Schema,
args: &[Expr],
input_dfschema: &DFSchema,
) -> Result<Arc<dyn PhysicalExpr>> {
let input_expr_types = input_phy_exprs
.iter()
.map(|e| e.data_type(input_schema))
.collect::<Result<Vec<_>>>()?;

// verify that input data types is consistent with function's `TypeSignature`
data_types_with_scalar_udf(&input_expr_types, fun)?;

// Since we have arg_types, we don't need args and schema.
let return_type =
fun.return_type_from_exprs(args, input_dfschema, &input_expr_types)?;

Ok(Arc::new(
ScalarFunctionExpr::new(
fun.name(),
Arc::new(fun.clone()),
input_phy_exprs.to_vec(),
return_type,
)
.with_nullable(fun.is_nullable(args, input_dfschema)),
))
}
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/execution_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream};
pub use datafusion_expr::{Accumulator, ColumnarValue};
pub use datafusion_physical_expr::window::WindowExpr;
pub use datafusion_physical_expr::{
expressions, udf, Distribution, Partitioning, PhysicalExpr,
expressions, Distribution, Partitioning, PhysicalExpr,
};

use std::any::Any;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub use datafusion_expr::{Accumulator, ColumnarValue};
pub use datafusion_physical_expr::window::WindowExpr;
use datafusion_physical_expr::PhysicalSortExpr;
pub use datafusion_physical_expr::{
expressions, udf, Distribution, Partitioning, PhysicalExpr,
expressions, Distribution, Partitioning, PhysicalExpr,
};

pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay};
Expand Down