Skip to content

Commit

Permalink
Move date_part, date_trunc, date_bin functions to datafusion-functions (
Browse files Browse the repository at this point in the history
#9435)

* Move date_part, date_trunc, date_bin functions to datafusion-functions

* I do not understand why the logical plan changed but updating the explain text to reflect the change. The physical plan is unchanged.

* Fix fmt

* Improvements to remove datafusion-functions dependency from sq and physical-expr

* Fix function arguments for date_bin, date_trunc and date_part.

* Fix projection change. Add new test date_bin monotonicity

---------

Co-authored-by: Mustafa Akur <mustafa.akur@synnada.ai>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
3 people authored Mar 10, 2024
1 parent 6710e6d commit acddecb
Show file tree
Hide file tree
Showing 19 changed files with 1,981 additions and 1,740 deletions.
143 changes: 1 addition & 142 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::type_coercion::binary::get_wider_type;
use crate::type_coercion::functions::data_types;
use crate::{FuncMonotonicity, Signature, TypeSignature, Volatility};

use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
use arrow::datatypes::{DataType, Field, Fields, TimeUnit};
use datafusion_common::{plan_err, DataFusionError, Result};

use strum::IntoEnumIterator;
Expand Down Expand Up @@ -180,12 +180,6 @@ pub enum BuiltinScalarFunction {
Concat,
/// concat_ws
ConcatWithSeparator,
/// date_part
DatePart,
/// date_trunc
DateTrunc,
/// date_bin
DateBin,
/// ends_with
EndsWith,
/// initcap
Expand Down Expand Up @@ -382,9 +376,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Chr => Volatility::Immutable,
BuiltinScalarFunction::Concat => Volatility::Immutable,
BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable,
BuiltinScalarFunction::DatePart => Volatility::Immutable,
BuiltinScalarFunction::DateTrunc => Volatility::Immutable,
BuiltinScalarFunction::DateBin => Volatility::Immutable,
BuiltinScalarFunction::EndsWith => Volatility::Immutable,
BuiltinScalarFunction::InitCap => Volatility::Immutable,
BuiltinScalarFunction::Left => Volatility::Immutable,
Expand Down Expand Up @@ -592,27 +583,6 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::Concat => Ok(Utf8),
BuiltinScalarFunction::ConcatWithSeparator => Ok(Utf8),
BuiltinScalarFunction::DatePart => Ok(Float64),
BuiltinScalarFunction::DateBin | BuiltinScalarFunction::DateTrunc => {
match &input_expr_types[1] {
Timestamp(Nanosecond, None) | Utf8 | Null => {
Ok(Timestamp(Nanosecond, None))
}
Timestamp(Nanosecond, tz_opt) => {
Ok(Timestamp(Nanosecond, tz_opt.clone()))
}
Timestamp(Microsecond, tz_opt) => {
Ok(Timestamp(Microsecond, tz_opt.clone()))
}
Timestamp(Millisecond, tz_opt) => {
Ok(Timestamp(Millisecond, tz_opt.clone()))
}
Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
_ => plan_err!(
"The {self} function can only accept timestamp as the second arg."
),
}
}
BuiltinScalarFunction::InitCap => {
utf8_to_str_type(&input_expr_types[0], "initcap")
}
Expand Down Expand Up @@ -784,7 +754,6 @@ impl BuiltinScalarFunction {
/// Return the argument [`Signature`] supported by this function
pub fn signature(&self) -> Signature {
use DataType::*;
use IntervalUnit::*;
use TimeUnit::*;
use TypeSignature::*;
// note: the physical expression must accept the type returned by this function or the execution panics.
Expand Down Expand Up @@ -947,108 +916,6 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
BuiltinScalarFunction::DateTrunc => Signature::one_of(
vec![
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![
Utf8,
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![
Utf8,
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![
Utf8,
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![
Utf8,
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
]),
],
self.volatility(),
),
BuiltinScalarFunction::DateBin => {
let base_sig = |array_type: TimeUnit| {
vec![
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), None),
Timestamp(Nanosecond, None),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), None),
Timestamp(Nanosecond, None),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), None),
]),
Exact(vec![
Interval(MonthDayNano),
Timestamp(array_type.clone(), Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type.clone(), None),
]),
Exact(vec![
Interval(DayTime),
Timestamp(array_type, Some(TIMEZONE_WILDCARD.into())),
]),
]
};

let full_sig = [Nanosecond, Microsecond, Millisecond, Second]
.into_iter()
.map(base_sig)
.collect::<Vec<_>>()
.concat();

Signature::one_of(full_sig, self.volatility())
}
BuiltinScalarFunction::DatePart => Signature::one_of(
vec![
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Exact(vec![
Utf8,
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
Exact(vec![
Utf8,
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
Exact(vec![
Utf8,
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Timestamp(Second, None)]),
Exact(vec![
Utf8,
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
]),
Exact(vec![Utf8, Date64]),
Exact(vec![Utf8, Date32]),
],
self.volatility(),
),
BuiltinScalarFunction::SplitPart => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8, Int64]),
Expand Down Expand Up @@ -1240,11 +1107,6 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::Pi
) {
Some(vec![Some(true)])
} else if matches!(
&self,
BuiltinScalarFunction::DateTrunc | BuiltinScalarFunction::DateBin
) {
Some(vec![None, Some(true)])
} else if *self == BuiltinScalarFunction::Log {
Some(vec![Some(true), Some(false)])
} else {
Expand Down Expand Up @@ -1337,9 +1199,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::CurrentDate => &["current_date", "today"],
BuiltinScalarFunction::CurrentTime => &["current_time"],
BuiltinScalarFunction::MakeDate => &["make_date"],
BuiltinScalarFunction::DateBin => &["date_bin"],
BuiltinScalarFunction::DateTrunc => &["date_trunc", "datetrunc"],
BuiltinScalarFunction::DatePart => &["date_part", "datepart"],
BuiltinScalarFunction::ToChar => &["to_char", "date_format"],
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],

Expand Down
5 changes: 5 additions & 0 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,11 @@ impl ScalarFunction {
args,
}
}

/// Create a new ScalarFunction expression with a user-defined function (UDF)
pub fn new_func_def(func_def: ScalarFunctionDefinition, args: Vec<Expr>) -> Self {
Self { func_def, args }
}
}

/// Access a sub field of a nested type, such as `Field` or `List`
Expand Down
6 changes: 0 additions & 6 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -819,9 +819,6 @@ nary_scalar_expr!(
);

// date functions
scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
scalar_expr!(DateTrunc, date_trunc, part date, "truncates the date to a specified level of precision");
scalar_expr!(DateBin, date_bin, stride source origin, "coerces an arbitrary timestamp to the start of the nearest specified interval");
scalar_expr!(
ToChar,
to_char,
Expand Down Expand Up @@ -1309,9 +1306,6 @@ mod test {
test_scalar_expr!(Trim, trim, string);
test_scalar_expr!(Upper, upper, string);

test_scalar_expr!(DatePart, date_part, part, date);
test_scalar_expr!(DateTrunc, date_trunc, part, date);
test_scalar_expr!(DateBin, date_bin, stride, source, origin);
test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);

test_scalar_expr!(ArrayAppend, array_append, array, element);
Expand Down
Loading

0 comments on commit acddecb

Please sign in to comment.