Skip to content

fix interval units parsing #12448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 10 additions & 49 deletions datafusion/sql/src/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// under the License.

use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
use arrow::compute::kernels::cast_utils::{
parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit,
};
use arrow::datatypes::DECIMAL128_MAX_PRECISION;
use arrow_schema::DataType;
use datafusion_common::{
Expand Down Expand Up @@ -232,27 +234,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

let value = interval_literal(*interval.value, negative)?;

let value = if has_units(&value) {
// If the interval already contains a unit
// `INTERVAL '5 month' rather than `INTERVAL '5' month`
// skip the other unit
value
} else {
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
match interval.leading_field.as_ref() {
Some(leading_field) => {
format!("{value} {leading_field}")
}
None => {
// default to seconds for the units
// `INTERVAL '5' is parsed as '5 seconds'
format!("{value} seconds")
}
}
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
let value = match interval.leading_field.as_ref() {
Some(leading_field) => format!("{value} {leading_field}"),
None => value,
};

let val = parse_interval_month_day_nano(&value)?;
let config = IntervalParseConfig::new(IntervalUnit::Second);
let val = parse_interval_month_day_nano_config(&value, config)?;
Ok(lit(ScalarValue::IntervalMonthDayNano(Some(val))))
}
}
Expand Down Expand Up @@ -292,35 +282,6 @@ fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result<String> {
}
}

// TODO make interval parsing better in arrow-rs / expose `IntervalType`
fn has_units(val: &str) -> bool {
let val = val.to_lowercase();
val.ends_with("century")
|| val.ends_with("centuries")
|| val.ends_with("decade")
|| val.ends_with("decades")
|| val.ends_with("year")
|| val.ends_with("years")
|| val.ends_with("month")
|| val.ends_with("months")
|| val.ends_with("week")
|| val.ends_with("weeks")
|| val.ends_with("day")
|| val.ends_with("days")
|| val.ends_with("hour")
|| val.ends_with("hours")
|| val.ends_with("minute")
|| val.ends_with("minutes")
|| val.ends_with("second")
|| val.ends_with("seconds")
|| val.ends_with("millisecond")
|| val.ends_with("milliseconds")
|| val.ends_with("microsecond")
|| val.ends_with("microseconds")
|| val.ends_with("nanosecond")
|| val.ends_with("nanoseconds")
}

/// Try to decode bytes from hex literal string.
///
/// None will be returned if the input literal is hex-invalid.
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
order_by,
partition_by,
cluster_by,
clustered_by,
options,
strict,
copy_grants,
Expand Down Expand Up @@ -346,6 +347,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if cluster_by.is_some() {
return not_impl_err!("Cluster by not supported")?;
}
if clustered_by.is_some() {
return not_impl_err!("Clustered by not supported")?;
}
if options.is_some() {
return not_impl_err!("Options not supported")?;
}
Expand Down
6 changes: 0 additions & 6 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,6 @@ SELECT interval '5 day'
----
5 days

# Hour is ignored, this matches PostgreSQL
query ?
SELECT interval '5 day' hour
----
5 days

query ?
SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'
----
Expand Down
41 changes: 41 additions & 0 deletions datafusion/sqllogictest/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,31 @@ select i - d from t;
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types
select i - ts from t;

# interval unit abreiviation and plurals
query ?
select interval '1s'
----
1.000000000 secs

query ?
select '1s'::interval
----
1.000000000 secs

query ?
select interval'1sec'
----
1.000000000 secs

query ?
select interval '1ms'
----
0.001000000 secs

query ?
select interval '1 y' + interval '1 year'
----
24 mons

# interval (scalar) + date / timestamp (array)
query D
Expand All @@ -502,6 +527,22 @@ select '1 month'::interval + ts from t;
2000-02-01T12:11:10
2000-03-01T00:00:00

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 1' hour
----
5 days 1 hours

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 0' hour
----
5 days

# This is interpreted as "0 hours" with PostgreSQL, should be fixed with
query error DataFusion error: Arrow error: Parser error: Invalid input syntax for type interval: "5 day HOUR"
Comment on lines +542 to +543
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SELECT interval '5 day' hour

# expected error interval (scalar) - date / timestamp (array)
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types
select '1 month'::interval - d from t;
Expand Down