Skip to content

Update to sqlparser-rs v0.50.0 #12014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ rand = "0.8"
regex = "1.8"
rstest = "0.22.0"
serde_json = "1"
sqlparser = { version = "0.49", features = ["visitor"] }
sqlparser = { version = "0.50.0", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
tokio = { version = "1.36", features = ["macros", "rt", "sync"] }
Expand Down
4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::Value(value) => {
self.parse_value(value, planner_context.prepare_param_data_types())
}
SQLExpr::Extract { field, expr } => {
SQLExpr::Extract { field, expr, .. } => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend explicitly listing out the other fields here and returning a NotImplemented error if there is some new syntax that DataFusion doesn't support.

Otherwise what we have seen a few times in the past is that the parser accepts some new syntax element but DataFusion just igores it silently

let mut extract_args = vec![
Expr::Literal(ScalarValue::from(format!("{field}"))),
self.sql_expr_to_logical_expr(*expr, schema, planner_context)?,
Expand Down
3 changes: 2 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
SQLDataType::Bytea => Ok(DataType::Binary),
SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
SQLDataType::Struct(fields) => {
SQLDataType::Struct(fields, _) => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise here I think it would be good to name the field like

Suggested change
SQLDataType::Struct(fields, _) => {
SQLDataType::Struct(fields, _bracket_type) => {

So it is easier to understand by reading just the code what is being ignored

let fields = fields
.iter()
.enumerate()
Expand Down Expand Up @@ -513,6 +513,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
| SQLDataType::Union(_)
| SQLDataType::Nullable(_)
| SQLDataType::LowCardinality(_)
| SQLDataType::Trigger
=> not_impl_err!(
"Unsupported SQL type {sql_type:?}"
),
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if let Some(func_args) = args {
let tbl_func_name = name.0.first().unwrap().value.to_string();
let args = func_args
.args
.into_iter()
.flat_map(|arg| {
if let FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) = arg
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
match statement {
Statement::ExplainTable {
describe_alias: DescribeAlias::Describe, // only parse 'DESCRIBE table_name' and not 'EXPLAIN table_name'
hive_format: _,
table_name,
..
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

per comment above, I recommend keeping the fields named explcitly

} => self.describe_table_to_plan(table_name),
Statement::Explain {
verbose,
Expand Down
5 changes: 4 additions & 1 deletion datafusion/sql/src/unparser/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,10 @@ impl TableRelationBuilder {
None => return Err(Into::into(UninitializedFieldError::from("name"))),
},
alias: self.alias.clone(),
args: self.args.clone(),
args: self.args.clone().map(|args| ast::TableFunctionArgs {
args,
settings: None,
}),
with_hints: self.with_hints.clone(),
version: self.version.clone(),
partitions: self.partitions.clone(),
Expand Down
5 changes: 4 additions & 1 deletion datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ pub struct DefaultDialect {}
impl Dialect for DefaultDialect {
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
let id_upper = identifier.to_uppercase();
// special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
// ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
if (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
|| !identifier_regex.is_match(identifier)
{
Some('"')
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ impl Unparser<'_> {
return Some(ast::Expr::Extract {
field,
expr: Box::new(date_expr),
syntax: ast::ExtractSyntax::From,
});
}
}
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ impl Unparser<'_> {

let ast_join = ast::Join {
relation,
global: false,
join_operator: self
.join_operator_to_sql(join.join_type, join_constraint),
};
Expand Down Expand Up @@ -435,6 +436,7 @@ impl Unparser<'_> {

let ast_join = ast::Join {
relation,
global: false,
join_operator: self.join_operator_to_sql(
JoinType::Inner,
ast::JoinConstraint::On(ast::Expr::Value(ast::Value::Boolean(
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ pub(crate) fn value_to_string(value: &Value) -> Option<String> {
Value::SingleQuotedString(s) => Some(s.to_string()),
Value::DollarQuotedString(s) => Some(s.to_string()),
Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()),
Value::UnicodeStringLiteral(s) => Some(s.to_string()),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand why value_to_string supports (returns Some(String)) for some types of string, but not others, so I'm not clear whether Value::UnicodeStringLiteral should return Some or None?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this was not carefully designed and likely is not an intentional oversight

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what you have here is ok

Value::DoubleQuotedString(_)
| Value::EscapedStringLiteral(_)
| Value::NationalStringLiteral(_)
Expand Down
52 changes: 39 additions & 13 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -848,8 +848,10 @@ SELECT EXTRACT("year" FROM timestamp '2020-09-08T12:00:00+00:00')
----
2020

query error
query R
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🥳 🦜

SELECT EXTRACT('year' FROM timestamp '2020-09-08T12:00:00+00:00')
----
2020

query R
SELECT date_part('QUARTER', CAST('2000-01-01' AS DATE))
Expand All @@ -866,8 +868,10 @@ SELECT EXTRACT("quarter" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
3

query error
query R
SELECT EXTRACT('quarter' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
3

query R
SELECT date_part('MONTH', CAST('2000-01-01' AS DATE))
Expand All @@ -884,8 +888,10 @@ SELECT EXTRACT("month" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
9

query error
query R
SELECT EXTRACT('month' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
9

query R
SELECT date_part('WEEK', CAST('2003-01-01' AS DATE))
Expand All @@ -902,8 +908,10 @@ SELECT EXTRACT("WEEK" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
37

query error
query R
SELECT EXTRACT('WEEK' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
37

query R
SELECT date_part('DAY', CAST('2000-01-01' AS DATE))
Expand All @@ -920,8 +928,10 @@ SELECT EXTRACT("day" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
8

query error
query R
SELECT EXTRACT('day' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
8

query R
SELECT date_part('DOY', CAST('2000-01-01' AS DATE))
Expand All @@ -938,8 +948,10 @@ SELECT EXTRACT("doy" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
252

query error
query R
SELECT EXTRACT('doy' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
252

query R
SELECT date_part('DOW', CAST('2000-01-01' AS DATE))
Expand All @@ -956,8 +968,10 @@ SELECT EXTRACT("dow" FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
2

query error
query R
SELECT EXTRACT('dow' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
----
2

query R
SELECT date_part('HOUR', CAST('2000-01-01' AS DATE))
Expand All @@ -974,8 +988,10 @@ SELECT EXTRACT("hour" FROM to_timestamp('2020-09-08T12:03:03+00:00'))
----
12

query error
query R
SELECT EXTRACT('hour' FROM to_timestamp('2020-09-08T12:03:03+00:00'))
----
12

query R
SELECT EXTRACT(minute FROM to_timestamp('2020-09-08T12:12:00+00:00'))
Expand All @@ -987,8 +1003,10 @@ SELECT EXTRACT("minute" FROM to_timestamp('2020-09-08T12:12:00+00:00'))
----
12

query error
query R
SELECT EXTRACT('minute' FROM to_timestamp('2020-09-08T12:12:00+00:00'))
----
12

query R
SELECT date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00'))
Expand Down Expand Up @@ -1035,17 +1053,25 @@ SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456780

query error
query R
SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12.12345678

query error
query R
SELECT EXTRACT('millisecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123.45678

query error
query R
SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456.78

query error
query R
SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
----
12123456780

# Keep precision when coercing Utf8 to Timestamp
query R
Expand Down
Loading