Skip to content

Commit 14815e7

Browse files
author
Marko Mikulicic
committed
fix: Case insensitive unquoted identifiers
1 parent e4a056f commit 14815e7

File tree

3 files changed

+182
-12
lines changed

3 files changed

+182
-12
lines changed

datafusion/src/execution/context.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3634,6 +3634,173 @@ mod tests {
36343634
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
36353635
}
36363636

3637+
#[tokio::test]
3638+
async fn normalized_column_identifiers() {
3639+
// create local execution context
3640+
let mut ctx = ExecutionContext::new();
3641+
3642+
// register csv file with the execution context
3643+
ctx.register_csv(
3644+
"case_insensitive_test",
3645+
"tests/example.csv",
3646+
CsvReadOptions::new(),
3647+
)
3648+
.await
3649+
.unwrap();
3650+
3651+
let sql = "SELECT A, b FROM case_insensitive_test";
3652+
let result = plan_and_collect(&mut ctx, sql)
3653+
.await
3654+
.expect("ran plan correctly");
3655+
let expected = vec![
3656+
"+---+---+",
3657+
"| a | b |",
3658+
"+---+---+",
3659+
"| 1 | 2 |",
3660+
"+---+---+",
3661+
];
3662+
assert_batches_sorted_eq!(expected, &result);
3663+
3664+
let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
3665+
let result = plan_and_collect(&mut ctx, sql)
3666+
.await
3667+
.expect("ran plan correctly");
3668+
let expected = vec![
3669+
"+---+---+",
3670+
"| a | b |",
3671+
"+---+---+",
3672+
"| 1 | 2 |",
3673+
"+---+---+",
3674+
];
3675+
assert_batches_sorted_eq!(expected, &result);
3676+
3677+
// Aliases
3678+
3679+
let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
3680+
let result = plan_and_collect(&mut ctx, sql)
3681+
.await
3682+
.expect("ran plan correctly");
3683+
let expected = vec![
3684+
"+---+---+",
3685+
"| x | b |",
3686+
"+---+---+",
3687+
"| 1 | 2 |",
3688+
"+---+---+",
3689+
];
3690+
assert_batches_sorted_eq!(expected, &result);
3691+
3692+
let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
3693+
let result = plan_and_collect(&mut ctx, sql)
3694+
.await
3695+
.expect("ran plan correctly");
3696+
let expected = vec![
3697+
"+---+---+",
3698+
"| x | b |",
3699+
"+---+---+",
3700+
"| 1 | 2 |",
3701+
"+---+---+",
3702+
];
3703+
assert_batches_sorted_eq!(expected, &result);
3704+
3705+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
3706+
let result = plan_and_collect(&mut ctx, sql)
3707+
.await
3708+
.expect("ran plan correctly");
3709+
let expected = vec![
3710+
"+---+---+",
3711+
"| X | b |",
3712+
"+---+---+",
3713+
"| 1 | 2 |",
3714+
"+---+---+",
3715+
];
3716+
assert_batches_sorted_eq!(expected, &result);
3717+
3718+
// Order by
3719+
3720+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
3721+
let result = plan_and_collect(&mut ctx, sql)
3722+
.await
3723+
.expect("ran plan correctly");
3724+
let expected = vec![
3725+
"+---+---+",
3726+
"| x | b |",
3727+
"+---+---+",
3728+
"| 1 | 2 |",
3729+
"+---+---+",
3730+
];
3731+
assert_batches_sorted_eq!(expected, &result);
3732+
3733+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
3734+
let result = plan_and_collect(&mut ctx, sql)
3735+
.await
3736+
.expect("ran plan correctly");
3737+
let expected = vec![
3738+
"+---+---+",
3739+
"| x | b |",
3740+
"+---+---+",
3741+
"| 1 | 2 |",
3742+
"+---+---+",
3743+
];
3744+
assert_batches_sorted_eq!(expected, &result);
3745+
3746+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
3747+
let result = plan_and_collect(&mut ctx, sql)
3748+
.await
3749+
.expect("ran plan correctly");
3750+
let expected = vec![
3751+
"+---+---+",
3752+
"| X | b |",
3753+
"+---+---+",
3754+
"| 1 | 2 |",
3755+
"+---+---+",
3756+
];
3757+
assert_batches_sorted_eq!(expected, &result);
3758+
3759+
// Where
3760+
3761+
let sql = "SELECT a, b FROM case_insensitive_test where A IS NOT null";
3762+
let result = plan_and_collect(&mut ctx, sql)
3763+
.await
3764+
.expect("ran plan correctly");
3765+
let expected = vec![
3766+
"+---+---+",
3767+
"| a | b |",
3768+
"+---+---+",
3769+
"| 1 | 2 |",
3770+
"+---+---+",
3771+
];
3772+
assert_batches_sorted_eq!(expected, &result);
3773+
3774+
// Group by
3775+
3776+
let sql = "SELECT a as x, count(*) as c FROM case_insensitive_test GROUP BY X";
3777+
let result = plan_and_collect(&mut ctx, sql)
3778+
.await
3779+
.expect("ran plan correctly");
3780+
let expected = vec![
3781+
"+---+---+",
3782+
"| x | c |",
3783+
"+---+---+",
3784+
"| 1 | 1 |",
3785+
"+---+---+",
3786+
];
3787+
assert_batches_sorted_eq!(expected, &result);
3788+
3789+
let sql =
3790+
r#"SELECT a as "X", count(*) as c FROM case_insensitive_test GROUP BY "X""#;
3791+
let result = plan_and_collect(&mut ctx, sql)
3792+
.await
3793+
.expect("ran plan correctly");
3794+
let expected = vec![
3795+
"+---+---+",
3796+
"| X | c |",
3797+
"+---+---+",
3798+
"| 1 | 1 |",
3799+
"+---+---+",
3800+
];
3801+
assert_batches_sorted_eq!(expected, &result);
3802+
}
3803+
36373804
struct MyPhysicalPlanner {}
36383805

36393806
#[async_trait]

datafusion/src/sql/planner.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use crate::logical_plan::{
3636
use crate::optimizer::utils::exprlist_to_columns;
3737
use crate::prelude::JoinType;
3838
use crate::scalar::ScalarValue;
39-
use crate::sql::utils::make_decimal_type;
39+
use crate::sql::utils::{make_decimal_type, normalize_ident};
4040
use crate::{
4141
error::{DataFusionError, Result},
4242
physical_plan::udaf::AggregateUDF,
@@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
11911191
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
11921192
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
11931193
Box::new(self.sql_to_rex(expr, schema)?),
1194-
alias.value.clone(),
1194+
normalize_ident(alias),
11951195
)),
11961196
SelectItem::Wildcard => Ok(Expr::Wildcard),
11971197
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
@@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
13921392

13931393
SQLExpr::Identifier(ref id) => {
13941394
if id.value.starts_with('@') {
1395+
// TODO: figure out if ScalarVariables should be insensitive.
13951396
let var_names = vec![id.value.clone()];
13961397
Ok(Expr::ScalarVariable(var_names))
13971398
} else {
@@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14011402
// identifier. (e.g. it is "foo.bar" not foo.bar)
14021403
Ok(Expr::Column(Column {
14031404
relation: None,
1404-
name: id.value.clone(),
1405+
name: normalize_ident(id),
14051406
}))
14061407
}
14071408
}
@@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14181419
}
14191420

14201421
SQLExpr::CompoundIdentifier(ids) => {
1421-
let mut var_names: Vec<_> =
1422-
ids.iter().map(|id| id.value.clone()).collect();
1422+
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();
14231423

14241424
if &var_names[0][0..1] == "@" {
14251425
Ok(Expr::ScalarVariable(var_names))
@@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
16391639
// (e.g. "foo.bar") for function names yet
16401640
function.name.to_string()
16411641
} else {
1642-
// if there is a quote style, then don't normalize
1643-
// the name, otherwise normalize to lowercase
1644-
let ident = &function.name.0[0];
1645-
match ident.quote_style {
1646-
Some(_) => ident.value.clone(),
1647-
None => ident.value.to_ascii_lowercase(),
1648-
}
1642+
normalize_ident(&function.name.0[0])
16491643
};
16501644

16511645
// first, scalar built-in

datafusion/src/sql/utils.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! SQL Utility Functions
1919
2020
use arrow::datatypes::DataType;
21+
use sqlparser::ast::Ident;
2122

2223
use crate::logical_plan::{Expr, LogicalPlan};
2324
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
@@ -532,6 +533,14 @@ pub(crate) fn make_decimal_type(
532533
}
533534
}
534535

536+
// Normalize an identifer to a lowercase string unless the identifier is quoted.
537+
pub(crate) fn normalize_ident(id: &Ident) -> String {
538+
match id.quote_style {
539+
Some(_) => id.value.clone(),
540+
None => id.value.to_ascii_lowercase(),
541+
}
542+
}
543+
535544
#[cfg(test)]
536545
mod tests {
537546
use super::*;

0 commit comments

Comments
 (0)