Skip to content

Commit 56a8b9a

Browse files
author
Marko Mikulicic
committed
fix: Case insensitive unquoted identifiers
1 parent e4a056f commit 56a8b9a

File tree

2 files changed

+137
-11
lines changed

2 files changed

+137
-11
lines changed

datafusion/src/execution/context.rs

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3634,6 +3634,130 @@ mod tests {
36343634
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
36353635
}
36363636

3637+
// Checks that unquoted identifiers are matched case-insensitively while
// quoted identifiers keep their exact case. Covers plain column references,
// table-qualified references, column aliases, and ORDER BY references.
#[tokio::test]
3638+
async fn case_insensitive_columns_identifiers() {
3639+
// create local execution context
3640+
let mut ctx = ExecutionContext::new();
3641+
3642+
// register csv file with the execution context
3643+
ctx.register_csv(
3644+
"case_insensitive_test",
3645+
"tests/example.csv",
3646+
CsvReadOptions::new(),
3647+
)
3648+
.await
3649+
.unwrap();
3650+
3651+
let sql = "SELECT A, b FROM case_insensitive_test";
3652+
let result = plan_and_collect(&mut ctx, sql)
3653+
.await
3654+
.expect("ran plan correctly");
3655+
let expected = vec![
3656+
"+---+---+",
3657+
"| a | b |",
3658+
"+---+---+",
3659+
"| 1 | 2 |",
3660+
"+---+---+",
3661+
];
3662+
assert_batches_sorted_eq!(expected, &result);
3663+
3664+
let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
3665+
let result = plan_and_collect(&mut ctx, sql)
3666+
.await
3667+
.expect("ran plan correctly");
3668+
let expected = vec![
3669+
"+---+---+",
3670+
"| a | b |",
3671+
"+---+---+",
3672+
"| 1 | 2 |",
3673+
"+---+---+",
3674+
];
3675+
assert_batches_sorted_eq!(expected, &result);
3676+
3677+
// Unquoted identifiers (including aliases) should come out lowercase;
// only quoted identifiers keep their original case in the output header.
3678+
3679+
let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
3680+
let result = plan_and_collect(&mut ctx, sql)
3681+
.await
3682+
.expect("ran plan correctly");
3683+
let expected = vec![
3684+
"+---+---+",
3685+
"| x | b |",
3686+
"+---+---+",
3687+
"| 1 | 2 |",
3688+
"+---+---+",
3689+
];
3690+
assert_batches_sorted_eq!(expected, &result);
3691+
3692+
let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
3693+
let result = plan_and_collect(&mut ctx, sql)
3694+
.await
3695+
.expect("ran plan correctly");
3696+
let expected = vec![
3697+
"+---+---+",
3698+
"| x | b |",
3699+
"+---+---+",
3700+
"| 1 | 2 |",
3701+
"+---+---+",
3702+
];
3703+
assert_batches_sorted_eq!(expected, &result);
3704+
3705+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
3706+
let result = plan_and_collect(&mut ctx, sql)
3707+
.await
3708+
.expect("ran plan correctly");
3709+
let expected = vec![
3710+
"+---+---+",
3711+
"| X | b |",
3712+
"+---+---+",
3713+
"| 1 | 2 |",
3714+
"+---+---+",
3715+
];
3716+
assert_batches_sorted_eq!(expected, &result);
3717+
3718+
3719+
// ORDER BY references to an alias should be case insensitive when unquoted;
// a quoted alias is referenced with the same quoted spelling.
3720+
3721+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
3722+
let result = plan_and_collect(&mut ctx, sql)
3723+
.await
3724+
.expect("ran plan correctly");
3725+
let expected = vec![
3726+
"+---+---+",
3727+
"| x | b |",
3728+
"+---+---+",
3729+
"| 1 | 2 |",
3730+
"+---+---+",
3731+
];
3732+
assert_batches_sorted_eq!(expected, &result);
3733+
3734+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
3735+
let result = plan_and_collect(&mut ctx, sql)
3736+
.await
3737+
.expect("ran plan correctly");
3738+
let expected = vec![
3739+
"+---+---+",
3740+
"| x | b |",
3741+
"+---+---+",
3742+
"| 1 | 2 |",
3743+
"+---+---+",
3744+
];
3745+
assert_batches_sorted_eq!(expected, &result);
3746+
3747+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
3748+
let result = plan_and_collect(&mut ctx, sql)
3749+
.await
3750+
.expect("ran plan correctly");
3751+
let expected = vec![
3752+
"+---+---+",
3753+
"| X | b |",
3754+
"+---+---+",
3755+
"| 1 | 2 |",
3756+
"+---+---+",
3757+
];
3758+
assert_batches_sorted_eq!(expected, &result);
3759+
}
3760+
36373761
struct MyPhysicalPlanner {}
36383762

36393763
#[async_trait]

datafusion/src/sql/planner.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
11911191
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
11921192
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
11931193
Box::new(self.sql_to_rex(expr, schema)?),
1194-
alias.value.clone(),
1194+
normalize_ident(alias),
11951195
)),
11961196
SelectItem::Wildcard => Ok(Expr::Wildcard),
11971197
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
@@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
13921392

13931393
SQLExpr::Identifier(ref id) => {
13941394
if id.value.starts_with('@') {
1395+
// TODO: figure out if ScalarVariables should be case insensitive.
13951396
let var_names = vec![id.value.clone()];
13961397
Ok(Expr::ScalarVariable(var_names))
13971398
} else {
@@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14011402
// identifier. (e.g. it is "foo.bar" not foo.bar)
14021403
Ok(Expr::Column(Column {
14031404
relation: None,
1404-
name: id.value.clone(),
1405+
name: normalize_ident(id),
14051406
}))
14061407
}
14071408
}
@@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14181419
}
14191420

14201421
SQLExpr::CompoundIdentifier(ids) => {
1421-
let mut var_names: Vec<_> =
1422-
ids.iter().map(|id| id.value.clone()).collect();
1422+
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();
14231423

14241424
if &var_names[0][0..1] == "@" {
14251425
Ok(Expr::ScalarVariable(var_names))
@@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
16391639
// (e.g. "foo.bar") for function names yet
16401640
function.name.to_string()
16411641
} else {
1642-
// if there is a quote style, then don't normalize
1643-
// the name, otherwise normalize to lowercase
1644-
let ident = &function.name.0[0];
1645-
match ident.quote_style {
1646-
Some(_) => ident.value.clone(),
1647-
None => ident.value.to_ascii_lowercase(),
1648-
}
1642+
normalize_ident(&function.name.0[0])
16491643
};
16501644

16511645
// first, scalar built-in
@@ -2176,6 +2170,14 @@ pub fn convert_data_type(sql_type: &SQLDataType) -> Result<DataType> {
21762170
}
21772171
}
21782172

2173+
/// Normalize an identifier to a lowercase string unless the identifier is quoted.
///
/// A quoted identifier (`quote_style` is `Some`) is returned verbatim; an
/// unquoted one is lowercased with `to_ascii_lowercase`, so only ASCII
/// letters are folded.
2174+
fn normalize_ident(id: &Ident) -> String {
2175+
match id.quote_style {
2176+
Some(_) => id.value.clone(),
2177+
None => id.value.to_ascii_lowercase(),
2178+
}
2179+
}
2180+
21792181
#[cfg(test)]
21802182
mod tests {
21812183
use functions::ScalarFunctionImplementation;

0 commit comments

Comments
 (0)