Skip to content

Commit d014ff2

Browse files
authored
fix: Case insensitive unquoted identifiers (#1747)
1 parent fe46a1e commit d014ff2

File tree

3 files changed

+182
-12
lines changed

3 files changed

+182
-12
lines changed

datafusion/src/execution/context.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,173 @@ mod tests {
33593359
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
33603360
}
33613361

3362+
#[tokio::test]
3363+
async fn normalized_column_identifiers() {
3364+
// create local execution context
3365+
let mut ctx = ExecutionContext::new();
3366+
3367+
// register csv file with the execution context
3368+
ctx.register_csv(
3369+
"case_insensitive_test",
3370+
"tests/example.csv",
3371+
CsvReadOptions::new(),
3372+
)
3373+
.await
3374+
.unwrap();
3375+
3376+
let sql = "SELECT A, b FROM case_insensitive_test";
3377+
let result = plan_and_collect(&mut ctx, sql)
3378+
.await
3379+
.expect("ran plan correctly");
3380+
let expected = vec![
3381+
"+---+---+",
3382+
"| a | b |",
3383+
"+---+---+",
3384+
"| 1 | 2 |",
3385+
"+---+---+",
3386+
];
3387+
assert_batches_sorted_eq!(expected, &result);
3388+
3389+
let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
3390+
let result = plan_and_collect(&mut ctx, sql)
3391+
.await
3392+
.expect("ran plan correctly");
3393+
let expected = vec![
3394+
"+---+---+",
3395+
"| a | b |",
3396+
"+---+---+",
3397+
"| 1 | 2 |",
3398+
"+---+---+",
3399+
];
3400+
assert_batches_sorted_eq!(expected, &result);
3401+
3402+
// Aliases
3403+
3404+
let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
3405+
let result = plan_and_collect(&mut ctx, sql)
3406+
.await
3407+
.expect("ran plan correctly");
3408+
let expected = vec![
3409+
"+---+---+",
3410+
"| x | b |",
3411+
"+---+---+",
3412+
"| 1 | 2 |",
3413+
"+---+---+",
3414+
];
3415+
assert_batches_sorted_eq!(expected, &result);
3416+
3417+
let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
3418+
let result = plan_and_collect(&mut ctx, sql)
3419+
.await
3420+
.expect("ran plan correctly");
3421+
let expected = vec![
3422+
"+---+---+",
3423+
"| x | b |",
3424+
"+---+---+",
3425+
"| 1 | 2 |",
3426+
"+---+---+",
3427+
];
3428+
assert_batches_sorted_eq!(expected, &result);
3429+
3430+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
3431+
let result = plan_and_collect(&mut ctx, sql)
3432+
.await
3433+
.expect("ran plan correctly");
3434+
let expected = vec![
3435+
"+---+---+",
3436+
"| X | b |",
3437+
"+---+---+",
3438+
"| 1 | 2 |",
3439+
"+---+---+",
3440+
];
3441+
assert_batches_sorted_eq!(expected, &result);
3442+
3443+
// Order by
3444+
3445+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
3446+
let result = plan_and_collect(&mut ctx, sql)
3447+
.await
3448+
.expect("ran plan correctly");
3449+
let expected = vec![
3450+
"+---+---+",
3451+
"| x | b |",
3452+
"+---+---+",
3453+
"| 1 | 2 |",
3454+
"+---+---+",
3455+
];
3456+
assert_batches_sorted_eq!(expected, &result);
3457+
3458+
let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
3459+
let result = plan_and_collect(&mut ctx, sql)
3460+
.await
3461+
.expect("ran plan correctly");
3462+
let expected = vec![
3463+
"+---+---+",
3464+
"| x | b |",
3465+
"+---+---+",
3466+
"| 1 | 2 |",
3467+
"+---+---+",
3468+
];
3469+
assert_batches_sorted_eq!(expected, &result);
3470+
3471+
let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
3472+
let result = plan_and_collect(&mut ctx, sql)
3473+
.await
3474+
.expect("ran plan correctly");
3475+
let expected = vec![
3476+
"+---+---+",
3477+
"| X | b |",
3478+
"+---+---+",
3479+
"| 1 | 2 |",
3480+
"+---+---+",
3481+
];
3482+
assert_batches_sorted_eq!(expected, &result);
3483+
3484+
// Where
3485+
3486+
let sql = "SELECT a, b FROM case_insensitive_test where A IS NOT null";
3487+
let result = plan_and_collect(&mut ctx, sql)
3488+
.await
3489+
.expect("ran plan correctly");
3490+
let expected = vec![
3491+
"+---+---+",
3492+
"| a | b |",
3493+
"+---+---+",
3494+
"| 1 | 2 |",
3495+
"+---+---+",
3496+
];
3497+
assert_batches_sorted_eq!(expected, &result);
3498+
3499+
// Group by
3500+
3501+
let sql = "SELECT a as x, count(*) as c FROM case_insensitive_test GROUP BY X";
3502+
let result = plan_and_collect(&mut ctx, sql)
3503+
.await
3504+
.expect("ran plan correctly");
3505+
let expected = vec![
3506+
"+---+---+",
3507+
"| x | c |",
3508+
"+---+---+",
3509+
"| 1 | 1 |",
3510+
"+---+---+",
3511+
];
3512+
assert_batches_sorted_eq!(expected, &result);
3513+
3514+
let sql =
3515+
r#"SELECT a as "X", count(*) as c FROM case_insensitive_test GROUP BY "X""#;
3516+
let result = plan_and_collect(&mut ctx, sql)
3517+
.await
3518+
.expect("ran plan correctly");
3519+
let expected = vec![
3520+
"+---+---+",
3521+
"| X | c |",
3522+
"+---+---+",
3523+
"| 1 | 1 |",
3524+
"+---+---+",
3525+
];
3526+
assert_batches_sorted_eq!(expected, &result);
3527+
}
3528+
33623529
struct MyPhysicalPlanner {}
33633530

33643531
#[async_trait]

datafusion/src/sql/planner.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use crate::logical_plan::{
3636
use crate::optimizer::utils::exprlist_to_columns;
3737
use crate::prelude::JoinType;
3838
use crate::scalar::ScalarValue;
39-
use crate::sql::utils::make_decimal_type;
39+
use crate::sql::utils::{make_decimal_type, normalize_ident};
4040
use crate::{
4141
error::{DataFusionError, Result},
4242
physical_plan::udaf::AggregateUDF,
@@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
11911191
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
11921192
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
11931193
Box::new(self.sql_to_rex(expr, schema)?),
1194-
alias.value.clone(),
1194+
normalize_ident(alias),
11951195
)),
11961196
SelectItem::Wildcard => Ok(Expr::Wildcard),
11971197
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
@@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
13921392

13931393
SQLExpr::Identifier(ref id) => {
13941394
if id.value.starts_with('@') {
1395+
// TODO: figure out if ScalarVariables should be insensitive.
13951396
let var_names = vec![id.value.clone()];
13961397
Ok(Expr::ScalarVariable(var_names))
13971398
} else {
@@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14011402
// identifier. (e.g. it is "foo.bar" not foo.bar)
14021403
Ok(Expr::Column(Column {
14031404
relation: None,
1404-
name: id.value.clone(),
1405+
name: normalize_ident(id),
14051406
}))
14061407
}
14071408
}
@@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
14181419
}
14191420

14201421
SQLExpr::CompoundIdentifier(ids) => {
1421-
let mut var_names: Vec<_> =
1422-
ids.iter().map(|id| id.value.clone()).collect();
1422+
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();
14231423

14241424
if &var_names[0][0..1] == "@" {
14251425
Ok(Expr::ScalarVariable(var_names))
@@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
16391639
// (e.g. "foo.bar") for function names yet
16401640
function.name.to_string()
16411641
} else {
1642-
// if there is a quote style, then don't normalize
1643-
// the name, otherwise normalize to lowercase
1644-
let ident = &function.name.0[0];
1645-
match ident.quote_style {
1646-
Some(_) => ident.value.clone(),
1647-
None => ident.value.to_ascii_lowercase(),
1648-
}
1642+
normalize_ident(&function.name.0[0])
16491643
};
16501644

16511645
// first, scalar built-in

datafusion/src/sql/utils.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! SQL Utility Functions
1919
2020
use arrow::datatypes::DataType;
21+
use sqlparser::ast::Ident;
2122

2223
use crate::logical_plan::{Expr, LogicalPlan};
2324
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
@@ -532,6 +533,14 @@ pub(crate) fn make_decimal_type(
532533
}
533534
}
534535

536+
// Normalize an identifer to a lowercase string unless the identifier is quoted.
537+
pub(crate) fn normalize_ident(id: &Ident) -> String {
538+
match id.quote_style {
539+
Some(_) => id.value.clone(),
540+
None => id.value.to_ascii_lowercase(),
541+
}
542+
}
543+
535544
#[cfg(test)]
536545
mod tests {
537546
use super::*;

0 commit comments

Comments
 (0)