Skip to content

Commit 570a1cf

Browse files
authored
Planner: normalize_ident only when enable_ident_normalization is enabled (#5785)
* Make planner::normalize_ident public and refactor planner impl method to honor planner options * Undo pre-commit config change * Add tests * Add identNormalizer struct to sqlToRel and move calls to normalize_ident to the struct implementation
1 parent bbc7169 commit 570a1cf

File tree

9 files changed

+68
-38
lines changed

9 files changed

+68
-38
lines changed

datafusion/sql/src/expr/function.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use crate::utils::normalize_ident;
2019
use datafusion_common::{DFSchema, DataFusionError, Result};
2120
use datafusion_expr::utils::COUNT_STAR_EXPANSION;
2221
use datafusion_expr::window_frame::regularize;
@@ -43,7 +42,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4342
// (e.g. "foo.bar") for function names yet
4443
function.name.to_string()
4544
} else {
46-
normalize_ident(function.name.0[0].clone())
45+
self.normalizer.normalize(function.name.0[0].clone())
4746
};
4847

4948
// next, scalar built-in

datafusion/sql/src/expr/identifier.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use crate::utils::normalize_ident;
2019
use datafusion_common::{
2120
Column, DFField, DFSchema, DataFusionError, Result, ScalarValue, TableReference,
2221
};
@@ -47,7 +46,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
4746
// interpret names with '.' as if they were
4847
// compound identifiers, but this is not a compound
4948
// identifier. (e.g. it is "foo.bar" not foo.bar)
50-
let normalize_ident = normalize_ident(id);
49+
let normalize_ident = self.normalizer.normalize(id);
5150
match schema.field_with_unqualified_name(normalize_ident.as_str()) {
5251
Ok(_) => {
5352
// found a match without a qualified name, this is a inner table column
@@ -97,7 +96,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
9796
}
9897

9998
if ids[0].value.starts_with('@') {
100-
let var_names: Vec<_> = ids.into_iter().map(normalize_ident).collect();
99+
let var_names: Vec<_> = ids
100+
.into_iter()
101+
.map(|id| self.normalizer.normalize(id))
102+
.collect();
101103
let ty = self
102104
.schema_provider
103105
.get_variable_type(&var_names)
@@ -110,13 +112,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
110112
} else {
111113
let ids = ids
112114
.into_iter()
113-
.map(|id| {
114-
if self.options.enable_ident_normalization {
115-
normalize_ident(id)
116-
} else {
117-
id.value
118-
}
119-
})
115+
.map(|id| self.normalizer.normalize(id))
120116
.collect::<Vec<_>>();
121117

122118
// Currently not supporting more than one nested level

datafusion/sql/src/expr/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ mod unary_op;
2727
mod value;
2828

2929
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
30-
use crate::utils::normalize_ident;
3130
use arrow_schema::DataType;
3231
use datafusion_common::tree_node::{Transformed, TreeNode};
3332
use datafusion_common::{Column, DFSchema, DataFusionError, Result, ScalarValue};
@@ -148,7 +147,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
148147

149148
SQLExpr::MapAccess { column, keys } => {
150149
if let SQLExpr::Identifier(id) = *column {
151-
plan_indexed(col(normalize_ident(id)), keys)
150+
plan_indexed(col(self.normalizer.normalize(id)), keys)
152151
} else {
153152
Err(DataFusionError::NotImplemented(format!(
154153
"map access requires an identifier, found column {column} instead"

datafusion/sql/src/planner.rs

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,32 @@ impl Default for ParserOptions {
6969
}
7070
}
7171

72+
/// Ident Normalizer
73+
#[derive(Debug)]
74+
pub struct IdentNormalizer {
75+
normalize: bool,
76+
}
77+
78+
impl Default for IdentNormalizer {
79+
fn default() -> Self {
80+
Self { normalize: true }
81+
}
82+
}
83+
84+
impl IdentNormalizer {
85+
pub fn new(normalize: bool) -> Self {
86+
Self { normalize }
87+
}
88+
89+
pub fn normalize(&self, ident: Ident) -> String {
90+
if self.normalize {
91+
crate::utils::normalize_ident(ident)
92+
} else {
93+
ident.value
94+
}
95+
}
96+
}
97+
7298
/// Struct to store the states used by the Planner. The Planner will leverage the states to resolve
7399
/// CTEs, Views, subqueries and PREPARE statements. The states include
74100
/// Common Table Expression (CTE) provided with WITH clause and
@@ -155,6 +181,7 @@ impl PlannerContext {
155181
pub struct SqlToRel<'a, S: ContextProvider> {
156182
pub(crate) schema_provider: &'a S,
157183
pub(crate) options: ParserOptions,
184+
pub(crate) normalizer: IdentNormalizer,
158185
}
159186

160187
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -165,9 +192,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
165192

166193
/// Create a new query planner
167194
pub fn new_with_options(schema_provider: &'a S, options: ParserOptions) -> Self {
195+
let normalize = options.enable_ident_normalization;
168196
SqlToRel {
169197
schema_provider,
170198
options,
199+
normalizer: IdentNormalizer::new(normalize),
171200
}
172201
}
173202

@@ -181,7 +210,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
181210
.iter()
182211
.any(|x| x.option == ColumnOption::NotNull);
183212
fields.push(Field::new(
184-
normalize_ident(column.name, self.options.enable_ident_normalization),
213+
self.normalizer.normalize(column.name),
185214
data_type,
186215
!not_nullable,
187216
));
@@ -198,7 +227,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
198227
) -> Result<LogicalPlan> {
199228
let apply_name_plan = LogicalPlan::SubqueryAlias(SubqueryAlias::try_new(
200229
plan,
201-
normalize_ident(alias.name, self.options.enable_ident_normalization),
230+
self.normalizer.normalize(alias.name),
202231
)?);
203232

204233
self.apply_expr_alias(apply_name_plan, alias.columns)
@@ -221,10 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
221250
let fields = plan.schema().fields().clone();
222251
LogicalPlanBuilder::from(plan)
223252
.project(fields.iter().zip(idents.into_iter()).map(|(field, ident)| {
224-
col(field.name()).alias(normalize_ident(
225-
ident,
226-
self.options.enable_ident_normalization,
227-
))
253+
col(field.name()).alias(self.normalizer.normalize(ident))
228254
}))?
229255
.build()
230256
}
@@ -411,7 +437,7 @@ pub(crate) fn idents_to_table_reference(
411437
impl IdentTaker {
412438
fn take(&mut self, enable_normalization: bool) -> String {
413439
let ident = self.0.pop().expect("no more identifiers");
414-
normalize_ident(ident, enable_normalization)
440+
IdentNormalizer::new(enable_normalization).normalize(ident)
415441
}
416442
}
417443

@@ -447,6 +473,7 @@ pub fn object_name_to_qualifier(
447473
enable_normalization: bool,
448474
) -> String {
449475
let columns = vec!["table_name", "table_schema", "table_catalog"].into_iter();
476+
let normalizer = IdentNormalizer::new(enable_normalization);
450477
sql_table_name
451478
.0
452479
.iter()
@@ -456,17 +483,9 @@ pub fn object_name_to_qualifier(
456483
format!(
457484
r#"{} = '{}'"#,
458485
column_name,
459-
normalize_ident(ident.clone(), enable_normalization)
486+
normalizer.normalize(ident.clone())
460487
)
461488
})
462489
.collect::<Vec<_>>()
463490
.join(" AND ")
464491
}
465-
466-
fn normalize_ident(id: Ident, enable_normalization: bool) -> String {
467-
if enable_normalization {
468-
return crate::utils::normalize_ident(id);
469-
}
470-
471-
id.value
472-
}

datafusion/sql/src/query.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use crate::utils::normalize_ident;
19+
2020
use datafusion_common::{DataFusionError, Result, ScalarValue};
2121
use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder};
2222
use sqlparser::ast::{Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, Value};
@@ -53,7 +53,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
5353

5454
for cte in with.cte_tables {
5555
// A `WITH` block can't use the same name more than once
56-
let cte_name = normalize_ident(cte.alias.name.clone());
56+
let cte_name = self.normalizer.normalize(cte.alias.name.clone());
5757
if planner_context.contains_cte(&cte_name) {
5858
return Err(DataFusionError::SQL(ParserError(format!(
5959
"WITH query name {cte_name:?} specified more than once"

datafusion/sql/src/relation/join.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use crate::utils::normalize_ident;
2019
use datafusion_common::{Column, DataFusionError, Result};
2120
use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder};
2221
use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins};
@@ -146,7 +145,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
146145
JoinConstraint::Using(idents) => {
147146
let keys: Vec<Column> = idents
148147
.into_iter()
149-
.map(|x| Column::from_name(normalize_ident(x)))
148+
.map(|x| Column::from_name(self.normalizer.normalize(x)))
150149
.collect();
151150
LogicalPlanBuilder::from(left)
152151
.join_using(right, join_type, keys)?

datafusion/sql/src/select.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use crate::utils::{
20-
check_columns_satisfy_exprs, extract_aliases, normalize_ident, rebase_expr,
21-
resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs,
20+
check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs,
21+
resolve_columns, resolve_positions_to_exprs,
2222
};
2323
use datafusion_common::{DataFusionError, Result};
2424
use datafusion_expr::expr_rewriter::{
@@ -330,7 +330,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
330330
&[&[plan.schema()]],
331331
&plan.using_columns()?,
332332
)?;
333-
let expr = Alias(Box::new(col), normalize_ident(alias));
333+
let expr = Alias(Box::new(col), self.normalizer.normalize(alias));
334334
Ok(vec![expr])
335335
}
336336
SelectItem::Wildcard(options) => {

datafusion/sql/src/statement.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
924924
.iter()
925925
.map(|c| {
926926
Ok(table_schema
927-
.field_with_unqualified_name(&normalize_ident(c.clone()))?
927+
.field_with_unqualified_name(
928+
&self.normalizer.normalize(c.clone()),
929+
)?
928930
.clone())
929931
})
930932
.collect::<Result<Vec<DFField>>>()?;

datafusion/sql/tests/integration_test.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ fn parse_ident_normalization() {
8989
"Err(Plan(\"No table named: PERSON found\"))",
9090
false,
9191
),
92+
(
93+
"SELECT Id FROM UPPERCASE_test",
94+
"Ok(Projection: UPPERCASE_test.Id\
95+
\n TableScan: UPPERCASE_test)",
96+
false,
97+
),
98+
(
99+
"SELECT \"Id\", lower FROM \"UPPERCASE_test\"",
100+
"Ok(Projection: UPPERCASE_test.Id, UPPERCASE_test.lower\
101+
\n TableScan: UPPERCASE_test)",
102+
true,
103+
),
92104
];
93105

94106
for (sql, expected, enable_ident_normalization) in test_data {
@@ -2635,6 +2647,10 @@ impl ContextProvider for MockContextProvider {
26352647
Field::new("c12", DataType::Float64, false),
26362648
Field::new("c13", DataType::Utf8, false),
26372649
])),
2650+
"UPPERCASE_test" => Ok(Schema::new(vec![
2651+
Field::new("Id", DataType::UInt32, false),
2652+
Field::new("lower", DataType::UInt32, false),
2653+
])),
26382654
_ => Err(DataFusionError::Plan(format!(
26392655
"No table named: {} found",
26402656
name.table()

0 commit comments

Comments
 (0)