-
Notifications
You must be signed in to change notification settings - Fork 2k
Attach diagnostic information for duplicate table name error #20720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e6716d7
fbdbc61
10f6946
1786814
3087656
67a1e17
db79b6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ | |
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use std::collections::HashSet; | ||
| use std::collections::{HashMap, HashSet}; | ||
| use std::ops::ControlFlow; | ||
| use std::sync::Arc; | ||
|
|
||
|
|
@@ -29,7 +29,9 @@ use crate::utils::{ | |
|
|
||
| use datafusion_common::error::DataFusionErrorBuilder; | ||
| use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; | ||
| use datafusion_common::{Column, DFSchema, Result, not_impl_err, plan_err}; | ||
| use datafusion_common::{ | ||
| Column, DFSchema, Diagnostic, Result, Span, not_impl_err, plan_err, | ||
| }; | ||
| use datafusion_common::{RecursionUnnestOption, UnnestOptions}; | ||
| use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; | ||
| use datafusion_expr::expr_rewriter::{ | ||
|
|
@@ -50,7 +52,9 @@ use sqlparser::ast::{ | |
| SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, | ||
| visit_expressions_mut, | ||
| }; | ||
| use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; | ||
| use sqlparser::ast::{ | ||
| NamedWindowDefinition, Select, SelectItem, Spanned, TableFactor, TableWithJoins, | ||
| }; | ||
|
|
||
| /// Result of the `aggregate` function, containing the aggregate plan and | ||
| /// rewritten expressions that reference the aggregate output columns. | ||
|
|
@@ -690,21 +694,75 @@ impl<S: ContextProvider> SqlToRel<'_, S> { | |
| self.plan_table_with_joins(input, planner_context) | ||
| } | ||
| _ => { | ||
| let extract_table_name = | ||
| |t: &TableWithJoins| -> Option<(String, Option<Span>)> { | ||
| match &t.relation { | ||
| TableFactor::Table { alias: Some(a), .. } | ||
| | TableFactor::Derived { alias: Some(a), .. } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This Once the JOIN coverage is addressed, would it be worth pulling this into a small helper near relation planning? It might make the naming rules easier to follow and extend as more table factor variants are added. |
||
| | TableFactor::Function { alias: Some(a), .. } | ||
| | TableFactor::UNNEST { alias: Some(a), .. } | ||
| | TableFactor::NestedJoin { alias: Some(a), .. } => { | ||
| let span = Span::try_from_sqlparser_span(a.name.span); | ||
| let name = | ||
| self.ident_normalizer.normalize(a.name.clone()); | ||
| Some((name, span)) | ||
| } | ||
| TableFactor::Table { | ||
| name, alias: None, .. | ||
| } => { | ||
| let span = | ||
| Span::try_from_sqlparser_span(t.relation.span()); | ||
| let table_name = name | ||
| .0 | ||
| .iter() | ||
| .filter_map(|p| p.as_ident()) | ||
| .map(|id| self.ident_normalizer.normalize(id.clone())) | ||
| .next_back()?; | ||
| Some((table_name, span)) | ||
| } | ||
| _ => None, | ||
| } | ||
| }; | ||
|
|
||
| let mut alias_spans: HashMap<String, Option<Span>> = HashMap::new(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this currently extracts only the final identifier for unaliased table factors. That means something like However, DataFusion's scan qualifier uses the full normalized Would it make sense to reuse |
||
|
|
||
| let mut from = from.into_iter(); | ||
| let first = from.next().unwrap(); | ||
|
|
||
| if let Some((name, span)) = extract_table_name(&first) { | ||
| alias_spans.entry(name).or_insert(span); | ||
| } | ||
|
|
||
| let mut left = LogicalPlanBuilder::from( | ||
| self.plan_table_with_joins(first, planner_context)?, | ||
| ); | ||
|
|
||
| let mut left = LogicalPlanBuilder::from({ | ||
| let input = from.next().unwrap(); | ||
| self.plan_table_with_joins(input, planner_context)? | ||
| }); | ||
| let old_outer_from_schema = { | ||
| let left_schema = Some(Arc::clone(left.schema())); | ||
| planner_context.set_outer_from_schema(left_schema) | ||
| }; | ||
| for input in from { | ||
| // Join `input` with the current result (`left`). | ||
| let current_name = extract_table_name(&input); | ||
|
|
||
| if let Some((ref name, current_span)) = current_name { | ||
| if let Some(prior_span) = alias_spans.get(name) { | ||
| let mut diagnostic = Diagnostic::new_error( | ||
| "duplicate table alias in FROM clause", | ||
| current_span, | ||
| ); | ||
| if let Some(span) = *prior_span { | ||
| diagnostic = diagnostic | ||
| .with_note("first defined here", Some(span)); | ||
| } | ||
| return plan_err!("duplicate table alias in FROM clause") | ||
| .map_err(|e| e.with_diagnostic(diagnostic)); | ||
| } | ||
| alias_spans.insert(name.clone(), current_span); | ||
| } | ||
|
|
||
| let right = self.plan_table_with_joins(input, planner_context)?; | ||
|
|
||
| left = left.cross_join(right)?; | ||
| // Update the outer FROM schema. | ||
| let left_schema = Some(Arc::clone(left.schema())); | ||
| planner_context.set_outer_from_schema(left_schema); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the duplicate alias diagnostic is only applied in the multi-entry comma
FROMpath. A singleTableWithJoinsstill goes straight toplan_table_with_joins, so explicit joins skip this check.For example,
(SELECT 1 AS a) AS t JOIN (SELECT 2 AS b) AS t ON truewould not be caught here. Depending on the columns, this could either fall back to a schema error or even plan successfully with duplicatetaliases.Could we move the alias and span tracking into the relation or join planning path so both comma joins and explicit JOINs go through the same validation and produce consistent diagnostics?