Skip to content

Commit 5515063

Browse files
committed
Disallow duplicated qualified field names
1 parent 380d843 commit 5515063

File tree

9 files changed

+29
-66
lines changed

9 files changed

+29
-66
lines changed

datafusion-cli/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/src/dfschema.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,12 @@ impl DFSchema {
226226

227227
for (field, qualifier) in self.inner.fields().iter().zip(&self.field_qualifiers) {
228228
if let Some(qualifier) = qualifier {
229-
qualified_names.insert((qualifier, field.name()));
229+
if !qualified_names.insert((qualifier, field.name())) {
230+
return _schema_err!(SchemaError::DuplicateQualifiedField {
231+
qualifier: Box::new(qualifier.clone()),
232+
name: field.name().to_string(),
233+
});
234+
}
230235
} else if !unqualified_names.insert(field.name()) {
231236
return _schema_err!(SchemaError::DuplicateUnqualifiedField {
232237
name: field.name().to_string()
@@ -1165,7 +1170,10 @@ mod tests {
11651170
let left = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
11661171
let right = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
11671172
let join = left.join(&right);
1168-
assert!(join.err().is_none());
1173+
assert_eq!(
1174+
join.unwrap_err().strip_backtrace(),
1175+
"Schema error: Schema contains duplicate qualified field name t1.c0",
1176+
);
11691177
Ok(())
11701178
}
11711179

datafusion/core/src/dataframe/mod.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3337,52 +3337,6 @@ mod tests {
33373337
Ok(())
33383338
}
33393339

3340-
// Table 't1' self join
3341-
// Supplementary test of issue: https://github.com/apache/datafusion/issues/7790
3342-
#[tokio::test]
3343-
async fn with_column_self_join() -> Result<()> {
3344-
let df = test_table().await?.select_columns(&["c1"])?;
3345-
let ctx = SessionContext::new();
3346-
3347-
ctx.register_table("t1", df.into_view())?;
3348-
3349-
let df = ctx
3350-
.table("t1")
3351-
.await?
3352-
.join(
3353-
ctx.table("t1").await?,
3354-
JoinType::Inner,
3355-
&["c1"],
3356-
&["c1"],
3357-
None,
3358-
)?
3359-
.sort(vec![
3360-
// make the test deterministic
3361-
col("t1.c1").sort(true, true),
3362-
])?
3363-
.limit(0, Some(1))?;
3364-
3365-
let df_results = df.clone().collect().await?;
3366-
assert_batches_sorted_eq!(
3367-
[
3368-
"+----+----+",
3369-
"| c1 | c1 |",
3370-
"+----+----+",
3371-
"| a | a |",
3372-
"+----+----+",
3373-
],
3374-
&df_results
3375-
);
3376-
3377-
let actual_err = df.clone().with_column("new_column", lit(true)).unwrap_err();
3378-
let expected_err = "Error during planning: Projections require unique expression names \
3379-
but the expression \"t1.c1\" at position 0 and \"t1.c1\" at position 1 have the same name. \
3380-
Consider aliasing (\"AS\") one of them.";
3381-
assert_eq!(actual_err.strip_backtrace(), expected_err);
3382-
3383-
Ok(())
3384-
}
3385-
33863340
#[tokio::test]
33873341
async fn with_column_renamed() -> Result<()> {
33883342
let df = test_table()

datafusion/expr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ datafusion-expr-common = { workspace = true }
4848
datafusion-functions-aggregate-common = { workspace = true }
4949
datafusion-functions-window-common = { workspace = true }
5050
datafusion-physical-expr-common = { workspace = true }
51+
indexmap = { workspace = true }
5152
paste = "^1.0"
5253
serde_json = { workspace = true }
5354
sqlparser = { workspace = true }

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ use datafusion_common::{
5151
DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence,
5252
FunctionalDependencies, ParamValues, Result, TableReference, UnnestOptions,
5353
};
54+
use indexmap::IndexSet;
5455

5556
// backwards compatibility
5657
use crate::display::PgJsonVisitor;
@@ -3069,6 +3070,8 @@ fn calc_func_dependencies_for_aggregate(
30693070
let group_by_expr_names = group_expr
30703071
.iter()
30713072
.map(|item| item.schema_name().to_string())
3073+
.collect::<IndexSet<_>>()
3074+
.into_iter()
30723075
.collect::<Vec<_>>();
30733076
let aggregate_func_dependencies = aggregate_functional_dependencies(
30743077
input.schema(),

datafusion/expr/src/utils.rs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use datafusion_common::{
3838
DataFusionError, Result, TableReference,
3939
};
4040

41+
use indexmap::IndexSet;
4142
use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem};
4243

4344
pub use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity;
@@ -59,16 +60,7 @@ pub fn exprlist_to_columns(expr: &[Expr], accum: &mut HashSet<Column>) -> Result
5960
/// Count the number of distinct exprs in a list of group by expressions. If the
6061
/// first element is a `GroupingSet` expression then it must be the only expr.
6162
pub fn grouping_set_expr_count(group_expr: &[Expr]) -> Result<usize> {
62-
if let Some(Expr::GroupingSet(grouping_set)) = group_expr.first() {
63-
if group_expr.len() > 1 {
64-
return plan_err!(
65-
"Invalid group by expressions, GroupingSet must be the only expression"
66-
);
67-
}
68-
Ok(grouping_set.distinct_expr().len())
69-
} else {
70-
Ok(group_expr.len())
71-
}
63+
grouping_set_to_exprlist(group_expr).map(|exprs| exprs.len())
7264
}
7365

7466
/// The [power set] (or powerset) of a set S is the set of all subsets of S, \
@@ -260,7 +252,11 @@ pub fn grouping_set_to_exprlist(group_expr: &[Expr]) -> Result<Vec<&Expr>> {
260252
}
261253
Ok(grouping_set.distinct_expr())
262254
} else {
263-
Ok(group_expr.iter().collect())
255+
Ok(group_expr
256+
.iter()
257+
.collect::<IndexSet<_>>()
258+
.into_iter()
259+
.collect())
264260
}
265261
}
266262

datafusion/optimizer/tests/optimizer_integration.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ fn select_wildcard_with_repeated_column() {
345345
let sql = "SELECT *, col_int32 FROM test";
346346
let err = test_sql(sql).expect_err("query should have failed");
347347
assert_eq!(
348-
"expand_wildcard_rule\ncaused by\nError during planning: Projections require unique expression names but the expression \"test.col_int32\" at position 0 and \"test.col_int32\" at position 7 have the same name. Consider aliasing (\"AS\") one of them.",
348+
"Schema error: Schema contains duplicate qualified field name test.col_int32",
349349
err.strip_backtrace()
350350
);
351351
}
@@ -396,7 +396,7 @@ fn test_sql(sql: &str) -> Result<LogicalPlan> {
396396
.with_udaf(count_udaf())
397397
.with_udaf(avg_udaf());
398398
let sql_to_rel = SqlToRel::new(&context_provider);
399-
let plan = sql_to_rel.sql_statement_to_plan(statement.clone()).unwrap();
399+
let plan = sql_to_rel.sql_statement_to_plan(statement.clone())?;
400400

401401
let config = OptimizerContext::new().with_skip_failing_rules(false);
402402
let analyzer = Analyzer::new();

datafusion/sql/src/relation/join.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
1919
use datafusion_common::{not_impl_err, Column, Result};
2020
use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder};
2121
use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableFactor, TableWithJoins};
22-
use std::collections::HashSet;
22+
use std::{collections::HashSet, sync::Arc};
2323

2424
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
2525
pub(crate) fn plan_table_with_joins(
@@ -34,7 +34,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
3434
};
3535
let old_outer_from_schema = planner_context.outer_from_schema();
3636
for join in t.joins {
37-
planner_context.extend_outer_from_schema(left.schema())?;
37+
planner_context.set_outer_from_schema(Some(Arc::clone(left.schema())));
3838
left = self.parse_relation_join(left, join, planner_context)?;
3939
}
4040
planner_context.set_outer_from_schema(old_outer_from_schema);

datafusion/sqllogictest/test_files/join.slt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,14 +1215,14 @@ statement ok
12151215
create table t1(v1 int) as values(100);
12161216

12171217
## Query with Ambiguous column reference
1218-
query error DataFusion error: Schema error: Ambiguous reference to unqualified field v1
1218+
query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1
12191219
select count(*)
12201220
from t1
12211221
right outer join t1
12221222
on t1.v1 > 0;
12231223

1224-
query error DataFusion error: Schema error: Ambiguous reference to unqualified field v1
1224+
query error DataFusion error: Schema error: Schema contains duplicate qualified field name t1\.v1
12251225
select t1.v1 from t1 join t1 using(v1) cross join (select struct('foo' as v1) as t1);
12261226

12271227
statement ok
1228-
drop table t1;
1228+
drop table t1;

0 commit comments

Comments
 (0)