From ffaa67904ed0ca454267ccc5832582bcb669a5c0 Mon Sep 17 00:00:00 2001 From: Dejan Simic <10134699+simicd@users.noreply.github.com> Date: Wed, 17 Jan 2024 07:15:25 +0100 Subject: [PATCH] test: Port tests in `references.rs` to sqllogictest (#8877) * Migrate references unit tests to sqllogictest * Remove unused import --- datafusion/core/tests/sql/mod.rs | 56 ------- datafusion/core/tests/sql/references.rs | 140 ------------------ .../sqllogictest/test_files/references.slt | 134 +++++++++++++++++ 3 files changed, 134 insertions(+), 196 deletions(-) delete mode 100644 datafusion/core/tests/sql/references.rs create mode 100644 datafusion/sqllogictest/test_files/references.slt diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 140eeb91d1a9..981bdf34f539 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -73,66 +73,10 @@ pub mod explain_analyze; pub mod expr; pub mod joins; pub mod partitioned_csv; -pub mod references; pub mod repartition; pub mod select; mod sql_api; -fn create_join_context( - column_left: &str, - column_right: &str, - repartition_joins: bool, -) -> Result { - let ctx = SessionContext::new_with_config( - SessionConfig::new() - .with_repartition_joins(repartition_joins) - .with_target_partitions(2) - .with_batch_size(4096), - ); - - let t1_schema = Arc::new(Schema::new(vec![ - Field::new(column_left, DataType::UInt32, true), - Field::new("t1_name", DataType::Utf8, true), - Field::new("t1_int", DataType::UInt32, true), - ])); - let t1_data = RecordBatch::try_new( - t1_schema, - vec![ - Arc::new(UInt32Array::from(vec![11, 22, 33, 44])), - Arc::new(StringArray::from(vec![ - Some("a"), - Some("b"), - Some("c"), - Some("d"), - ])), - Arc::new(UInt32Array::from(vec![1, 2, 3, 4])), - ], - )?; - ctx.register_batch("t1", t1_data)?; - - let t2_schema = Arc::new(Schema::new(vec![ - Field::new(column_right, DataType::UInt32, true), - Field::new("t2_name", DataType::Utf8, true), - Field::new("t2_int", DataType::UInt32, true), - ])); - let t2_data = RecordBatch::try_new( - t2_schema, - vec![ - Arc::new(UInt32Array::from(vec![11, 22, 44, 55])), - Arc::new(StringArray::from(vec![ - Some("z"), - Some("y"), - Some("x"), - Some("w"), - ])), - Arc::new(UInt32Array::from(vec![3, 1, 3, 3])), - ], - )?; - ctx.register_batch("t2", t2_data)?; - - Ok(ctx) -} - fn create_left_semi_anti_join_context_with_null_ids( column_left: &str, column_right: &str, diff --git a/datafusion/core/tests/sql/references.rs b/datafusion/core/tests/sql/references.rs deleted file mode 100644 index f465e8a2dacc..000000000000 --- a/datafusion/core/tests/sql/references.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use super::*; - -#[tokio::test] -async fn qualified_table_references() -> Result<()> { - let ctx = SessionContext::new(); - register_aggregate_csv(&ctx).await?; - - for table_ref in &[ - "aggregate_test_100", - "public.aggregate_test_100", - "datafusion.public.aggregate_test_100", - ] { - let sql = format!("SELECT COUNT(*) FROM {table_ref}"); - let actual = execute_to_batches(&ctx, &sql).await; - let expected = [ - "+----------+", - "| COUNT(*) |", - "+----------+", - "| 100 |", - "+----------+", - ]; - assert_batches_eq!(expected, &actual); - } - Ok(()) -} - -#[tokio::test] -async fn qualified_table_references_and_fields() -> Result<()> { - let ctx = SessionContext::new(); - - let c1: StringArray = vec!["foofoo", "foobar", "foobaz"] - .into_iter() - .map(Some) - .collect(); - let c2: Int64Array = vec![1, 2, 3].into_iter().map(Some).collect(); - let c3: Int64Array = vec![10, 20, 30].into_iter().map(Some).collect(); - - let batch = RecordBatch::try_from_iter(vec![ - ("f.c1", Arc::new(c1) as ArrayRef), - // evil -- use the same name as the table - ("test.c2", Arc::new(c2) as ArrayRef), - // more evil still - ("....", Arc::new(c3) as ArrayRef), - ])?; - - ctx.register_batch("test", batch)?; - - // referring to the unquoted column is an error - let sql = r#"SELECT f1.c1 from test"#; - let error = ctx.sql(sql).await.unwrap_err(); - assert_contains!( - error.to_string(), - r#"No field named f1.c1. Valid fields are test."f.c1", test."test.c2""# - ); - - // however, enclosing it in double quotes is ok - let sql = r#"SELECT "f.c1" from test"#; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+--------+", - "| f.c1 |", - "+--------+", - "| foofoo |", - "| foobar |", - "| foobaz |", - "+--------+", - ]; - assert_batches_eq!(expected, &actual); - // Works fully qualified too - let sql = r#"SELECT test."f.c1" from test"#; - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - - // check that duplicated table name and column name are ok - let sql = r#"SELECT "test.c2" as expr1, test."test.c2" as expr2 from test"#; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+-------+-------+", - "| expr1 | expr2 |", - "+-------+-------+", - "| 1 | 1 |", - "| 2 | 2 |", - "| 3 | 3 |", - "+-------+-------+", - ]; - assert_batches_eq!(expected, &actual); - - // check that '....' is also an ok column name (in the sense that - // datafusion should run the query, not that someone should write - // this - let sql = r#"SELECT "....", "...." as c3 from test order by "....""#; - let actual = execute_to_batches(&ctx, sql).await; - let expected = [ - "+------+----+", - "| .... | c3 |", - "+------+----+", - "| 10 | 10 |", - "| 20 | 20 |", - "| 30 | 30 |", - "+------+----+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn test_partial_qualified_name() -> Result<()> { - let ctx = create_join_context("t1_id", "t2_id", true)?; - let sql = "SELECT t1.t1_id, t1_name FROM public.t1"; - let expected = [ - "+-------+---------+", - "| t1_id | t1_name |", - "+-------+---------+", - "| 11 | a |", - "| 22 | b |", - "| 33 | c |", - "| 44 | d |", - "+-------+---------+", - ]; - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} diff --git a/datafusion/sqllogictest/test_files/references.slt b/datafusion/sqllogictest/test_files/references.slt new file mode 100644 index 000000000000..c09addb3eec2 --- /dev/null +++ b/datafusion/sqllogictest/test_files/references.slt @@ -0,0 +1,134 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +########## +## References Tests +########## + + +# Qualified table references +# Query tables with catalog prefix +statement ok +CREATE EXTERNAL TABLE aggregate_test_100 ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT, + c5 INT, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 BIGINT UNSIGNED NOT NULL, + c10 VARCHAR NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +WITH HEADER ROW +LOCATION '../../testing/data/csv/aggregate_test_100.csv' + +query I +SELECT COUNT(*) FROM aggregate_test_100; +---- +100 + +query I +SELECT COUNT(*) FROM public.aggregate_test_100; +---- +100 + +query I +SELECT COUNT(*) FROM datafusion.public.aggregate_test_100; +---- +100 + + +# Qualified table references and fields +# Query fields with prefixes +statement ok +CREATE TABLE test("f.c1" TEXT, "test.c2" INT, "...." INT) AS VALUES +('foofoo', 1, 10), +('foobar', 2, 20), +('foobaz', 3, 30); + +query error DataFusion error: Schema error: No field named f1\.c1\. Valid fields are test\."f\.c1", test\."test\.c2", test\."\.\.\.\."\. +SELECT f1.c1 FROM test; + +query T +SELECT "f.c1" FROM test +---- +foofoo +foobar +foobaz + +query T +SELECT test."f.c1" FROM test +---- +foofoo +foobar +foobaz + +query II +SELECT "test.c2" AS expr1, test."test.c2" AS expr2 FROM test +---- +1 1 +2 2 +3 3 + +query II +SELECT "....", "...." AS c3 FROM test ORDER BY "...." +---- +10 10 +20 20 +30 30 + +query TT +EXPLAIN (SELECT "....", "...." AS c3 FROM test ORDER BY "...."); +---- +logical_plan +Sort: test..... ASC NULLS LAST +--Projection: test....., test..... AS c3 +----TableScan: test projection=[....] +physical_plan +SortExec: expr=[....@0 ASC NULLS LAST] +--ProjectionExec: expr=[....@0 as ...., ....@0 as c3] +----MemoryExec: partitions=1, partition_sizes=[1] + + +# Partial qualified name +statement ok +CREATE TABLE t1(t1_id INT, t1_name TEXT, t1_int INT) AS VALUES +(11, 'a', 1), +(22, 'b', 2), +(33, 'c', 3), +(44, 'd', 3); + +statement ok +CREATE TABLE t2(t2_id INT, t2_name TEXT, t2_int INT) AS VALUES +(11, 'z', 3), +(22, 'y', 1), +(44, 'x', 3), +(55, 'w', 3); + +query IT +SELECT t1.t1_id, t1_name FROM public.t1; +---- +11 a +22 b +33 c +44 d