Skip to content

test: Port tests in partitioned_csv.rs to sqllogictest #8919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion datafusion/core/tests/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ pub mod create_drop;
pub mod explain_analyze;
pub mod expr;
pub mod joins;
pub mod partitioned_csv;
pub mod repartition;
pub mod select;
mod sql_api;
Expand Down
77 changes: 0 additions & 77 deletions datafusion/core/tests/sql/partitioned_csv.rs

This file was deleted.

6 changes: 3 additions & 3 deletions datafusion/core/tests/sql/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ async fn sort_on_window_null_string() -> Result<()> {
async fn test_prepare_statement() -> Result<()> {
let tmp_dir = TempDir::new()?;
let partition_count = 4;
let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?;
let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same code existed in both partitioned_csv.rs and mod.rs, so I dropped the former and replaced its call sites with calls to the function in mod.rs.


// sql to statement then to prepare logical plan with parameters
// c1 defined as UInt32, c2 defined as UInt64 but the params are Int32 and Float64
Expand Down Expand Up @@ -529,7 +529,7 @@ async fn test_prepare_statement() -> Result<()> {
async fn test_named_query_parameters() -> Result<()> {
let tmp_dir = TempDir::new()?;
let partition_count = 4;
let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?;
let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?;

// sql to statement then to logical plan with parameters
// c1 defined as UInt32, c2 defined as UInt64
Expand Down Expand Up @@ -576,7 +576,7 @@ async fn test_named_query_parameters() -> Result<()> {
async fn parallel_query_with_filter() -> Result<()> {
let tmp_dir = TempDir::new()?;
let partition_count = 4;
let ctx = partitioned_csv::create_ctx(&tmp_dir, partition_count).await?;
let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?;

let dataframe = ctx
.sql("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3")
Expand Down
73 changes: 73 additions & 0 deletions datafusion/sqllogictest/test_files/csv_files.slt
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,76 @@ id6 value"6
id7 value"7
id8 value"8
id9 value"9


# Read partitioned csv
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

partitioned_csv.rs did not have any unit tests of its own, so I added a new sqllogictest case that covers reading from partitioned CSV files.

# Source data for the first CSV file: four rows (int_col 1-4), all with
# partition_col = 1.
statement ok
CREATE TABLE src_table_1 (
int_col INT,
string_col TEXT,
bigint_col BIGINT,
partition_col INT
) AS VALUES
(1, 'aaa', 100, 1),
(2, 'bbb', 200, 1),
(3, 'ccc', 300, 1),
(4, 'ddd', 400, 1);

# Source data for the second CSV file: four rows (int_col 5-8), all with
# partition_col = 2. Same column layout as src_table_1.
statement ok
CREATE TABLE src_table_2 (
int_col INT,
string_col TEXT,
bigint_col BIGINT,
partition_col INT
) AS VALUES
(5, 'eee', 500, 2),
(6, 'fff', 600, 2),
(7, 'ggg', 700, 2),
(8, 'hhh', 800, 2);

# Export each source table to its own CSV file inside the shared
# csv_partitions directory. The expected result of each COPY is a single
# integer column (the value 4 matches the four rows in each source table),
# so the type string is `I`: one character per result column.
query I
COPY src_table_1 TO 'test_files/scratch/csv_files/csv_partitions/1.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
4


query I
COPY src_table_2 TO 'test_files/scratch/csv_files/csv_partitions/2.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
4

# Register the csv_partitions directory (not an individual file) as one
# external table, so it covers both 1.csv and 2.csv written by the COPY
# statements above. The column list mirrors the source tables.
statement ok
CREATE EXTERNAL TABLE partitioned_table (
int_col INT,
string_col TEXT,
bigint_col BIGINT,
partition_col INT
)
STORED AS CSV
WITH HEADER ROW
LOCATION 'test_files/scratch/csv_files/csv_partitions';

# Reading the external table must return all eight rows from both CSV files.
# ORDER BY int_col makes the expected output deterministic.
query ITII
SELECT * FROM partitioned_table ORDER BY int_col;
----
1 aaa 100 1
2 bbb 200 1
3 ccc 300 1
4 ddd 400 1
5 eee 500 2
6 fff 600 2
7 ggg 700 2
8 hhh 800 2

# Pin the plan for the same query: the expected CsvExec lists two file groups,
# one for 1.csv and one for 2.csv, with has_header=true.
query TT
EXPLAIN SELECT * FROM partitioned_table ORDER BY int_col;
----
logical_plan
Sort: partitioned_table.int_col ASC NULLS LAST
--TableScan: partitioned_table projection=[int_col, string_col, bigint_col, partition_col]
physical_plan
SortPreservingMergeExec: [int_col@0 ASC NULLS LAST]
--SortExec: expr=[int_col@0 ASC NULLS LAST]
----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=true