Skip to content

test: Port tests in select.rs to sqllogictest - Part II #8953

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions datafusion/core/tests/sql/explain_analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use super::*;

use datafusion::config::ConfigOptions;
use datafusion::physical_plan::collect;
use datafusion::physical_plan::display::DisplayableExecutionPlan;
use datafusion::physical_plan::metrics::Timestamp;

Expand Down
17 changes: 0 additions & 17 deletions datafusion/core/tests/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use arrow::{
util::display::array_value_to_string,
};

use datafusion::datasource::TableProvider;
use datafusion::error::Result;
use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan};
use datafusion::physical_plan::metrics::MetricValue;
Expand All @@ -31,7 +30,6 @@ use datafusion::physical_plan::ExecutionPlanVisitor;
use datafusion::prelude::*;
use datafusion::test_util;
use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
use datafusion::{datasource::MemTable, physical_plan::collect};
use datafusion::{execution::context::SessionContext, physical_plan::displayable};
use datafusion_common::{assert_contains, assert_not_contains};
use object_store::path::Path;
Expand Down Expand Up @@ -322,21 +320,6 @@ async fn register_alltypes_parquet(ctx: &SessionContext) {
.unwrap();
}

/// Build an in-memory table provider exposing one nullable Int32
/// column named `i` that contains every value in the inclusive
/// range `seq_start..=seq_end`.
pub fn table_with_sequence(
    seq_start: i32,
    seq_end: i32,
) -> Result<Arc<dyn TableProvider>> {
    // Single-column schema: "i" (Int32, nullable).
    let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)]));
    // Materialize the inclusive sequence into one Int32 array.
    let values: Vec<i32> = (seq_start..=seq_end).collect();
    let column: ArrayRef = Arc::new(Int32Array::from(values));
    // One partition holding a single record batch.
    let batch = RecordBatch::try_new(schema.clone(), vec![column])?;
    Ok(Arc::new(MemTable::try_new(schema, vec![vec![batch]])?))
}

pub struct ExplainNormalizer {
replacements: Vec<(String, String)>,
}
Expand Down
90 changes: 0 additions & 90 deletions datafusion/core/tests/sql/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,93 +571,3 @@ async fn test_named_query_parameters() -> Result<()> {
assert_batches_sorted_eq!(expected, &results);
Ok(())
}

#[tokio::test]
async fn parallel_query_with_filter() -> Result<()> {
    // Build a context whose `test` table is spread over four partitions
    // backed by files in a temporary directory.
    let tmp_dir = TempDir::new()?;
    let ctx = create_ctx_with_partition(&tmp_dir, 4).await?;

    // Exclusive bounds keep only rows where c1 is 1 or 2.
    let dataframe = ctx
        .sql("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3")
        .await?;
    let results = dataframe.collect().await.unwrap();

    // Ten c2 values per surviving c1 value, compared order-insensitively.
    let expected = vec![
        "+----+----+",
        "| c1 | c2 |",
        "+----+----+",
        "| 1  | 1  |",
        "| 1  | 10 |",
        "| 1  | 2  |",
        "| 1  | 3  |",
        "| 1  | 4  |",
        "| 1  | 5  |",
        "| 1  | 6  |",
        "| 1  | 7  |",
        "| 1  | 8  |",
        "| 1  | 9  |",
        "| 2  | 1  |",
        "| 2  | 10 |",
        "| 2  | 2  |",
        "| 2  | 3  |",
        "| 2  | 4  |",
        "| 2  | 5  |",
        "| 2  | 6  |",
        "| 2  | 7  |",
        "| 2  | 8  |",
        "| 2  | 9  |",
        "+----+----+",
    ];
    assert_batches_sorted_eq!(expected, &results);

    Ok(())
}

#[tokio::test]
async fn boolean_literal() -> Result<()> {
    // Compare a boolean column against the literal `true` in a WHERE
    // clause, running the query across 4 partitions.
    let results =
        execute_with_partition("SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true", 4)
            .await?;

    // Only c1 = 3 rows with c3 = true survive the filter.
    let expected = [
        "+----+------+",
        "| c1 | c3   |",
        "+----+------+",
        "| 3  | true |",
        "| 3  | true |",
        "| 3  | true |",
        "| 3  | true |",
        "| 3  | true |",
        "+----+------+",
    ];
    assert_batches_sorted_eq!(expected, &results);

    Ok(())
}

#[tokio::test]
async fn unprojected_filter() {
    let ctx = SessionContext::new_with_config(SessionConfig::new());

    // Table with a single Int32 column `i` holding 1, 2, 3.
    let df = ctx.read_table(table_with_sequence(1, 3).unwrap()).unwrap();

    // Filter on `i`, then project an expression (i + i) — the filtered
    // column itself is not part of the final projection.
    let df = df
        .filter(col("i").gt(lit(2)))
        .unwrap()
        .select(vec![col("i") + col("i")])
        .unwrap();

    // Print the optimized plan for debugging visibility.
    let plan = df.clone().into_optimized_plan().unwrap();
    println!("{}", plan.display_indent());

    let results = df.collect().await.unwrap();

    // Only i = 3 passes the filter, so the sole output row is 3 + 3 = 6.
    let expected = [
        "+-----------------------+",
        "| ?table?.i + ?table?.i |",
        "+-----------------------+",
        "| 6                     |",
        "+-----------------------+",
    ];
    assert_batches_sorted_eq!(expected, &results);
}
151 changes: 145 additions & 6 deletions datafusion/sqllogictest/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,NULL,'F',3.5)

# Test non-literal expressions in VALUES
query II
VALUES (1, CASE WHEN RANDOM() > 0.5 THEN 1 ELSE 1 END),
VALUES (1, CASE WHEN RANDOM() > 0.5 THEN 1 ELSE 1 END),
(2, CASE WHEN RANDOM() > 0.5 THEN 2 ELSE 2 END);
----
1 1
Expand Down Expand Up @@ -669,9 +669,104 @@ c 3 1 b 0 29
c 3 1 a 0 -85
c 3 1 b 4 -82

# TODO: test_prepare_statement
# TODO: Test prepare statement
# Dependency on https://github.com/apache/arrow-datafusion/issues/4539#issuecomment-1755430857

# TODO: parallel_query_with_filter
# TODO: Test named query parameters
# Dependency on https://github.com/apache/arrow-datafusion/issues/4539#issuecomment-1755430857


######
# Parallel query with filter
######

# Set up csv files
statement ok
CREATE TABLE src_table_base (
c2 INT,
c3 BOOLEAN
) AS VALUES
(1, FALSE),
(2, TRUE),
(3, FALSE),
(4, TRUE),
(5, FALSE),
(6, TRUE),
(7, FALSE),
(8, TRUE),
(9, FALSE),
(10, TRUE);

query IIB
COPY (
SELECT 0 as c1, c2, c3 FROM src_table_base
) TO 'test_files/scratch/select/csv_partitions/partition-1.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
10

query IIB
COPY (
SELECT 1 as c1, c2, c3 FROM src_table_base
) TO 'test_files/scratch/select/csv_partitions/partition-2.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
10

query IIB
COPY (
SELECT 2 as c1, c2, c3 FROM src_table_base
) TO 'test_files/scratch/select/csv_partitions/partition-3.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
10

query IIB
COPY (
SELECT 3 as c1, c2, c3 FROM src_table_base
) TO 'test_files/scratch/select/csv_partitions/partition-4.csv'
(FORMAT CSV, SINGLE_FILE_OUTPUT true);
----
10

# Set up table
statement ok
CREATE EXTERNAL TABLE test (
c1 INT,
c2 INT,
c3 BOOLEAN,
)
STORED AS CSV
WITH HEADER ROW
LOCATION 'test_files/scratch/select/csv_partitions'

query II
SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3 ORDER BY c1, c2;
----
1 1
1 2
1 3
1 4
1 5
1 6
1 7
1 8
1 9
1 10
2 1
2 2
2 3
2 4
2 5
2 6
2 7
2 8
2 9
2 10

# Drop table
statement ok
DROP TABLE test;


# query with filter string type coercion
Expand Down Expand Up @@ -705,10 +800,54 @@ CREATE TABLE empty_table;
statement ok
SELECT * FROM empty_table

######
# Boolean literal
######

# TODO: boolean_literal
statement ok
CREATE EXTERNAL TABLE test_base (
c1 INT,
c2 INT,
c3 BOOLEAN,
)
STORED AS CSV
WITH HEADER ROW
LOCATION 'test_files/scratch/select/csv_partitions'

# TODO: unprojected_filter
statement ok
CREATE TABLE test AS
SELECT
arrow_cast(c1, 'UInt32') as c1,
arrow_cast(c2, 'UInt64') as c2,
arrow_cast(c3, 'Boolean') as c3
FROM test_base;

query IB
SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true
----
3 true
3 true
3 true
3 true
3 true

# Drop table
statement ok
DROP TABLE test;


######
# Unprojected filter
######

statement ok
CREATE TABLE test(i INT) AS
VALUES (1), (2), (3);

query I
SELECT i + i FROM test WHERE i > 2;
----
6


# case sensitive in default dialect
Expand Down Expand Up @@ -1121,7 +1260,7 @@ query II
SELECT
CASE WHEN B.x > 0 THEN A.x / B.x ELSE 0 END AS value1,
CASE WHEN B.x > 0 AND B.y > 0 THEN A.x / B.x ELSE 0 END AS value3
FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B;
FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B;
----
0 0
0 0
Expand Down