diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/.00000000000000000000.json.crc b/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000000..5042c0fbc2 Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/.00000000000000000000.json.crc differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/00000000000000000000.json b/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..2db663806a --- /dev/null +++ b/rust/tests/data/delta-2.2.0-partitioned-types/_delta_log/00000000000000000000.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1670892998177,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"c1\",\"c2\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}} +{"add":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","partitionValues":{"c1":"4","c2":"c"},"size":452,"modificationTime":1670892998135,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}"}} +{"add":{"path":"c1=5/c2=b/part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet","partitionValues":{"c1":"5","c2":"b"},"size":452,"modificationTime":1670892998135,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":6},\"maxValues\":{\"c3\":6},\"nullCount\":{\"c3\":0}}"}} +{"add":{"path":"c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet","partitionValues":{"c1":"6","c2":"a"},"size":452,"modificationTime":1670892998135,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":4},\"maxValues\":{\"c3\":4},\"nullCount\":{\"c3\":0}}"}} diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/.part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet.crc b/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/.part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet.crc new file mode 100644 index 0000000000..4df00298f1 Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/.part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet.crc differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet b/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet new file mode 100644 index 0000000000..3f09f1d945 Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/.part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet.crc b/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/.part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet.crc new file mode 100644 index 0000000000..f6fffe7bcd Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/.part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet.crc differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet b/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet new file mode 100644 index 0000000000..10ec40964b Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=5/c2=b/part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/.part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet.crc b/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/.part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet.crc new file mode 100644 index 0000000000..c31be60c20 Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/.part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet.crc differ diff --git a/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet b/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet new file mode 100644 index 0000000000..a0e02daa50 Binary files /dev/null and b/rust/tests/data/delta-2.2.0-partitioned-types/c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet differ diff --git a/rust/tests/datafusion_test.rs b/rust/tests/datafusion_test.rs index 8684fe29fe..27e3642054 100644 --- a/rust/tests/datafusion_test.rs +++ b/rust/tests/datafusion_test.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use arrow::array::*; use arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; +use datafusion::assert_batches_sorted_eq; use datafusion::datasource::datasource::TableProviderFactory; use datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionContext, TaskContext}; @@ -280,3 +281,50 @@ async fn test_files_scanned() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_datafusion_partitioned_types() -> Result<()> { + let ctx = SessionContext::new(); + let table = deltalake::open_table("./tests/data/delta-2.2.0-partitioned-types") + .await + .unwrap(); + ctx.register_table("demo", Arc::new(table))?; + + let batches = ctx.sql("SELECT * FROM demo").await?.collect().await?; + + let expected = vec![ + "+----+----+----+", + "| c3 | c1 | c2 |", + "+----+----+----+", + "| 5 | 4 | c |", + "| 6 | 5 | b |", + "| 4 | 6 | a |", + "+----+----+----+", + ]; + + assert_batches_sorted_eq!(&expected, &batches); + + let expected_schema = ArrowSchema::new(vec![ + ArrowField::new("c3", ArrowDataType::Int32, true), + ArrowField::new( + "c1", + ArrowDataType::Dictionary( + Box::new(ArrowDataType::UInt16), + Box::new(ArrowDataType::Int32), + ), + false, + ), + ArrowField::new( + "c2", + ArrowDataType::Dictionary( + Box::new(ArrowDataType::UInt16), + Box::new(ArrowDataType::Utf8), + ), + false, + ), + ]); + + assert_eq!(Arc::new(expected_schema), batches[0].schema()); + + Ok(()) +}