From 6271f48e4d8d1bf2b43ef1da81f6d9f681e38d63 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Thu, 28 Sep 2023 11:05:46 +0800 Subject: [PATCH] Add test for list_nested_decimal (#1572) --- parquet_integration/write_parquet.py | 12 +++++ tests/it/io/parquet/mod.rs | 73 ++++++++++++++++++++++++++++ tests/it/io/parquet/read.rs | 5 ++ 3 files changed, 90 insertions(+) diff --git a/parquet_integration/write_parquet.py b/parquet_integration/write_parquet.py index a7f7560fc5..072b59c775 100644 --- a/parquet_integration/write_parquet.py +++ b/parquet_integration/write_parquet.py @@ -179,6 +179,16 @@ def case_nested() -> Tuple[dict, pa.Schema, str]: ] decimal_nullable = [[Decimal(n) if n is not None else None for n in sublist] if sublist is not None else None for sublist in items_nullable] + decimal_nested = [ + [[Decimal(0), Decimal(1)]], + None, + [[Decimal(2), None], [Decimal(3)]], + [[Decimal(4), Decimal(5)], [Decimal(6)]], + [], + [[Decimal(7)], None, [Decimal(9)]], + [[], [None], None], + [[Decimal(10)]], + ] list_struct_nullable = [ [{"a": "a"}, {"a": "b"}], @@ -227,6 +237,7 @@ def case_nested() -> Tuple[dict, pa.Schema, str]: pa.field("list_decimal", pa.list_(pa.decimal128(9, 0))), pa.field("list_decimal256", pa.list_(pa.decimal256(9, 0))), pa.field("list_nested_i64", pa.list_(pa.list_(pa.int64()))), + pa.field("list_nested_decimal", pa.list_(pa.list_(pa.decimal128(9, 0)))), pa.field("list_nested_inner_required_i64", pa.list_(pa.list_(pa.int64()))), pa.field( "list_nested_inner_required_required_i64", pa.list_(pa.list_(pa.int64())) @@ -258,6 +269,7 @@ def case_nested() -> Tuple[dict, pa.Schema, str]: "list_decimal": decimal_nullable, "list_decimal256": decimal_nullable, "list_nested_i64": items_nested, + "list_nested_decimal": decimal_nested, "list_nested_inner_required_i64": items_required_nested, "list_nested_inner_required_required_i64": items_required_nested_2, "list_struct_nullable": list_struct_nullable, diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 4539d21a33..94d6cdf77e 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -256,6 +256,7 @@ pub fn pyarrow_nested_nullable(column: &str) -> Box { Box::new(array) } "list_nested_i64" + | "list_nested_decimal" | "list_nested_inner_required_i64" | "list_nested_inner_required_required_i64" => Box::new(NullArray::new(DataType::Null, 1)), "struct_list_nullable" => pyarrow_nested_nullable("list_utf8"), @@ -389,6 +390,48 @@ pub fn pyarrow_nested_nullable(column: &str) -> Box { let array: ListArray = a.into(); Box::new(array) } + "list_nested_decimal" => { + // [ + // [[Decimal(0), Decimal(1)]], + // None, + // [[Decimal(2), None], [Decimal(3)]], + // [[Decimal(4), Decimal(5)], [Decimal(6)]], + // [], + // [[Decimal(7)], None, [Decimal(9)]], + // [[], [None], None], + // [[Decimal(10)]], + // ] + + let data = [ + Some(vec![Some(vec![Some(0), Some(1)])]), + None, + Some(vec![Some(vec![Some(2), None]), Some(vec![Some(3)])]), + Some(vec![Some(vec![Some(4), Some(5)]), Some(vec![Some(6)])]), + Some(vec![]), + Some(vec![Some(vec![Some(7)]), None, Some(vec![Some(9)])]), + Some(vec![Some(vec![]), Some(vec![None]), None]), + Some(vec![Some(vec![Some(10)])]), + ]; + + let inner_array = MutablePrimitiveArray::::from(DataType::Decimal(9, 0)); + let middle_array = MutableListArray::>::new_from( + inner_array.clone(), + ListArray::::default_datatype(inner_array.data_type().clone()), + 0, + ); + let mut outer_array = MutableListArray::< + i32, + MutableListArray>, + >::new_from( + middle_array.clone(), + ListArray::::default_datatype(middle_array.data_type().clone()), + 0, + ); + + outer_array.try_extend(data).unwrap(); + let array: ListArray = outer_array.into(); + Box::new(array) + } "list_nested_inner_required_i64" => { let data = [ Some(vec![Some(vec![Some(0), Some(1)])]), @@ -948,6 +991,36 @@ pub fn pyarrow_nested_nullable_statistics(column: &str) -> Statistics { ) .boxed(), }, + "list_nested_decimal" => Statistics { + distinct_count: new_list( + new_list(UInt64Array::from([None]).boxed(), true).boxed(), + true, + ) + .boxed(), + null_count: new_list( + new_list(Box::new(UInt64Array::from_slice([7])), true).boxed(), + true, + ) + .boxed(), + min_value: new_list( + new_list( + Box::new(Int128Array::from_slice([0]).to(DataType::Decimal(9, 0))), + true, + ) + .boxed(), + true, + ) + .boxed(), + max_value: new_list( + new_list( + Box::new(Int128Array::from_slice([10]).to(DataType::Decimal(9, 0))), + true, + ) + .boxed(), + true, + ) + .boxed(), + }, "list_nested_inner_required_required_i64" => Statistics { distinct_count: UInt64Array::from([None]).boxed(), null_count: UInt64Array::from([Some(0)]).boxed(), diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs index 8f45eb874d..7689f1532f 100644 --- a/tests/it/io/parquet/read.rs +++ b/tests/it/io/parquet/read.rs @@ -339,6 +339,11 @@ fn v2_nested_nested() -> Result<()> { test_pyarrow_integration("list_nested_i64", 2, "nested", false, false, None) } +#[test] +fn v2_nested_nested_decimal() -> Result<()> { + test_pyarrow_integration("list_nested_decimal", 2, "nested", false, false, None) +} + #[test] fn v2_nested_nested_required() -> Result<()> { test_pyarrow_integration(