|
17 | 17 |
|
18 | 18 | //! Module for unshredding VariantArray by folding typed_value columns back into the value column.
|
19 | 19 |
|
| 20 | +use crate::arrow_to_variant::ListLikeArray; |
20 | 21 | use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
|
21 | 22 | use arrow::array::{
|
22 |
| - Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, PrimitiveArray, |
23 |
| - StringArray, StructArray, |
| 23 | + Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, |
| 24 | + GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray, |
24 | 25 | };
|
25 | 26 | use arrow::buffer::NullBuffer;
|
26 | 27 | use arrow::datatypes::{
|
@@ -99,6 +100,11 @@ enum UnshredVariantRowBuilder<'a> {
|
99 | 100 | PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
|
100 | 101 | PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
|
101 | 102 | PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>),
|
| 103 | + List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>), |
| 104 | + LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>), |
| 105 | + ListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i32>>), |
| 106 | + LargeListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i64>>), |
| 107 | + FixedSizeList(ListUnshredVariantBuilder<'a, FixedSizeListArray>), |
102 | 108 | Struct(StructUnshredVariantBuilder<'a>),
|
103 | 109 | ValueOnly(ValueOnlyUnshredVariantBuilder<'a>),
|
104 | 110 | Null(NullUnshredVariantBuilder<'a>),
|
@@ -132,6 +138,11 @@ impl<'a> UnshredVariantRowBuilder<'a> {
|
132 | 138 | Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
|
133 | 139 | Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index),
|
134 | 140 | Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
|
| 141 | + Self::List(b) => b.append_row(builder, metadata, index), |
| 142 | + Self::LargeList(b) => b.append_row(builder, metadata, index), |
| 143 | + Self::ListView(b) => b.append_row(builder, metadata, index), |
| 144 | + Self::LargeListView(b) => b.append_row(builder, metadata, index), |
| 145 | + Self::FixedSizeList(b) => b.append_row(builder, metadata, index), |
135 | 146 | Self::Struct(b) => b.append_row(builder, metadata, index),
|
136 | 147 | Self::ValueOnly(b) => b.append_row(builder, metadata, index),
|
137 | 148 | Self::Null(b) => b.append_row(builder, metadata, index),
|
@@ -208,6 +219,25 @@ impl<'a> UnshredVariantRowBuilder<'a> {
|
208 | 219 | value,
|
209 | 220 | typed_value.as_struct(),
|
210 | 221 | )?),
|
| 222 | + DataType::List(_) => Self::List(ListUnshredVariantBuilder::try_new( |
| 223 | + value, |
| 224 | + typed_value.as_list(), |
| 225 | + )?), |
| 226 | + DataType::LargeList(_) => Self::LargeList(ListUnshredVariantBuilder::try_new( |
| 227 | + value, |
| 228 | + typed_value.as_list(), |
| 229 | + )?), |
| 230 | + DataType::ListView(_) => Self::ListView(ListUnshredVariantBuilder::try_new( |
| 231 | + value, |
| 232 | + typed_value.as_list_view(), |
| 233 | + )?), |
| 234 | + DataType::LargeListView(_) => Self::LargeListView(ListUnshredVariantBuilder::try_new( |
| 235 | + value, |
| 236 | + typed_value.as_list_view(), |
| 237 | + )?), |
| 238 | + DataType::FixedSizeList(_, _) => Self::FixedSizeList( |
| 239 | + ListUnshredVariantBuilder::try_new(value, typed_value.as_fixed_size_list())?, |
| 240 | + ), |
211 | 241 | _ => {
|
212 | 242 | return Err(ArrowError::NotYetImplemented(format!(
|
213 | 243 | "Unshredding not yet supported for type: {}",
|
@@ -517,5 +547,61 @@ impl<'a> StructUnshredVariantBuilder<'a> {
|
517 | 547 | }
|
518 | 548 | }
|
519 | 549 |
|
| 550 | +/// Builder for unshredding list/array types with recursive element processing |
| 551 | +struct ListUnshredVariantBuilder<'a, L: ListLikeArray> { |
| 552 | + value: Option<&'a BinaryViewArray>, |
| 553 | + typed_value: &'a L, |
| 554 | + element_unshredder: Box<UnshredVariantRowBuilder<'a>>, |
| 555 | +} |
| 556 | + |
| 557 | +impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { |
| 558 | + fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result<Self> { |
| 559 | + // Create a recursive unshredder for the list elements |
| 560 | + // The element type comes from the values array of the list |
| 561 | + let element_values = typed_value.values(); |
| 562 | + |
| 563 | + // For shredded lists, each element would be a ShreddedVariantFieldArray (struct) |
| 564 | + // Extract value/typed_value from the element struct |
| 565 | + let Some(element_values) = element_values.as_struct_opt() else { |
| 566 | + return Err(ArrowError::InvalidArgumentError(format!( |
| 567 | + "Invalid shredded variant array element: expected Struct, got {}", |
| 568 | + element_values.data_type() |
| 569 | + ))); |
| 570 | + }; |
| 571 | + |
| 572 | + // Create recursive unshredder for elements |
| 573 | + // |
| 574 | + // NOTE: A None/None array element is technically invalid, but the shredding spec |
| 575 | + // requires us to emit `Variant::Null` when a required value is missing. |
| 576 | + let element_unshredder = UnshredVariantRowBuilder::try_new_opt(element_values.try_into()?)? |
| 577 | + .unwrap_or_else(|| UnshredVariantRowBuilder::null(None)); |
| 578 | + |
| 579 | + Ok(Self { |
| 580 | + value, |
| 581 | + typed_value, |
| 582 | + element_unshredder: Box::new(element_unshredder), |
| 583 | + }) |
| 584 | + } |
| 585 | + |
| 586 | + fn append_row( |
| 587 | + &mut self, |
| 588 | + builder: &mut impl VariantBuilderExt, |
| 589 | + metadata: &VariantMetadata, |
| 590 | + index: usize, |
| 591 | + ) -> Result<()> { |
| 592 | + handle_unshredded_case!(self, builder, metadata, index, false); |
| 593 | + |
| 594 | + // If we get here, typed_value is valid and value is NULL -- process the list elements |
| 595 | + let mut list_builder = builder.try_new_list()?; |
| 596 | + for element_index in self.typed_value.element_range(index) { |
| 597 | + self.element_unshredder |
| 598 | + .append_row(&mut list_builder, metadata, element_index)?; |
| 599 | + } |
| 600 | + |
| 601 | + list_builder.finish(); |
| 602 | + Ok(()) |
| 603 | + } |
| 604 | +} |
| 605 | + |
520 | 606 | // TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice?
|
521 | 607 | // Or do we also need targeted stand-alone unit tests for full coverage?
|
0 commit comments