Skip to content

Commit 422da15

Browse files
authored
[Variant] Add a VariantBuilderExt impl for VariantValueArrayBuilder (#8501)
# Which issue does this PR close? - Relates to #8336 # Rationale for this change `VariantValueArrayBuilder` was already a bit inconvenient to work with, because it provides no implementation of `VariantBuilderExt`. Pathfinding for variant unshredding exposed this as an actual problem and not merely an inconvenience. # What changes are included in this PR? Add a thin wrapper for `VariantValueArrayBuilder` for which we can `impl VariantBuilderExt` -- thus allowing the value builder to participate fully in the variant builder ecosystem. # Are these changes tested? Yes, existing unit tests updated to use the new capability. # Are there any user-facing changes? New pub method `VariantValueArrayBuilder::as_builder_ext`
1 parent 027e9e5 commit 422da15

File tree

2 files changed

+74
-16
lines changed

2 files changed

+74
-16
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use arrow::buffer::NullBuffer;
2727
use arrow::compute::CastOptions;
2828
use arrow::datatypes::{DataType, Fields};
2929
use arrow::error::{ArrowError, Result};
30-
use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant};
30+
use parquet_variant::{Variant, VariantBuilderExt};
3131

3232
use indexmap::IndexMap;
3333
use std::sync::Arc;
@@ -267,9 +267,8 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
267267
};
268268

269269
// Route the object's fields by name as either shredded or unshredded
270-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(value.metadata().clone());
271-
let state = self.value_builder.parent_state(&mut metadata_builder);
272-
let mut object_builder = ObjectBuilder::new(state, false);
270+
let mut builder = self.value_builder.builder_ext(value.metadata().clone());
271+
let mut object_builder = builder.try_new_object()?;
273272
let mut seen = std::collections::HashSet::new();
274273
let mut partially_shredded = false;
275274
for (field_name, value) in obj.iter() {
@@ -329,7 +328,7 @@ mod tests {
329328
use crate::VariantArrayBuilder;
330329
use arrow::array::{Array, Float64Array, Int64Array};
331330
use arrow::datatypes::{DataType, Field, Fields};
332-
use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt as _};
331+
use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant, VariantBuilder};
333332
use std::sync::Arc;
334333

335334
fn create_test_variant_array(values: Vec<Option<Variant<'_, '_>>>) -> VariantArray {

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuild
2222
use arrow_schema::{ArrowError, DataType, Field, Fields};
2323
use parquet_variant::{
2424
BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
25+
VariantMetadata,
2526
};
2627
use parquet_variant::{
2728
ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
@@ -294,8 +295,8 @@ impl VariantValueArrayBuilder {
294295
/// builder.append_value(Variant::from(42));
295296
/// ```
296297
pub fn append_value(&mut self, value: Variant<'_, '_>) {
297-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(value.metadata().clone());
298-
ValueBuilder::append_variant_bytes(self.parent_state(&mut metadata_builder), value);
298+
self.builder_ext(value.metadata().clone())
299+
.append_value(value);
299300
}
300301

301302
/// Creates a builder-specific parent state.
@@ -333,6 +334,18 @@ impl VariantValueArrayBuilder {
333334

334335
ParentState::new(&mut self.value_builder, metadata_builder, state)
335336
}
337+
338+
/// Creates a thin [`VariantBuilderExt`] wrapper for this builder, which hides the `metadata`
339+
/// parameter (similar to the way [`parquet_variant::ObjectFieldBuilder`] hides field names).
340+
pub fn builder_ext<'a>(
341+
&'a mut self,
342+
metadata: VariantMetadata<'a>,
343+
) -> VariantValueArrayBuilderExt<'a> {
344+
VariantValueArrayBuilderExt {
345+
metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
346+
value_builder: self,
347+
}
348+
}
336349
}
337350

338351
/// Builder-specific state for array building that manages array-level offsets and nulls. See
@@ -355,6 +368,52 @@ impl BuilderSpecificState for ValueArrayBuilderState<'_> {
355368
}
356369
}
357370

371+
/// A thin [`VariantBuilderExt`] wrapper that hides the short-lived (per-row)
372+
/// [`ReadOnlyMetadataBuilder`] instances that [`VariantValueArrayBuilder`] requires.
373+
pub struct VariantValueArrayBuilderExt<'a> {
374+
metadata_builder: ReadOnlyMetadataBuilder<'a>,
375+
value_builder: &'a mut VariantValueArrayBuilder,
376+
}
377+
378+
impl<'a> VariantValueArrayBuilderExt<'a> {
379+
/// Creates a new instance from a metadata builder and a reference to a variant value builder.
380+
pub fn new(
381+
metadata_builder: ReadOnlyMetadataBuilder<'a>,
382+
value_builder: &'a mut VariantValueArrayBuilder,
383+
) -> Self {
384+
Self {
385+
metadata_builder,
386+
value_builder,
387+
}
388+
}
389+
}
390+
391+
impl<'a> VariantBuilderExt for VariantValueArrayBuilderExt<'a> {
392+
type State<'b>
393+
= ValueArrayBuilderState<'b>
394+
where
395+
Self: 'b;
396+
397+
fn append_null(&mut self) {
398+
self.value_builder.append_null()
399+
}
400+
401+
fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
402+
let state = self.value_builder.parent_state(&mut self.metadata_builder);
403+
ValueBuilder::append_variant_bytes(state, value.into());
404+
}
405+
406+
fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
407+
let state = self.value_builder.parent_state(&mut self.metadata_builder);
408+
Ok(ListBuilder::new(state, false))
409+
}
410+
411+
fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
412+
let state = self.value_builder.parent_state(&mut self.metadata_builder);
413+
Ok(ObjectBuilder::new(state, false))
414+
}
415+
}
416+
358417
fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
359418
// All offsets are less than or equal to the buffer length, so we can safely cast all offsets
360419
// inside the loop below, as long as the buffer length fits in u32.
@@ -482,32 +541,32 @@ mod test {
482541
//
483542
// NOTE: Because we will reuse the metadata column, we cannot reorder rows. We can only
484543
// filter or manipulate values within a row.
485-
let mut builder = VariantValueArrayBuilder::new(3);
544+
let mut value_builder = VariantValueArrayBuilder::new(3);
486545

487546
// straight copy
488-
builder.append_value(array.value(0));
547+
value_builder.append_value(array.value(0));
489548

490549
// filtering fields takes more work because we need to manually create an object builder
491550
let value = array.value(1);
492-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(value.metadata().clone());
493-
let state = builder.parent_state(&mut metadata_builder);
494-
ObjectBuilder::new(state, false)
551+
let mut builder = value_builder.builder_ext(value.metadata().clone());
552+
builder
553+
.new_object()
495554
.with_field("name", value.get_object_field("name").unwrap())
496555
.with_field("age", value.get_object_field("age").unwrap())
497556
.finish();
498557

499558
// same bytes, but now nested and duplicated inside a list
500559
let value = array.value(2);
501-
let mut metadata_builder = ReadOnlyMetadataBuilder::new(value.metadata().clone());
502-
let state = builder.parent_state(&mut metadata_builder);
503-
ListBuilder::new(state, false)
560+
let mut builder = value_builder.builder_ext(value.metadata().clone());
561+
builder
562+
.new_list()
504563
.with_value(value.clone())
505564
.with_value(value.clone())
506565
.finish();
507566

508567
let array2 = VariantArray::from_parts(
509568
array.metadata_field().clone(),
510-
Some(builder.build().unwrap()),
569+
Some(value_builder.build().unwrap()),
511570
None,
512571
None,
513572
);

0 commit comments

Comments
 (0)