Skip to content

Commit 97f99da

Browse files
committed
[Variant] Implement new VariantValueArrayBuilder
1 parent 428aae1 commit 97f99da

File tree

3 files changed

+177
-3
lines changed

3 files changed

+177
-3
lines changed

parquet-variant-compute/src/variant_array_builder.rs

Lines changed: 175 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@ use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuild
2222
use arrow_schema::{ArrowError, DataType, Field, Fields};
2323
use parquet_variant::{
2424
BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
25+
EMPTY_VARIANT_METADATA,
26+
};
27+
use parquet_variant::{
28+
ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
2529
};
26-
use parquet_variant::{ParentState, ValueBuilder, WritableMetadataBuilder};
2730
use std::sync::Arc;
2831

2932
/// A builder for [`VariantArray`]
@@ -205,6 +208,134 @@ impl VariantBuilderExt for VariantArrayBuilder {
205208
}
206209
}
207210

211+
/// A builder for creating only the value column of a [`VariantArray`]
212+
///
213+
/// This builder is used when you have existing metadata and only need to build
214+
/// the value column. It's useful for scenarios like variant unshredding, data
215+
/// transformation, or filtering where you want to reuse existing metadata.
216+
///
217+
/// The builder produces a [`BinaryViewArray`] that can be combined with existing
218+
/// metadata to create a complete [`VariantArray`].
219+
///
220+
/// # Example:
221+
/// ```
222+
/// # use arrow::array::Array;
223+
/// # use parquet_variant::{Variant, EMPTY_VARIANT_METADATA};
224+
/// # use parquet_variant_compute::VariantValueArrayBuilder;
225+
/// // Create a variant value builder for 10 rows
226+
/// let mut builder = VariantValueArrayBuilder::new(10);
227+
///
228+
/// // Append some values with their corresponding metadata
229+
/// // In practice, you should use the existing metadata you have access to.
230+
/// builder.append_value(Variant::from(42), EMPTY_VARIANT_METADATA).unwrap();
231+
/// builder.append_null();
232+
/// builder.append_value(Variant::from("hello"), EMPTY_VARIANT_METADATA).unwrap();
233+
///
234+
/// // Build the final value array
235+
/// let value_array = builder.build();
236+
/// assert_eq!(value_array.len(), 3);
237+
/// ```
238+
#[derive(Debug)]
239+
#[allow(unused)]
240+
pub struct VariantValueArrayBuilder {
241+
value_builder: ValueBuilder,
242+
value_offsets: Vec<usize>,
243+
nulls: NullBufferBuilder,
244+
}
245+
246+
#[allow(unused)]
247+
impl VariantValueArrayBuilder {
248+
/// Create a new `VariantValueArrayBuilder` with the specified row capacity
249+
pub fn new(row_capacity: usize) -> Self {
250+
Self {
251+
value_builder: ValueBuilder::new(),
252+
value_offsets: Vec::with_capacity(row_capacity),
253+
nulls: NullBufferBuilder::new(row_capacity),
254+
}
255+
}
256+
257+
/// Build the final value array
258+
///
259+
/// Returns a [`BinaryViewArray`] containing the serialized variant values.
260+
/// This can be combined with existing metadata to create a complete [`VariantArray`].
261+
pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> {
262+
let value_buffer = self.value_builder.into_inner();
263+
let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets);
264+
if let Some(nulls) = self.nulls.finish() {
265+
let (views, buffers, _) = array.into_parts();
266+
array = BinaryViewArray::try_new(views, buffers, Some(nulls))?;
267+
}
268+
Ok(array)
269+
}
270+
271+
/// Append a null row to the builder
272+
///
273+
/// WARNING: It is only safe to call this method when building the `value` field of a shredded
274+
/// variant column (which is nullable). The `value` field of a binary (unshredded) variant
275+
/// column is non-nullable, and callers should instead invoke [`Self::append_value`] with
276+
/// `Variant::Null`, passing the appropriate metadata value.
277+
pub fn append_null(&mut self) {
278+
self.value_offsets.push(self.value_builder.offset());
279+
self.nulls.append_null();
280+
}
281+
282+
/// Append a variant value with its corresponding metadata
283+
///
284+
/// # Arguments
285+
/// * `value` - The variant value to append
286+
/// * `metadata` - The metadata dictionary for this variant (used for field name resolution)
287+
///
288+
/// # Returns
289+
/// * `Ok(())` if the value was successfully appended
290+
/// * `Err(ArrowError)` if the variant contains field names not found in the metadata
291+
///
292+
/// # Example
293+
/// ```
294+
/// # use parquet_variant::{Variant, EMPTY_VARIANT_METADATA};
295+
/// # use parquet_variant_compute::VariantValueArrayBuilder;
296+
/// let mut builder = VariantValueArrayBuilder::new(10);
297+
/// builder.append_value(Variant::from(42), EMPTY_VARIANT_METADATA).unwrap();
298+
/// ```
299+
pub fn append_value(&mut self, value: Variant<'_, '_>) {
300+
let metadata = value.metadata().cloned().unwrap_or(EMPTY_VARIANT_METADATA);
301+
let mut metadata_builder = ReadOnlyMetadataBuilder::new(metadata);
302+
ValueBuilder::append_variant_bytes(self.parent_state(&mut metadata_builder), value);
303+
}
304+
305+
/// Creates a builder-specific parent state
306+
pub fn parent_state<'a>(
307+
&'a mut self,
308+
metadata_builder: &'a mut dyn MetadataBuilder,
309+
) -> ParentState<'a, ValueArrayBuilderState<'a>> {
310+
let state = ValueArrayBuilderState {
311+
value_offsets: &mut self.value_offsets,
312+
nulls: &mut self.nulls,
313+
};
314+
315+
ParentState::new(&mut self.value_builder, metadata_builder, state)
316+
}
317+
}
318+
319+
/// Builder-specific state for array building that manages array-level offsets and nulls. See
320+
/// [`VariantBuilderExt`] for details.
321+
#[derive(Debug)]
322+
pub struct ValueArrayBuilderState<'a> {
323+
value_offsets: &'a mut Vec<usize>,
324+
nulls: &'a mut NullBufferBuilder,
325+
}
326+
327+
// All changes are pending until finalized
328+
impl BuilderSpecificState for ValueArrayBuilderState<'_> {
329+
fn finish(
330+
&mut self,
331+
_metadata_builder: &mut dyn MetadataBuilder,
332+
value_builder: &mut ValueBuilder,
333+
) {
334+
self.value_offsets.push(value_builder.offset());
335+
self.nulls.append_non_null();
336+
}
337+
}
338+
208339
fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
209340
// All offsets are less than or equal to the buffer length, so we can safely cast all offsets
210341
// inside the loop below, as long as the buffer length fits in u32.
@@ -228,6 +359,7 @@ fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> Binar
228359
mod test {
229360
use super::*;
230361
use arrow::array::Array;
362+
use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
231363

232364
/// Test that both the metadata and value buffers are non nullable
233365
#[test]
@@ -288,4 +420,46 @@ mod test {
288420
let list = variant.as_list().expect("variant to be a list");
289421
assert_eq!(list.len(), 2);
290422
}
423+
424+
/// Primitive values and a null row produce one array slot each, with the validity mask
/// matching the order in which rows were appended.
#[test]
fn test_variant_value_array_builder_basic() {
    let mut builder = VariantValueArrayBuilder::new(10);

    // Append some values
    builder.append_value(Variant::from(42i32));
    builder.append_null();
    builder.append_value(Variant::from("hello"));

    let value_array = builder.build().unwrap();
    assert_eq!(value_array.len(), 3);

    // Length alone would not catch a misplaced or missing null, so check validity per row
    assert_eq!(value_array.null_count(), 1);
    assert!(value_array.is_valid(0));
    assert!(value_array.is_null(1));
    assert!(value_array.is_valid(2));
}
436+
437+
/// An object variant built against pre-existing metadata can be appended to the value
/// array builder, followed by a null row.
#[test]
fn test_variant_value_array_builder_with_objects() {
    // Create metadata with field names
    let mut metadata_builder = WritableMetadataBuilder::default();
    metadata_builder.upsert_field_name("name");
    metadata_builder.upsert_field_name("age");
    metadata_builder.finish();
    let metadata_bytes = metadata_builder.into_inner();
    let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

    // Create a variant with an object using the same metadata
    let mut variant_builder = VariantBuilder::new().with_metadata(metadata);
    variant_builder
        .new_object()
        .with_field("name", "Alice")
        .with_field("age", 30i32)
        .finish();
    let (_, value_bytes) = variant_builder.finish();
    let variant = Variant::try_new(&metadata_bytes, &value_bytes).unwrap();

    // Now use the value array builder
    let mut builder = VariantValueArrayBuilder::new(10);
    builder.append_value(variant);
    builder.append_null();

    let value_array = builder.build().unwrap();
    assert_eq!(value_array.len(), 2);

    // Length alone would not catch a misplaced or missing null, so check validity per row
    assert_eq!(value_array.null_count(), 1);
    assert!(value_array.is_valid(0));
    assert!(value_array.is_null(1));
}
291465
}

parquet-variant/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ pub struct WritableMetadataBuilder {
562562

563563
impl WritableMetadataBuilder {
564564
/// Upsert field name to dictionary, return its ID
565-
fn upsert_field_name(&mut self, field_name: &str) -> u32 {
565+
pub fn upsert_field_name(&mut self, field_name: &str) -> u32 {
566566
let (id, new_entry) = self.field_names.insert_full(field_name.to_string());
567567

568568
if new_entry {

parquet-variant/src/variant.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,7 @@ impl<'m, 'v> Variant<'m, 'v> {
13201320
/// Return the metadata associated with this variant, if any.
13211321
///
13221322
/// Returns `Some(&VariantMetadata)` for object and list variants,
1323-
pub fn metadata(&self) -> Option<&'m VariantMetadata<'_>> {
1323+
pub fn metadata(&self) -> Option<&VariantMetadata<'m>> {
13241324
match self {
13251325
Variant::Object(VariantObject { metadata, .. })
13261326
| Variant::List(VariantList { metadata, .. }) => Some(metadata),

0 commit comments

Comments
 (0)