@@ -22,8 +22,11 @@ use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuild
2222use arrow_schema:: { ArrowError , DataType , Field , Fields } ;
2323use parquet_variant:: {
2424 BuilderSpecificState , ListBuilder , MetadataBuilder , ObjectBuilder , Variant , VariantBuilderExt ,
25+ EMPTY_VARIANT_METADATA ,
26+ } ;
27+ use parquet_variant:: {
28+ ParentState , ReadOnlyMetadataBuilder , ValueBuilder , WritableMetadataBuilder ,
2529} ;
26- use parquet_variant:: { ParentState , ValueBuilder , WritableMetadataBuilder } ;
2730use std:: sync:: Arc ;
2831
2932/// A builder for [`VariantArray`]
@@ -205,6 +208,134 @@ impl VariantBuilderExt for VariantArrayBuilder {
205208 }
206209}
207210
211+ /// A builder for creating only the value column of a [`VariantArray`]
212+ ///
213+ /// This builder is used when you have existing metadata and only need to build
214+ /// the value column. It's useful for scenarios like variant unshredding, data
215+ /// transformation, or filtering where you want to reuse existing metadata.
216+ ///
217+ /// The builder produces a [`BinaryViewArray`] that can be combined with existing
218+ /// metadata to create a complete [`VariantArray`].
219+ ///
220+ /// # Example:
221+ /// ```
222+ /// # use arrow::array::Array;
223+ /// # use parquet_variant::{Variant, EMPTY_VARIANT_METADATA};
224+ /// # use parquet_variant_compute::VariantValueArrayBuilder;
225+ /// // Create a variant value builder for 10 rows
226+ /// let mut builder = VariantValueArrayBuilder::new(10);
227+ ///
228+ /// // Append some values with their corresponding metadata
229+ /// // In practice, you should use the existing metadata you have access to.
230+ /// builder.append_value(Variant::from(42), EMPTY_VARIANT_METADATA).unwrap();
231+ /// builder.append_null();
232+ /// builder.append_value(Variant::from("hello"), EMPTY_VARIANT_METADATA).unwrap();
233+ ///
234+ /// // Build the final value array
235+ /// let value_array = builder.build();
236+ /// assert_eq!(value_array.len(), 3);
237+ /// ```
238+ #[ derive( Debug ) ]
239+ #[ allow( unused) ]
240+ pub struct VariantValueArrayBuilder {
241+ value_builder : ValueBuilder ,
242+ value_offsets : Vec < usize > ,
243+ nulls : NullBufferBuilder ,
244+ }
245+
246+ #[ allow( unused) ]
247+ impl VariantValueArrayBuilder {
248+ /// Create a new `VariantValueArrayBuilder` with the specified row capacity
249+ pub fn new ( row_capacity : usize ) -> Self {
250+ Self {
251+ value_builder : ValueBuilder :: new ( ) ,
252+ value_offsets : Vec :: with_capacity ( row_capacity) ,
253+ nulls : NullBufferBuilder :: new ( row_capacity) ,
254+ }
255+ }
256+
257+ /// Build the final value array
258+ ///
259+ /// Returns a [`BinaryViewArray`] containing the serialized variant values.
260+ /// This can be combined with existing metadata to create a complete [`VariantArray`].
261+ pub fn build ( mut self ) -> Result < BinaryViewArray , ArrowError > {
262+ let value_buffer = self . value_builder . into_inner ( ) ;
263+ let mut array = binary_view_array_from_buffers ( value_buffer, self . value_offsets ) ;
264+ if let Some ( nulls) = self . nulls . finish ( ) {
265+ let ( views, buffers, _) = array. into_parts ( ) ;
266+ array = BinaryViewArray :: try_new ( views, buffers, Some ( nulls) ) ?;
267+ }
268+ Ok ( array)
269+ }
270+
271+ /// Append a null row to the builder
272+ ///
273+ /// WARNING: It is only safe to call this method when building the `value` field of a shredded
274+ /// variant column (which is nullable). The `value` field of a binary (unshredded) variant
275+ /// column is non-nullable, and callers should instead invoke [`Self::append_value`] with
276+ /// `Variant::Null`, passing the appropriate metadata value.
277+ pub fn append_null ( & mut self ) {
278+ self . value_offsets . push ( self . value_builder . offset ( ) ) ;
279+ self . nulls . append_null ( ) ;
280+ }
281+
282+ /// Append a variant value with its corresponding metadata
283+ ///
284+ /// # Arguments
285+ /// * `value` - The variant value to append
286+ /// * `metadata` - The metadata dictionary for this variant (used for field name resolution)
287+ ///
288+ /// # Returns
289+ /// * `Ok(())` if the value was successfully appended
290+ /// * `Err(ArrowError)` if the variant contains field names not found in the metadata
291+ ///
292+ /// # Example
293+ /// ```
294+ /// # use parquet_variant::{Variant, EMPTY_VARIANT_METADATA};
295+ /// # use parquet_variant_compute::VariantValueArrayBuilder;
296+ /// let mut builder = VariantValueArrayBuilder::new(10);
297+ /// builder.append_value(Variant::from(42), EMPTY_VARIANT_METADATA).unwrap();
298+ /// ```
299+ pub fn append_value ( & mut self , value : Variant < ' _ , ' _ > ) {
300+ let metadata = value. metadata ( ) . cloned ( ) . unwrap_or ( EMPTY_VARIANT_METADATA ) ;
301+ let mut metadata_builder = ReadOnlyMetadataBuilder :: new ( metadata) ;
302+ ValueBuilder :: append_variant_bytes ( self . parent_state ( & mut metadata_builder) , value) ;
303+ }
304+
305+ /// Creates a builder-specific parent state
306+ pub fn parent_state < ' a > (
307+ & ' a mut self ,
308+ metadata_builder : & ' a mut dyn MetadataBuilder ,
309+ ) -> ParentState < ' a , ValueArrayBuilderState < ' a > > {
310+ let state = ValueArrayBuilderState {
311+ value_offsets : & mut self . value_offsets ,
312+ nulls : & mut self . nulls ,
313+ } ;
314+
315+ ParentState :: new ( & mut self . value_builder , metadata_builder, state)
316+ }
317+ }
318+
319+ /// Builder-specific state for array building that manages array-level offsets and nulls. See
320+ /// [`VariantBuilderExt`] for details.
321+ #[ derive( Debug ) ]
322+ pub struct ValueArrayBuilderState < ' a > {
323+ value_offsets : & ' a mut Vec < usize > ,
324+ nulls : & ' a mut NullBufferBuilder ,
325+ }
326+
327+ // All changes are pending until finalized
328+ impl BuilderSpecificState for ValueArrayBuilderState < ' _ > {
329+ fn finish (
330+ & mut self ,
331+ _metadata_builder : & mut dyn MetadataBuilder ,
332+ value_builder : & mut ValueBuilder ,
333+ ) {
334+ self . value_offsets . push ( value_builder. offset ( ) ) ;
335+ self . nulls . append_non_null ( ) ;
336+ }
337+ }
338+
208339fn binary_view_array_from_buffers ( buffer : Vec < u8 > , offsets : Vec < usize > ) -> BinaryViewArray {
209340 // All offsets are less than or equal to the buffer length, so we can safely cast all offsets
210341 // inside the loop below, as long as the buffer length fits in u32.
@@ -228,6 +359,7 @@ fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> Binar
228359mod test {
229360 use super :: * ;
230361 use arrow:: array:: Array ;
362+ use parquet_variant:: { Variant , VariantBuilder , VariantMetadata } ;
231363
232364 /// Test that both the metadata and value buffers are non nullable
233365 #[ test]
@@ -288,4 +420,46 @@ mod test {
288420 let list = variant. as_list ( ) . expect ( "variant to be a list" ) ;
289421 assert_eq ! ( list. len( ) , 2 ) ;
290422 }
423+
/// Appending two primitive values around a null row yields a three-row value array.
#[test]
fn test_variant_value_array_builder_basic() {
    let mut values = VariantValueArrayBuilder::new(10);

    // Rows: an integer, a null, then a string
    values.append_value(Variant::from(42i32));
    values.append_null();
    values.append_value(Variant::from("hello"));

    let built = values.build().unwrap();
    assert_eq!(built.len(), 3);
}
436+
/// An object variant built against pre-existing metadata can be appended, followed by a null
/// row, giving a two-row value array.
#[test]
fn test_variant_value_array_builder_with_objects() {
    // Metadata dictionary holding the two field names used by the object below
    let mut dictionary = WritableMetadataBuilder::default();
    dictionary.upsert_field_name("name");
    dictionary.upsert_field_name("age");
    dictionary.finish();
    let metadata_bytes = dictionary.into_inner();
    let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();

    // Build an object variant that reuses that metadata
    let mut object_source = VariantBuilder::new().with_metadata(metadata);
    object_source
        .new_object()
        .with_field("name", "Alice")
        .with_field("age", 30i32)
        .finish();
    let (_, value_bytes) = object_source.finish();
    let object = Variant::try_new(&metadata_bytes, &value_bytes).unwrap();

    // Feed the object, then a null row, through the value array builder
    let mut values = VariantValueArrayBuilder::new(10);
    values.append_value(object);
    values.append_null();

    let built = values.build().unwrap();
    assert_eq!(built.len(), 2);
}
291465}
0 commit comments