Skip to content

Commit 5959577

Browse files
committed
Merge remote-tracking branch 'oss/main' into variant-value-builder
2 parents 474fa31 + 2ec77b5 commit 5959577

File tree

10 files changed

+553
-1270
lines changed

10 files changed

+553
-1270
lines changed

arrow-cast/src/cast/decimal.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ impl DecimalCast for i64 {
8181
}
8282

8383
fn from_f64(n: f64) -> Option<Self> {
84-
n.to_i64()
84+
// Call implementation explicitly otherwise this resolves to `to_i64`
85+
// in arrow-buffer that behaves differently.
86+
num::traits::ToPrimitive::to_i64(&n)
8587
}
8688
}
8789

arrow-cast/src/cast/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3101,6 +3101,35 @@ mod tests {
31013101
);
31023102
}
31033103

3104+
#[test]
3105+
fn test_cast_floating_to_decimals() {
3106+
for output_type in [
3107+
DataType::Decimal32(9, 3),
3108+
DataType::Decimal64(9, 3),
3109+
DataType::Decimal128(9, 3),
3110+
DataType::Decimal256(9, 3),
3111+
] {
3112+
let input_type = DataType::Float64;
3113+
assert!(can_cast_types(&input_type, &output_type));
3114+
3115+
let array = vec![Some(1.1_f64)];
3116+
let array = PrimitiveArray::<Float64Type>::from_iter(array);
3117+
let result = cast_with_options(
3118+
&array,
3119+
&output_type,
3120+
&CastOptions {
3121+
safe: false,
3122+
format_options: FormatOptions::default(),
3123+
},
3124+
);
3125+
assert!(
3126+
result.is_ok(),
3127+
"Failed to cast to {output_type} with: {}",
3128+
result.unwrap_err()
3129+
);
3130+
}
3131+
}
3132+
31043133
#[test]
31053134
fn test_cast_decimal128_to_decimal128_overflow() {
31063135
let input_type = DataType::Decimal128(38, 3);

parquet-testing

Submodule parquet-testing updated 276 files

parquet-variant-compute/src/variant_array.rs

Lines changed: 37 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@
1717

1818
//! [`VariantArray`] implementation
1919
20+
use crate::type_conversion::primitive_conversion_single_value;
2021
use arrow::array::{Array, ArrayData, ArrayRef, AsArray, BinaryViewArray, StructArray};
2122
use arrow::buffer::NullBuffer;
2223
use arrow::datatypes::{
2324
Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
2425
UInt32Type, UInt64Type, UInt8Type,
2526
};
2627
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
28+
use parquet_variant::Uuid;
2729
use parquet_variant::Variant;
2830
use std::any::Any;
2931
use std::sync::Arc;
3032

31-
use crate::type_conversion::primitive_conversion_single_value;
32-
3333
/// An array of Parquet [`Variant`] values
3434
///
3535
/// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
@@ -129,7 +129,7 @@ impl VariantArray {
129129
Ok(Self {
130130
inner: inner.clone(),
131131
metadata: metadata.clone(),
132-
shredding_state: ShreddingState::try_new(metadata.clone(), value, typed_value)?,
132+
shredding_state: ShreddingState::try_new(value, typed_value)?,
133133
})
134134
}
135135

@@ -154,8 +154,7 @@ impl VariantArray {
154154
// This would be a lot simpler if ShreddingState were just a pair of Option... we already
155155
// have everything we need.
156156
let inner = builder.build();
157-
let shredding_state =
158-
ShreddingState::try_new(metadata.clone(), value, typed_value).unwrap(); // valid by construction
157+
let shredding_state = ShreddingState::try_new(value, typed_value).unwrap(); // valid by construction
159158
Self {
160159
inner,
161160
metadata,
@@ -222,7 +221,7 @@ impl VariantArray {
222221
typed_value_to_variant(typed_value, index)
223222
}
224223
}
225-
ShreddingState::AllNull { .. } => {
224+
ShreddingState::AllNull => {
226225
// AllNull case: neither value nor typed_value fields exist
227226
// NOTE: This handles the case where neither value nor typed_value fields exist.
228227
// For top-level variants, this returns Variant::Null (JSON null).
@@ -325,14 +324,11 @@ impl ShreddedVariantFieldArray {
325324
.and_then(|col| col.as_binary_view_opt().cloned());
326325
let typed_value = inner_struct.column_by_name("typed_value").cloned();
327326

328-
// Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
329-
let dummy_metadata = arrow::array::BinaryViewArray::new_null(inner_struct.len());
330-
331327
// Note this clone is cheap, it just bumps the ref count
332328
let inner = inner_struct.clone();
333329
Ok(Self {
334330
inner: inner.clone(),
335-
shredding_state: ShreddingState::try_new(dummy_metadata, value, typed_value)?,
331+
shredding_state: ShreddingState::try_new(value, typed_value)?,
336332
})
337333
}
338334

@@ -432,16 +428,10 @@ impl Array for ShreddedVariantFieldArray {
432428
#[derive(Debug)]
433429
pub enum ShreddingState {
434430
/// This variant has no typed_value field
435-
Unshredded {
436-
metadata: BinaryViewArray,
437-
value: BinaryViewArray,
438-
},
431+
Unshredded { value: BinaryViewArray },
439432
/// This variant has a typed_value field and no value field
440433
/// meaning it is the shredded type
441-
Typed {
442-
metadata: BinaryViewArray,
443-
typed_value: ArrayRef,
444-
},
434+
Typed { typed_value: ArrayRef },
445435
/// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
446436
/// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
447437
/// values have NULL `typed_value` and `Variant::Null` in `value`.
@@ -453,7 +443,6 @@ pub enum ShreddingState {
453443
/// the `value` is a variant object containing the subset of fields for which shredding was
454444
/// not even attempted.
455445
PartiallyShredded {
456-
metadata: BinaryViewArray,
457446
value: BinaryViewArray,
458447
typed_value: ArrayRef,
459448
},
@@ -463,38 +452,20 @@ pub enum ShreddingState {
463452
/// Note: By strict spec interpretation, this should only be valid for shredded object fields,
464453
/// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
465454
/// handling of missing data.
466-
AllNull { metadata: BinaryViewArray },
455+
AllNull,
467456
}
468457

469458
impl ShreddingState {
470459
/// try to create a new `ShreddingState` from the given fields
471460
pub fn try_new(
472-
metadata: BinaryViewArray,
473461
value: Option<BinaryViewArray>,
474462
typed_value: Option<ArrayRef>,
475463
) -> Result<Self, ArrowError> {
476-
match (metadata, value, typed_value) {
477-
(metadata, Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded {
478-
metadata,
479-
value,
480-
typed_value,
481-
}),
482-
(metadata, Some(value), None) => Ok(Self::Unshredded { metadata, value }),
483-
(metadata, None, Some(typed_value)) => Ok(Self::Typed {
484-
metadata,
485-
typed_value,
486-
}),
487-
(metadata, None, None) => Ok(Self::AllNull { metadata }),
488-
}
489-
}
490-
491-
/// Return a reference to the metadata field
492-
pub fn metadata_field(&self) -> &BinaryViewArray {
493-
match self {
494-
ShreddingState::Unshredded { metadata, .. } => metadata,
495-
ShreddingState::Typed { metadata, .. } => metadata,
496-
ShreddingState::PartiallyShredded { metadata, .. } => metadata,
497-
ShreddingState::AllNull { metadata } => metadata,
464+
match (value, typed_value) {
465+
(Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded { value, typed_value }),
466+
(Some(value), None) => Ok(Self::Unshredded { value }),
467+
(None, Some(typed_value)) => Ok(Self::Typed { typed_value }),
468+
(None, None) => Ok(Self::AllNull),
498469
}
499470
}
500471

@@ -504,7 +475,7 @@ impl ShreddingState {
504475
ShreddingState::Unshredded { value, .. } => Some(value),
505476
ShreddingState::Typed { .. } => None,
506477
ShreddingState::PartiallyShredded { value, .. } => Some(value),
507-
ShreddingState::AllNull { .. } => None,
478+
ShreddingState::AllNull => None,
508479
}
509480
}
510481

@@ -514,36 +485,26 @@ impl ShreddingState {
514485
ShreddingState::Unshredded { .. } => None,
515486
ShreddingState::Typed { typed_value, .. } => Some(typed_value),
516487
ShreddingState::PartiallyShredded { typed_value, .. } => Some(typed_value),
517-
ShreddingState::AllNull { .. } => None,
488+
ShreddingState::AllNull => None,
518489
}
519490
}
520491

521492
/// Slice all the underlying arrays
522493
pub fn slice(&self, offset: usize, length: usize) -> Self {
523494
match self {
524-
ShreddingState::Unshredded { metadata, value } => ShreddingState::Unshredded {
525-
metadata: metadata.slice(offset, length),
495+
ShreddingState::Unshredded { value } => ShreddingState::Unshredded {
526496
value: value.slice(offset, length),
527497
},
528-
ShreddingState::Typed {
529-
metadata,
530-
typed_value,
531-
} => ShreddingState::Typed {
532-
metadata: metadata.slice(offset, length),
498+
ShreddingState::Typed { typed_value } => ShreddingState::Typed {
533499
typed_value: typed_value.slice(offset, length),
534500
},
535-
ShreddingState::PartiallyShredded {
536-
metadata,
537-
value,
538-
typed_value,
539-
} => ShreddingState::PartiallyShredded {
540-
metadata: metadata.slice(offset, length),
541-
value: value.slice(offset, length),
542-
typed_value: typed_value.slice(offset, length),
543-
},
544-
ShreddingState::AllNull { metadata } => ShreddingState::AllNull {
545-
metadata: metadata.slice(offset, length),
546-
},
501+
ShreddingState::PartiallyShredded { value, typed_value } => {
502+
ShreddingState::PartiallyShredded {
503+
value: value.slice(offset, length),
504+
typed_value: typed_value.slice(offset, length),
505+
}
506+
}
507+
ShreddingState::AllNull => ShreddingState::AllNull,
547508
}
548509
}
549510
}
@@ -595,8 +556,15 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
595556
let value = boolean_array.value(index);
596557
Variant::from(value)
597558
}
598-
DataType::FixedSizeBinary(_) => {
559+
DataType::FixedSizeBinary(binary_len) => {
599560
let array = typed_value.as_fixed_size_binary();
561+
// Try to treat 16 byte FixedSizeBinary as UUID
562+
let value = array.value(index);
563+
if *binary_len == 16 {
564+
if let Ok(uuid) = Uuid::from_slice(value) {
565+
return Variant::from(uuid);
566+
}
567+
}
600568
let value = array.value(index);
601569
Variant::from(value)
602570
}
@@ -744,7 +712,7 @@ mod test {
744712
// Verify the shredding state is AllNull
745713
assert!(matches!(
746714
variant_array.shredding_state(),
747-
ShreddingState::AllNull { .. }
715+
ShreddingState::AllNull
748716
));
749717

750718
// Verify that value() returns Variant::Null (compensating for spec violation)
@@ -801,17 +769,10 @@ mod test {
801769

802770
#[test]
803771
fn all_null_shredding_state() {
804-
let metadata = BinaryViewArray::from(vec![b"test" as &[u8]]);
805-
let shredding_state = ShreddingState::try_new(metadata.clone(), None, None).unwrap();
772+
let shredding_state = ShreddingState::try_new(None, None).unwrap();
806773

807774
// Verify the shredding state is AllNull
808-
assert!(matches!(shredding_state, ShreddingState::AllNull { .. }));
809-
810-
// Verify metadata is preserved correctly
811-
if let ShreddingState::AllNull { metadata: m } = shredding_state {
812-
assert_eq!(m.len(), metadata.len());
813-
assert_eq!(m.value(0), metadata.value(0));
814-
}
775+
assert!(matches!(shredding_state, ShreddingState::AllNull));
815776
}
816777

817778
#[test]
@@ -827,7 +788,7 @@ mod test {
827788
// Verify the shredding state is AllNull
828789
assert!(matches!(
829790
variant_array.shredding_state(),
830-
ShreddingState::AllNull { .. }
791+
ShreddingState::AllNull
831792
));
832793

833794
// Verify all values are null

parquet-variant-compute/src/variant_get.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ fn shredded_get_path(
135135
let shred_basic_variant =
136136
|target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
137137
let as_type = as_field.map(|f| f.data_type());
138-
let mut builder = make_variant_to_arrow_row_builder(path, as_type, cast_options)?;
138+
let mut builder =
139+
make_variant_to_arrow_row_builder(path, as_type, cast_options, target.len())?;
139140
for i in 0..target.len() {
140141
if target.is_null(i) {
141142
builder.append_null()?;

0 commit comments

Comments
 (0)