Skip to content

Commit 0734d69

Browse files
Implement DataType::{Binary, LargeBinary, BinaryView} => Variant::Binary
1 parent 3e7c887 commit 0734d69

File tree

1 file changed

+86
-20
lines changed

1 file changed

+86
-20
lines changed

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 86 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -18,23 +18,25 @@
1818
use crate::{VariantArray, VariantArrayBuilder};
1919
use arrow::array::{Array, AsArray};
2020
use arrow::datatypes::{
21-
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
22-
UInt64Type, UInt8Type,
21+
BinaryType, BinaryViewType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
22+
Int8Type, LargeBinaryType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
2323
};
2424
use arrow_schema::{ArrowError, DataType};
2525
use parquet_variant::Variant;
2626

27-
/// Convert the input array of a specific primitive type to a `VariantArray`
28-
/// row by row
29-
macro_rules! primitive_conversion {
30-
($t:ty, $input:expr, $builder:expr) => {{
31-
let array = $input.as_primitive::<$t>();
27+
/// Convert the input array to a `VariantArray` row by row, using `method`
28+
/// to downcast the generic array to a specific array type and `cast_fn`
29+
/// to transform each element to a type compatible with Variant
30+
macro_rules! conversion {
31+
($t:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
32+
let array = $input.$method::<$t>();
3233
for i in 0..array.len() {
3334
if array.is_null(i) {
3435
$builder.append_null();
3536
continue;
3637
}
37-
$builder.append_variant(Variant::from(array.value(i)));
38+
let cast_value = $cast_fn(array.value(i));
39+
$builder.append_variant(Variant::from(cast_value));
3840
}
3941
}};
4042
}
@@ -68,35 +70,44 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
6870
let input_type = input.data_type();
6971
// todo: handle other types like Boolean, Strings, Date, Timestamp, etc.
7072
match input_type {
73+
DataType::Binary => {
74+
conversion!(BinaryType, as_bytes, |v| v, input, builder);
75+
}
76+
DataType::LargeBinary => {
77+
conversion!(LargeBinaryType, as_bytes, |v| v, input, builder);
78+
}
79+
DataType::BinaryView => {
80+
conversion!(BinaryViewType, as_byte_view, |v| v, input, builder);
81+
}
7182
DataType::Int8 => {
72-
primitive_conversion!(Int8Type, input, builder);
83+
conversion!(Int8Type, as_primitive, |v| v, input, builder);
7384
}
7485
DataType::Int16 => {
75-
primitive_conversion!(Int16Type, input, builder);
86+
conversion!(Int16Type, as_primitive, |v| v, input, builder);
7687
}
7788
DataType::Int32 => {
78-
primitive_conversion!(Int32Type, input, builder);
89+
conversion!(Int32Type, as_primitive, |v| v, input, builder);
7990
}
8091
DataType::Int64 => {
81-
primitive_conversion!(Int64Type, input, builder);
92+
conversion!(Int64Type, as_primitive, |v| v, input, builder);
8293
}
8394
DataType::UInt8 => {
84-
primitive_conversion!(UInt8Type, input, builder);
95+
conversion!(UInt8Type, as_primitive, |v| v, input, builder);
8596
}
8697
DataType::UInt16 => {
87-
primitive_conversion!(UInt16Type, input, builder);
98+
conversion!(UInt16Type, as_primitive, |v| v, input, builder);
8899
}
89100
DataType::UInt32 => {
90-
primitive_conversion!(UInt32Type, input, builder);
101+
conversion!(UInt32Type, as_primitive, |v| v, input, builder);
91102
}
92103
DataType::UInt64 => {
93-
primitive_conversion!(UInt64Type, input, builder);
104+
conversion!(UInt64Type, as_primitive, |v| v, input, builder);
94105
}
95106
DataType::Float32 => {
96-
primitive_conversion!(Float32Type, input, builder);
107+
conversion!(Float32Type, as_primitive, |v| v, input, builder);
97108
}
98109
DataType::Float64 => {
99-
primitive_conversion!(Float64Type, input, builder);
110+
conversion!(Float64Type, as_primitive, |v| v, input, builder);
100111
}
101112
dt => {
102113
return Err(ArrowError::CastError(format!(
@@ -115,12 +126,67 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
115126
mod tests {
116127
use super::*;
117128
use arrow::array::{
118-
ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
119-
UInt16Array, UInt32Array, UInt64Array, UInt8Array,
129+
ArrayRef, Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder,
130+
Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array,
131+
UInt8Array,
120132
};
121133
use parquet_variant::{Variant, VariantDecimal16};
122134
use std::sync::Arc;
123135

136+
#[test]
137+
fn test_cast_to_variant_binary() {
138+
// BinaryType
139+
let mut builder = GenericByteBuilder::<BinaryType>::new();
140+
builder.append_value(b"hello");
141+
builder.append_value(b"");
142+
builder.append_null();
143+
builder.append_value(b"world");
144+
let binary_array = builder.finish();
145+
run_test(
146+
Arc::new(binary_array),
147+
vec![
148+
Some(Variant::Binary(b"hello")),
149+
Some(Variant::Binary(b"")),
150+
None,
151+
Some(Variant::Binary(b"world")),
152+
],
153+
);
154+
155+
// LargeBinaryType
156+
let mut builder = GenericByteBuilder::<LargeBinaryType>::new();
157+
builder.append_value(b"hello");
158+
builder.append_value(b"");
159+
builder.append_null();
160+
builder.append_value(b"world");
161+
let large_binary_array = builder.finish();
162+
run_test(
163+
Arc::new(large_binary_array),
164+
vec![
165+
Some(Variant::Binary(b"hello")),
166+
Some(Variant::Binary(b"")),
167+
None,
168+
Some(Variant::Binary(b"world")),
169+
],
170+
);
171+
172+
// BinaryViewType
173+
let mut builder = GenericByteViewBuilder::<BinaryViewType>::new();
174+
builder.append_value(b"hello");
175+
builder.append_value(b"");
176+
builder.append_null();
177+
builder.append_value(b"world");
178+
let byte_view_array = builder.finish();
179+
run_test(
180+
Arc::new(byte_view_array),
181+
vec![
182+
Some(Variant::Binary(b"hello")),
183+
Some(Variant::Binary(b"")),
184+
None,
185+
Some(Variant::Binary(b"world")),
186+
],
187+
);
188+
}
189+
124190
#[test]
125191
fn test_cast_to_variant_int8() {
126192
run_test(

0 commit comments

Comments (0)