Skip to content

Commit a003ed0

Browse files
committed
address comment
1 extract builder constructing logic to macro_rules 2 add micro -> nano test
1 parent 3517b15 commit a003ed0

File tree

3 files changed

+146
-150
lines changed

3 files changed

+146
-150
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 41 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
//! Module for transforming a typed arrow `Array` to `VariantArray`.
1919
20-
use arrow::datatypes::{self, ArrowPrimitiveType};
21-
use chrono::Datelike;
20+
use arrow::datatypes::{
21+
self, ArrowPrimitiveType, ArrowTimestampType, Date32Type, TimestampMicrosecondType,
22+
TimestampNanosecondType,
23+
};
2224
use parquet_variant::Variant;
2325

2426
/// Options for controlling the behavior of `cast_to_variant_with_options`.
@@ -41,10 +43,12 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
4143

4244
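/// Macro to generate `PrimitiveFromVariant` implementations for Arrow primitive types.
///
/// An optional third argument is a function that is applied (via `Option::map`) to the value
/// returned by the variant accessor before that value is returned.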
4345
macro_rules! impl_primitive_from_variant {
44-
($arrow_type:ty, $variant_method:ident) => {
46+
($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
4547
impl PrimitiveFromVariant for $arrow_type {
4648
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
47-
variant.$variant_method()
49+
let value = variant.$variant_method();
50+
$( let value = value.map($cast_fn); )?
51+
value
4852
}
4953
}
5054
};
@@ -61,35 +65,45 @@ impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
6165
impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
6266
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
6367
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
68+
// Date32 uses the three-argument form: the value returned by `as_naive_date` is mapped
// through `Date32Type::from_naive_date` before being returned as the native value.
impl_primitive_from_variant!(
69+
datatypes::Date32Type,
70+
as_naive_date,
71+
Date32Type::from_naive_date
72+
);
6473

65-
impl VariantAsPrimitive<datatypes::TimestampMicrosecondType> for Variant<'_, '_> {
66-
fn as_primitive(&self) -> Option<i64> {
67-
match self {
68-
Variant::TimestampMicros(dt) => Some(dt.timestamp_micros()),
69-
Variant::TimestampNtzMicros(ndt) => Some(ndt.and_utc().timestamp_micros()),
70-
_ => None,
71-
}
72-
}
74+
/// Extracts the native value of an Arrow timestamp type from a [`Variant`].
pub(crate) trait TimestampFromVariant: ArrowTimestampType {
75+
/// Returns the native timestamp value for `variant`, or `None` when the variant
/// cannot be converted to this timestamp type.
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
7376
}
7477

75-
impl VariantAsPrimitive<datatypes::TimestampNanosecondType> for Variant<'_, '_> {
76-
fn as_primitive(&self) -> Option<i64> {
77-
match self {
78-
Variant::TimestampNanos(dt) => dt.timestamp_nanos_opt(),
79-
Variant::TimestampNtzNanos(ndt) => ndt.and_utc().timestamp_nanos_opt(),
80-
_ => None,
78+
/// Macro to generate `TimestampFromVariant` implementations for Arrow timestamp types.
///
/// Takes the timestamp type followed by a braced, comma-separated list of
/// `(pattern, conversion)` pairs (a trailing comma is accepted). Each pair becomes one
/// `match` arm over the variant, so `conversion` must evaluate to `Option<Self::Native>`
/// and may use the bindings introduced by `pattern`.
macro_rules! impl_timestamp_from_variant {
79+
($timestamp_type:ty, {
80+
$(($variant_pattern:pat, $conversion:expr)),+ $(,)?
81+
}) => {
82+
impl TimestampFromVariant for $timestamp_type {
83+
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
84+
match variant {
85+
$(
86+
$variant_pattern => $conversion,
87+
)+
88+
// Any variant without a listed conversion is not convertible to this type.
_ => None,
89+
}
90+
}
8191
}
82-
}
92+
};
8393
}
8494

85-
impl VariantAsPrimitive<datatypes::Date32Type> for Variant<'_, '_> {
86-
fn as_primitive(&self) -> Option<i32> {
87-
// The number of days from 0001-01-01 to 1970-01-01.
88-
const DAYS_FROM_CE_TO_UNIX_EPOCH: i32 = 719163;
89-
self.as_naive_date()
90-
.map(|d| d.num_days_from_ce() - DAYS_FROM_CE_TO_UNIX_EPOCH)
91-
}
92-
}
95+
// Microsecond timestamps are produced only from the two microsecond variants; each arm
// wraps `timestamp_micros()` in `Some`. Every other variant falls through to the macro's
// `_ => None` arm.
impl_timestamp_from_variant!(TimestampMicrosecondType, {
96+
(Variant::TimestampMicros(t), Some(t.timestamp_micros())),
97+
(Variant::TimestampNtzMicros(t), Some(t.and_utc().timestamp_micros())),
98+
});
99+
100+
// Nanosecond timestamps are produced from both the microsecond and the nanosecond
// variants; every other variant falls through to the macro's `_ => None` arm.
//
// Microsecond values are scaled with `checked_mul(1000)` rather than a plain `* 1000`:
// a microsecond timestamp whose nanosecond representation does not fit in `i64` then
// yields `None` instead of panicking (debug) or wrapping (release). This matches the
// nanosecond arms, where `timestamp_nanos_opt()` already returns `None` when out of range.
impl_timestamp_from_variant!(TimestampNanosecondType, {
    (Variant::TimestampMicros(t), t.timestamp_micros().checked_mul(1000)),
    (Variant::TimestampNtzMicros(t), t.and_utc().timestamp_micros().checked_mul(1000)),
    (Variant::TimestampNanos(t), t.timestamp_nanos_opt()),
    (Variant::TimestampNtzNanos(t), t.and_utc().timestamp_nanos_opt()),
});
106+
93107
/// Convert the value at a specific index in the given array into a `Variant`.
94108
macro_rules! non_generic_conversion_single_value {
95109
($array:expr, $cast_fn:expr, $index:expr) => {{

parquet-variant-compute/src/variant_get.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,18 @@ mod test {
863863
])
864864
);
865865

866+
// Test reading a microsecond-precision (no time zone) shredded variant array as a
// nanosecond-precision timestamp array without a time zone.
867+
perfectly_shredded_to_arrow_primitive_test!(
868+
get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
869+
DataType::Timestamp(TimeUnit::Nanosecond, None),
870+
perfectly_shredded_timestamp_micro_ntz_variant_array,
871+
arrow::array::TimestampNanosecondArray::from(vec![
872+
Some(-456000000),
873+
Some(1758602096000001000),
874+
Some(1758602096000002000)
875+
])
876+
);
877+
866878
perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
867879
arrow::array::TimestampMicrosecondArray::from(vec![
868880
Some(-456000),
@@ -884,6 +896,19 @@ mod test {
884896
.with_timezone("+00:00")
885897
);
886898

899+
// Test reading a microsecond-precision shredded variant array as a nanosecond-precision
// timestamp array with the "+00:00" time zone.
900+
perfectly_shredded_to_arrow_primitive_test!(
901+
get_variant_perfectly_shredded_timestamp_micro_as_nano,
902+
DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
903+
perfectly_shredded_timestamp_micro_variant_array,
904+
arrow::array::TimestampNanosecondArray::from(vec![
905+
Some(-456000000),
906+
Some(1758602096000001000),
907+
Some(1758602096000002000)
908+
])
909+
.with_timezone("+00:00")
910+
);
911+
887912
perfectly_shredded_variant_array_fn!(
888913
perfectly_shredded_timestamp_nano_ntz_variant_array,
889914
|| {

0 commit comments

Comments
 (0)