Migrate parquet-variant to Rust 2024 (#8510)

mbrobbel · web-flow · commit 5993dffb714e · 2025-10-07T07:38:13.000-04:00
# Which issue does this PR close?

- Contribute to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `parquet-variant` to Rust 2024.

# Are these changes tested?

Covered by CI.

# Are there any user-facing changes?

Yes
diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml
@@ -27,7 +27,7 @@ repository = { workspace = true }
 authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 readme = "README.md"
-edition = { workspace = true }
+edition = "2024"
 rust-version = { workspace = true }
 
 [dependencies]
diff --git a/parquet-variant/benches/variant_builder.rs b/parquet-variant/benches/variant_builder.rs
@@ -21,9 +21,9 @@ use criterion::*;
 
 use parquet_variant::{Variant, VariantBuilder};
 use rand::{
-    distr::{uniform::SampleUniform, Alphanumeric},
-    rngs::StdRng,
     Rng, SeedableRng,
+    distr::{Alphanumeric, uniform::SampleUniform},
+    rngs::StdRng,
 };
 use std::{hint, ops::Range};
 
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
@@ -16,7 +16,7 @@
 // under the License.
 use crate::decoder::{VariantBasicType, VariantPrimitiveType};
 use crate::{
-    ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
+    ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList,
     VariantMetadata, VariantObject,
 };
 use arrow_schema::ArrowError;
@@ -3403,10 +3403,12 @@ mod tests {
             // This should fail because "unknown_field" is not in the metadata
             let result = obj.try_insert("unknown_field", "value");
             assert!(result.is_err());
-            assert!(result
-                .unwrap_err()
-                .to_string()
-                .contains("Field name 'unknown_field' not found"));
+            assert!(
+                result
+                    .unwrap_err()
+                    .to_string()
+                    .contains("Field name 'unknown_field' not found")
+            );
         }
     }
 
diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs
@@ -14,10 +14,10 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+use crate::ShortString;
 use crate::utils::{
     array_from_slice, overflow_error, slice_from_slice_at_offset, string_from_slice,
 };
-use crate::ShortString;
 
 use arrow_schema::ArrowError;
 use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc};
@@ -143,7 +143,7 @@ impl OffsetSizeBytes {
             _ => {
                 return Err(ArrowError::InvalidArgumentError(
                     "offset_size_minus_one must be 0–3".to_string(),
-                ))
+                ));
             }
         };
         Ok(result)
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
@@ -15,17 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
+pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16};
 pub use self::list::VariantList;
-pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES};
+pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
 pub use self::object::VariantObject;
 
 // Publicly export types used in the API
 pub use half::f16;
 pub use uuid::Uuid;
 
 use crate::decoder::{
-    self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
+    self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
 };
 use crate::path::{VariantPath, VariantPathElement};
 use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs
@@ -285,20 +285,24 @@ mod tests {
             decimal4_too_large.is_err(),
             "Decimal4 precision overflow should fail"
         );
-        assert!(decimal4_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal4_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32, 2);
         assert!(
             decimal4_too_small.is_err(),
             "Decimal4 precision underflow should fail"
         );
-        assert!(decimal4_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal4_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal4
         let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2);
@@ -319,20 +323,24 @@ mod tests {
             decimal8_too_large.is_err(),
             "Decimal8 precision overflow should fail"
         );
-        assert!(decimal8_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal8_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal8_too_small = VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2);
         assert!(
             decimal8_too_small.is_err(),
             "Decimal8 precision underflow should fail"
         );
-        assert!(decimal8_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal8_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal8
         let decimal8_max_valid = VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2);
@@ -354,21 +362,25 @@ mod tests {
             decimal16_too_large.is_err(),
             "Decimal16 precision overflow should fail"
         );
-        assert!(decimal16_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal16_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal16_too_small =
             VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2);
         assert!(
             decimal16_too_small.is_err(),
             "Decimal16 precision underflow should fail"
         );
-        assert!(decimal16_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal16_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal16
         let decimal16_max_valid =
@@ -394,10 +406,12 @@ mod tests {
             decimal4_invalid_scale.is_err(),
             "Decimal4 with scale > 9 should fail"
         );
-        assert!(decimal4_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal4_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32, 20);
         assert!(
@@ -418,10 +432,12 @@ mod tests {
             decimal8_invalid_scale.is_err(),
             "Decimal8 with scale > 18 should fail"
         );
-        assert!(decimal8_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal8_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64, 25);
         assert!(
@@ -442,10 +458,12 @@ mod tests {
             decimal16_invalid_scale.is_err(),
             "Decimal16 with scale > 38 should fail"
         );
-        assert!(decimal16_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal16_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal16_invalid_scale_large = VariantDecimal16::try_new(123_i128, 50);
         assert!(
diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, slice_from_slice_at_offset,
 };
diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice,
     try_binary_search_range_by,
@@ -285,14 +285,13 @@ impl<'m> VariantMetadata<'m> {
                 let mut current_offset = offsets.next().unwrap_or(0);
                 let mut prev_value: Option<&str> = None;
                 for next_offset in offsets {
-                    let current_value =
-                        value_buffer
-                            .get(current_offset..next_offset)
-                            .ok_or_else(|| {
-                                ArrowError::InvalidArgumentError(format!(
+                    let current_value = value_buffer.get(current_offset..next_offset).ok_or_else(
+                        || {
+                            ArrowError::InvalidArgumentError(format!(
                                 "range {current_offset}..{next_offset} is invalid or out of bounds"
                             ))
-                            })?;
+                        },
+                    )?;
 
                     if let Some(prev_val) = prev_value {
                         if current_value <= prev_val {
diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
 };
diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs