Skip to content

Commit 521f219

Browse files
authored
Migrate arrow-avro to Rust 2024 (#8545)
# Which issue does this PR close?

- Contribute to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `arrow-avro` to Rust 2024

# Are these changes tested?

CI

# Are there any user-facing changes?

Yes
1 parent f19bda3 commit 521f219

File tree

12 files changed

+120
-117
lines changed

12 files changed

+120
-117
lines changed

arrow-avro/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ authors = { workspace = true }
2525
license = { workspace = true }
2626
keywords = { workspace = true }
2727
include = { workspace = true }
28-
edition = { workspace = true }
28+
edition = "2024"
2929
rust-version = { workspace = true }
3030

3131
[lib]
@@ -93,4 +93,4 @@ harness = false
9393

9494
[[bench]]
9595
name = "avro_writer"
96-
harness = false
96+
harness = false

arrow-avro/benches/avro_writer.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,21 +22,21 @@ extern crate criterion;
2222
extern crate once_cell;
2323

2424
use arrow_array::{
25+
ArrayRef, BinaryArray, BooleanArray, Decimal32Array, Decimal64Array, Decimal128Array,
26+
Decimal256Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray,
27+
RecordBatch, StringArray, StructArray,
2528
builder::{ListBuilder, StringBuilder},
2629
types::{Int32Type, Int64Type, IntervalMonthDayNanoType, TimestampMicrosecondType},
27-
ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array,
28-
Decimal64Array, FixedSizeBinaryArray, Float32Array, Float64Array, ListArray, PrimitiveArray,
29-
RecordBatch, StringArray, StructArray,
3030
};
3131
use arrow_avro::writer::AvroWriter;
32-
use arrow_buffer::{i256, Buffer};
32+
use arrow_buffer::{Buffer, i256};
3333
use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode};
34-
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
34+
use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
3535
use once_cell::sync::Lazy;
3636
use rand::{
37+
Rng, SeedableRng,
3738
distr::uniform::{SampleRange, SampleUniform},
3839
rngs::StdRng,
39-
Rng, SeedableRng,
4040
};
4141
use std::collections::HashMap;
4242
use std::io::Cursor;

arrow-avro/benches/decoder.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,10 @@ extern crate once_cell;
2626
extern crate uuid;
2727

2828
use apache_avro::types::Value;
29-
use apache_avro::{to_avro_datum, Decimal, Schema as ApacheSchema};
30-
use arrow_avro::schema::{Fingerprint, FingerprintAlgorithm, CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC};
29+
use apache_avro::{Decimal, Schema as ApacheSchema, to_avro_datum};
30+
use arrow_avro::schema::{CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC};
3131
use arrow_avro::{reader::ReaderBuilder, schema::AvroSchema};
32-
use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput};
32+
use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
3333
use once_cell::sync::Lazy;
3434
use std::{hint::black_box, time::Duration};
3535
use uuid::Uuid;

arrow-avro/examples/decode_kafka_stream.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
use arrow_array::{Int64Array, RecordBatch, StringArray};
3636
use arrow_avro::reader::ReaderBuilder;
3737
use arrow_avro::schema::{
38-
AvroSchema, Fingerprint, FingerprintAlgorithm, SchemaStore, CONFLUENT_MAGIC,
38+
AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SchemaStore,
3939
};
4040
use arrow_schema::ArrowError;
4141

arrow-avro/src/codec.rs

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use crate::schema::{
18-
make_full_name, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability, PrimitiveType,
19-
Record, Schema, Type, TypeName, AVRO_ENUM_SYMBOLS_METADATA_KEY,
20-
AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY,
18+
AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAME_METADATA_KEY,
19+
AVRO_NAMESPACE_METADATA_KEY, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability,
20+
PrimitiveType, Record, Schema, Type, TypeName, make_full_name,
2121
};
2222
use arrow_schema::{
23-
ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode,
24-
DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
23+
ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field, Fields,
24+
IntervalUnit, TimeUnit, UnionFields, UnionMode,
2525
};
2626
#[cfg(feature = "small_decimals")]
2727
use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
@@ -324,14 +324,14 @@ impl AvroDataType {
324324
Codec::Null => {
325325
return Err(ArrowError::SchemaError(
326326
"Default for `null` type must be JSON null".to_string(),
327-
))
327+
));
328328
}
329329
Codec::Boolean => match default_json {
330330
Value::Bool(b) => AvroLiteral::Boolean(*b),
331331
_ => {
332332
return Err(ArrowError::SchemaError(
333333
"Boolean default must be a JSON boolean".to_string(),
334-
))
334+
));
335335
}
336336
},
337337
Codec::Int32 | Codec::Date32 | Codec::TimeMillis => {
@@ -393,7 +393,7 @@ impl AvroDataType {
393393
_ => {
394394
return Err(ArrowError::SchemaError(
395395
"Default value must be a JSON array for Avro array type".to_string(),
396-
))
396+
));
397397
}
398398
},
399399
Codec::Map(val_dt) => match default_json {
@@ -407,7 +407,7 @@ impl AvroDataType {
407407
_ => {
408408
return Err(ArrowError::SchemaError(
409409
"Default value must be a JSON object for Avro map type".to_string(),
410-
))
410+
));
411411
}
412412
},
413413
Codec::Struct(fields) => match default_json {
@@ -449,7 +449,7 @@ impl AvroDataType {
449449
_ => {
450450
return Err(ArrowError::SchemaError(
451451
"Default value for record/struct must be a JSON object".to_string(),
452-
))
452+
));
453453
}
454454
},
455455
Codec::Union(encodings, _, _) => {
@@ -1622,7 +1622,7 @@ impl<'a> Maker<'a> {
16221622
_ => {
16231623
return Err(ArrowError::ParseError(format!(
16241624
"Illegal promotion {write_primitive:?} to {read_primitive:?}"
1625-
)))
1625+
)));
16261626
}
16271627
};
16281628
let mut datatype = self.parse_type(reader_schema, None)?;
@@ -1894,8 +1894,8 @@ impl<'a> Maker<'a> {
18941894
mod tests {
18951895
use super::*;
18961896
use crate::schema::{
1897-
Array, Attributes, ComplexType, Field as AvroFieldSchema, Fixed, PrimitiveType, Record,
1898-
Schema, Type, TypeName, AVRO_ROOT_RECORD_DEFAULT_NAME,
1897+
AVRO_ROOT_RECORD_DEFAULT_NAME, Array, Attributes, ComplexType, Field as AvroFieldSchema,
1898+
Fixed, PrimitiveType, Record, Schema, Type, TypeName,
18991899
};
19001900
use indexmap::IndexMap;
19011901
use serde_json::{self, Value};

arrow-avro/src/reader/header.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@
1717

1818
//! Decoder for [`Header`]
1919
20-
use crate::compression::{CompressionCodec, CODEC_METADATA_KEY};
20+
use crate::compression::{CODEC_METADATA_KEY, CompressionCodec};
2121
use crate::reader::vlq::VLQDecoder;
22-
use crate::schema::{Schema, SCHEMA_METADATA_KEY};
22+
use crate::schema::{SCHEMA_METADATA_KEY, Schema};
2323
use arrow_schema::ArrowError;
2424
use std::io::BufRead;
2525

arrow-avro/src/reader/mod.rs

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -477,8 +477,8 @@
477477
use crate::codec::AvroFieldBuilder;
478478
use crate::reader::header::read_header;
479479
use crate::schema::{
480-
AvroSchema, Fingerprint, FingerprintAlgorithm, Schema, SchemaStore, CONFLUENT_MAGIC,
481-
SINGLE_OBJECT_MAGIC,
480+
AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC, Schema,
481+
SchemaStore,
482482
};
483483
use arrow_array::{RecordBatch, RecordBatchReader};
484484
use arrow_schema::{ArrowError, SchemaRef};
@@ -695,7 +695,7 @@ impl Decoder {
695695
None => {
696696
return Err(ArrowError::ParseError(
697697
"Missing magic bytes and fingerprint".to_string(),
698-
))
698+
));
699699
}
700700
}
701701
}
@@ -1267,9 +1267,9 @@ mod test {
12671267
use crate::reader::record::RecordDecoder;
12681268
use crate::reader::{Decoder, Reader, ReaderBuilder};
12691269
use crate::schema::{
1270-
AvroSchema, Fingerprint, FingerprintAlgorithm, PrimitiveType, SchemaStore,
1271-
AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AVRO_NAME_METADATA_KEY,
1272-
CONFLUENT_MAGIC, SINGLE_OBJECT_MAGIC,
1270+
AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY,
1271+
AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, PrimitiveType,
1272+
SINGLE_OBJECT_MAGIC, SchemaStore,
12731273
};
12741274
use crate::test_util::arrow_test_data;
12751275
use crate::writer::AvroWriter;
@@ -1289,7 +1289,7 @@ mod test {
12891289
use arrow_array::types::{Int32Type, IntervalMonthDayNanoType};
12901290
use arrow_array::*;
12911291
use arrow_buffer::{
1292-
i256, Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer,
1292+
Buffer, IntervalMonthDayNano, NullBuffer, OffsetBuffer, ScalarBuffer, i256,
12931293
};
12941294
#[cfg(feature = "avro_custom_types")]
12951295
use arrow_schema::{
@@ -1302,8 +1302,8 @@ mod test {
13021302
};
13031303
use bytes::Bytes;
13041304
use futures::executor::block_on;
1305-
use futures::{stream, Stream, StreamExt, TryStreamExt};
1306-
use serde_json::{json, Value};
1305+
use futures::{Stream, StreamExt, TryStreamExt, stream};
1306+
use serde_json::{Value, json};
13071307
use std::collections::HashMap;
13081308
use std::fs::File;
13091309
use std::io::{BufReader, Cursor};
@@ -2810,7 +2810,7 @@ mod test {
28102810
let top_meta = proj_field.metadata().clone();
28112811
let (expected_field_ref, expected_col): (Arc<Field>, ArrayRef) =
28122812
match (full_field.data_type(), proj_field.data_type()) {
2813-
(&DataType::List(_), DataType::List(ref proj_elem)) => {
2813+
(&DataType::List(_), DataType::List(proj_elem)) => {
28142814
let new_col =
28152815
rebuild_list_array_with_element(&col_full, proj_elem.clone(), false);
28162816
let nf = Field::new(
@@ -2821,7 +2821,7 @@ mod test {
28212821
.with_metadata(top_meta);
28222822
(Arc::new(nf), new_col)
28232823
}
2824-
(&DataType::LargeList(_), DataType::LargeList(ref proj_elem)) => {
2824+
(&DataType::LargeList(_), DataType::LargeList(proj_elem)) => {
28252825
let new_col =
28262826
rebuild_list_array_with_element(&col_full, proj_elem.clone(), true);
28272827
let nf = Field::new(

arrow-avro/src/reader/record.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,15 @@ use crate::codec::{
2121
};
2222
use crate::reader::cursor::AvroCursor;
2323
use crate::schema::Nullability;
24-
use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder};
2524
#[cfg(feature = "small_decimals")]
2625
use arrow_array::builder::{Decimal32Builder, Decimal64Builder};
26+
use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder};
2727
use arrow_array::types::*;
2828
use arrow_array::*;
2929
use arrow_buffer::*;
3030
use arrow_schema::{
31-
ArrowError, DataType, Field as ArrowField, FieldRef, Fields, Schema as ArrowSchema, SchemaRef,
32-
UnionFields, UnionMode, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
31+
ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field as ArrowField,
32+
FieldRef, Fields, Schema as ArrowSchema, SchemaRef, UnionFields, UnionMode,
3333
};
3434
#[cfg(feature = "small_decimals")]
3535
use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
@@ -4047,8 +4047,8 @@ mod tests {
40474047
}
40484048

40494049
#[test]
4050-
fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties(
4051-
) {
4050+
fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties()
4051+
{
40524052
let fields = vec![("a", DataType::Int32, true), ("b", DataType::Utf8, true)];
40534053
let mut field_refs: Vec<FieldRef> = Vec::new();
40544054
let mut encoders: Vec<Decoder> = Vec::new();

arrow-avro/src/schema.rs

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use arrow_schema::{
2222
UnionMode,
2323
};
2424
use serde::{Deserialize, Serialize};
25-
use serde_json::{json, Map as JsonMap, Value};
25+
use serde_json::{Map as JsonMap, Value, json};
2626
#[cfg(feature = "sha256")]
2727
use sha2::{Digest, Sha256};
2828
use std::borrow::Cow;
@@ -1432,7 +1432,7 @@ fn datatype_to_avro(
14321432
_ => {
14331433
return Err(ArrowError::SchemaError(
14341434
"Map 'entries' field must be Struct(key,value)".into(),
1435-
))
1435+
));
14361436
}
14371437
};
14381438
let values_schema = process_datatype(
@@ -1556,7 +1556,7 @@ fn datatype_to_avro(
15561556
other => {
15571557
return Err(ArrowError::NotYetImplemented(format!(
15581558
"Arrow type {other:?} has no Avro representation"
1559-
)))
1559+
)));
15601560
}
15611561
};
15621562
Ok((val, extras))
@@ -2148,9 +2148,11 @@ mod tests {
21482148
store.lookup(&Fingerprint::Rabin(fp_val)).cloned(),
21492149
Some(schema.clone())
21502150
);
2151-
assert!(store
2152-
.lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1)))
2153-
.is_none());
2151+
assert!(
2152+
store
2153+
.lookup(&Fingerprint::Rabin(fp_val.wrapping_add(1)))
2154+
.is_none()
2155+
);
21542156
}
21552157
Fingerprint::Id(_id) => {
21562158
unreachable!("This test should only generate Rabin fingerprints")

0 commit comments

Comments
 (0)