Skip to content

Commit cf50b5b

Browse files
committed
code review improvements
1 parent ae4fe0b commit cf50b5b

File tree

5 files changed

+42
-36
lines changed

5 files changed

+42
-36
lines changed

arrow-avro/benches/avro_reader.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ fn read_avro_test_file(
110110
ints.push(i32::from_le_bytes(int_bytes));
111111
}
112112

113-
let string_array: ArrayRef = if options.use_utf8view {
113+
let string_array: ArrayRef = if options.use_utf8view() {
114114
Arc::new(StringViewArray::from_iter(
115115
strings.iter().map(|s| Some(s.as_str())),
116116
))
@@ -123,7 +123,7 @@ fn read_avro_test_file(
123123
let int_array: ArrayRef = Arc::new(Int32Array::from(ints));
124124

125125
let schema = Arc::new(Schema::new(vec![
126-
if options.use_utf8view {
126+
if options.use_utf8view() {
127127
Field::new("string_field", DataType::Utf8View, false)
128128
} else {
129129
Field::new("string_field", DataType::Utf8, false)
@@ -244,19 +244,15 @@ fn bench_avro_reader(c: &mut Criterion) {
244244

245245
group.bench_function(format!("string_array_{}_chars", str_length), |b| {
246246
b.iter(|| {
247-
let options = ReadOptions {
248-
use_utf8view: false,
249-
};
250-
247+
let options = ReadOptions::default();
251248
let batch = read_avro_test_file(file_path, &options).unwrap();
252249
criterion::black_box(batch)
253250
})
254251
});
255252

256253
group.bench_function(format!("string_view_{}_chars", str_length), |b| {
257254
b.iter(|| {
258-
let options = ReadOptions { use_utf8view: true };
259-
255+
let options = ReadOptions::default().with_utf8view(true);
260256
let batch = read_avro_test_file(file_path, &options).unwrap();
261257
criterion::black_box(batch)
262258
})

arrow-avro/examples/read_with_utf8view.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
5050
reader.seek(SeekFrom::Start(0))?;
5151

5252
let start = Instant::now();
53-
let options = ReadOptions { use_utf8view: true };
53+
let options = ReadOptions::default().with_utf8view(true);
5454
let batch_view = read_avro_with_options(&mut reader, &options)?;
5555
let view_duration = start.elapsed();
5656

@@ -108,7 +108,7 @@ fn read_avro_with_options(
108108
let string_data = vec!["avro1", "avro2", "avro3", "avro4", "avro5"];
109109
let int_data = vec![1, 2, 3, 4, 5];
110110

111-
let string_array: ArrayRef = if options.use_utf8view {
111+
let string_array: ArrayRef = if options.use_utf8view() {
112112
Arc::new(StringViewArray::from(string_data))
113113
} else {
114114
Arc::new(StringArray::from(string_data))

arrow-avro/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
/// Implements the primary reader interface and record decoding logic.
3434
pub mod reader;
3535

36-
/// Avro schema parsing and representation
37-
///
38-
/// Provides types for parsing and representing Avro schema definitions.
39-
pub mod schema;
36+
// Avro schema parsing and representation
37+
//
38+
// Provides types for parsing and representing Avro schema definitions.
39+
mod schema;
4040

4141
/// Compression codec implementations for Avro
4242
///

arrow-avro/src/reader/mod.rs

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,20 +42,33 @@ mod vlq;
4242
/// let default_options = ReadOptions::default();
4343
///
4444
/// // Enable Utf8View support for better string performance
45-
/// let options = ReadOptions {
46-
/// use_utf8view: true,
47-
/// ..ReadOptions::default()
48-
/// };
45+
/// let options = ReadOptions::default()
46+
/// .with_utf8view(true);
4947
/// ```
50-
#[derive(Default)]
48+
#[derive(Default, Debug, Clone)]
5149
pub struct ReadOptions {
52-
/// If true, use StringViewArray instead of StringArray for string data
53-
///
54-
/// When this option is enabled, string data from Avro files will be loaded
55-
/// into Arrow's StringViewArray instead of the standard StringArray.
50+
use_utf8view: bool,
51+
}
52+
53+
impl ReadOptions {
54+
/// Create a new `ReadOptions` with default values
55+
pub fn new() -> Self {
56+
Self::default()
57+
}
58+
59+
/// Set whether to use StringViewArray for string data
5660
///
57-
/// Default: false
58-
pub use_utf8view: bool,
61+
/// When enabled, string data from Avro files will be loaded into
62+
/// Arrow's StringViewArray instead of the standard StringArray.
63+
pub fn with_utf8view(mut self, use_utf8view: bool) -> Self {
64+
self.use_utf8view = use_utf8view;
65+
self
66+
}
67+
68+
/// Returns whether StringViewArray is used for string data
69+
pub fn use_utf8view(&self) -> bool {
70+
self.use_utf8view
71+
}
5972
}
6073

6174
/// Read a [`Header`] from the provided [`BufRead`]
@@ -131,11 +144,8 @@ mod test {
131144
let schema = header.schema().unwrap().unwrap();
132145
let root = AvroField::try_from(&schema).unwrap();
133146

134-
let mut decoder = if options.use_utf8view {
135-
RecordDecoder::try_new_with_options(root.data_type(), true).unwrap()
136-
} else {
137-
RecordDecoder::try_new(root.data_type()).unwrap()
138-
};
147+
let mut decoder =
148+
RecordDecoder::try_new_with_options(root.data_type(), options.clone()).unwrap();
139149

140150
for result in read_blocks(reader) {
141151
let block = result.unwrap();

arrow-avro/src/reader/record.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::codec::{AvroDataType, Codec, Nullability};
1919
use crate::reader::block::{Block, BlockDecoder};
2020
use crate::reader::cursor::AvroCursor;
2121
use crate::reader::header::Header;
22+
use crate::reader::ReadOptions;
2223
use crate::schema::*;
2324
use arrow_array::types::*;
2425
use arrow_array::*;
@@ -38,28 +39,27 @@ pub struct RecordDecoder {
3839
}
3940

4041
impl RecordDecoder {
41-
/// Create a new [`RecordDecoder`] from the provided [`AvroDataType`]
42+
/// Create a new [`RecordDecoder`] from the provided [`AvroDataType`] with default options
4243
pub fn try_new(data_type: &AvroDataType) -> Result<Self, ArrowError> {
43-
Self::try_new_with_options(data_type, false)
44+
Self::try_new_with_options(data_type, ReadOptions::default())
4445
}
4546

4647
/// Create a new [`RecordDecoder`] from the provided [`AvroDataType`] with additional options
4748
///
4849
/// This method allows you to customize how the Avro data is decoded into Arrow arrays.
49-
/// In particular, it allows enabling Utf8View support for better string performance.
5050
///
5151
/// # Parameters
5252
/// * `data_type` - The Avro data type to decode
53-
/// * `use_utf8view` - If true, use StringViewArray instead of StringArray for string data
53+
/// * `options` - Configuration options for decoding
5454
pub fn try_new_with_options(
5555
data_type: &AvroDataType,
56-
use_utf8view: bool,
56+
options: ReadOptions,
5757
) -> Result<Self, ArrowError> {
5858
match Decoder::try_new(data_type)? {
5959
Decoder::Record(fields, encodings) => Ok(Self {
6060
schema: Arc::new(ArrowSchema::new(fields)),
6161
fields: encodings,
62-
use_utf8view,
62+
use_utf8view: options.use_utf8view(),
6363
}),
6464
encoding => Err(ArrowError::ParseError(format!(
6565
"Expected record got {encoding:?}"

0 commit comments

Comments
 (0)