Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repository = { workspace = true }
authors = { workspace = true }
keywords = ["arrow", "parquet", "hadoop"]
readme = "README.md"
edition = { workspace = true }
edition = "2024"
rust-version = { workspace = true }

[target.'cfg(target_arch = "wasm32")'.dependencies]
Expand Down
8 changes: 4 additions & 4 deletions parquet/benches/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ use arrow::array::Array;
use arrow::datatypes::DataType;
use arrow_schema::Field;
use criterion::measurement::WallTime;
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion};
use criterion::{BenchmarkGroup, Criterion, criterion_group, criterion_main};
use half::f16;
use num_bigint::BigInt;
use num_traits::FromPrimitive;
use parquet::arrow::array_reader::{
make_byte_array_reader, make_byte_view_array_reader, make_fixed_len_byte_array_reader,
ListArrayReader,
ListArrayReader, make_byte_array_reader, make_byte_view_array_reader,
make_fixed_len_byte_array_reader,
};
use parquet::basic::Type;
use parquet::data_type::{ByteArray, FixedLenByteArrayType};
Expand All @@ -38,7 +38,7 @@ use parquet::{
schema::types::{ColumnDescPtr, SchemaDescPtr},
};
use rand::distr::uniform::SampleUniform;
use rand::{rngs::StdRng, Rng, SeedableRng};
use rand::{Rng, SeedableRng, rngs::StdRng};
use std::{collections::VecDeque, sync::Arc};

fn build_test_schema() -> SchemaDescPtr {
Expand Down
2 changes: 1 addition & 1 deletion parquet/benches/arrow_reader_clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use arrow::compute::{like, nlike, or};
use arrow_array::types::{Int16Type, Int32Type, Int64Type};
use arrow_array::{ArrayRef, ArrowPrimitiveType, BooleanArray, PrimitiveArray, StringViewArray};
use arrow_schema::{ArrowError, DataType, Schema};
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{Criterion, criterion_group, criterion_main};
use futures::StreamExt;
use parquet::arrow::arrow_reader::{
ArrowPredicate, ArrowPredicateFn, ArrowReaderMetadata, ArrowReaderOptions,
Expand Down
6 changes: 3 additions & 3 deletions parquet/benches/arrow_reader_row_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ use arrow::compute::and;
use arrow::compute::kernels::cmp::{eq, gt, lt, neq};
use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
use arrow::record_batch::RecordBatch;
use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
use arrow_array::StringViewArray;
use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
use arrow_cast::pretty::pretty_format_batches;
use bytes::Bytes;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use futures::future::BoxFuture;
use futures::{FutureExt, StreamExt};
use parquet::arrow::arrow_reader::{
Expand All @@ -72,7 +72,7 @@ use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMas
use parquet::basic::Compression;
use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
use parquet::file::properties::WriterProperties;
use rand::{rngs::StdRng, Rng, SeedableRng};
use rand::{Rng, SeedableRng, rngs::StdRng};
use std::ops::Range;
use std::sync::Arc;

Expand Down
4 changes: 2 additions & 2 deletions parquet/benches/arrow_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ use arrow_schema::{
DataType::{self, *},
Field, Schema,
};
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use parquet::{arrow::arrow_reader::ArrowReaderOptions, file::properties::WriterProperties};
use parquet::{
arrow::{arrow_reader::ArrowReaderBuilder, ArrowWriter},
arrow::{ArrowWriter, arrow_reader::ArrowReaderBuilder},
file::properties::EnabledStatistics,
};
use std::sync::Arc;
Expand Down
2 changes: 1 addition & 1 deletion parquet/benches/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use parquet::basic::{Encoding, Type as ParquetType};
use parquet::data_type::{
DataType, DoubleType, FixedLenByteArray, FixedLenByteArrayType, FloatType,
};
use parquet::decoding::{get_decoder, Decoder};
use parquet::decoding::{Decoder, get_decoder};
use parquet::encoding::get_encoder;
use parquet::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type};
use rand::prelude::*;
Expand Down
2 changes: 1 addition & 1 deletion parquet/examples/read_with_rowgroup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use arrow::util::pretty::print_batches;
use bytes::{Buf, Bytes};
use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, RowGroups, RowSelection};
use parquet::arrow::async_reader::AsyncFileReader;
use parquet::arrow::{parquet_to_arrow_field_levels, ProjectionMask};
use parquet::arrow::{ProjectionMask, parquet_to_arrow_field_levels};
use parquet::column::page::{PageIterator, PageReader};
use parquet::errors::{ParquetError, Result};
use parquet::file::metadata::RowGroupMetaData;
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/arrow/array_reader/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ use std::sync::{Arc, Mutex};

use arrow_schema::{DataType, Fields, SchemaBuilder};

use crate::arrow::ProjectionMask;
use crate::arrow::array_reader::byte_view_array::make_byte_view_array_reader;
use crate::arrow::array_reader::cached_array_reader::CacheRole;
use crate::arrow::array_reader::cached_array_reader::CachedArrayReader;
use crate::arrow::array_reader::empty_array::make_empty_array_reader;
use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader;
use crate::arrow::array_reader::row_group_cache::RowGroupCache;
use crate::arrow::array_reader::{
make_byte_array_dictionary_reader, make_byte_array_reader, ArrayReader,
FixedSizeListArrayReader, ListArrayReader, MapArrayReader, NullArrayReader,
PrimitiveArrayReader, RowGroups, StructArrayReader,
ArrayReader, FixedSizeListArrayReader, ListArrayReader, MapArrayReader, NullArrayReader,
PrimitiveArrayReader, RowGroups, StructArrayReader, make_byte_array_dictionary_reader,
make_byte_array_reader,
};
use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
use crate::arrow::schema::{ParquetField, ParquetFieldType};
use crate::arrow::ProjectionMask;
use crate::basic::Type as PhysicalType;
use crate::data_type::{BoolType, DoubleType, FloatType, Int32Type, Int64Type, Int96Type};
use crate::errors::{ParquetError, Result};
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
use crate::arrow::buffer::bit_util::sign_extend_be;
use crate::arrow::buffer::offset_buffer::OffsetBuffer;
use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
Expand Down Expand Up @@ -286,7 +286,7 @@ impl ByteArrayDecoder {
return Err(general_err!(
"unsupported encoding for byte array: {}",
encoding
))
));
}
};

Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/byte_array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ use std::any::Any;
use std::marker::PhantomData;
use std::sync::Arc;

use arrow_array::{new_empty_array, Array, ArrayRef, OffsetSizeTrait};
use arrow_array::{Array, ArrayRef, OffsetSizeTrait, new_empty_array};
use arrow_buffer::ArrowNativeType;
use arrow_schema::DataType as ArrowType;
use bytes::Bytes;

use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain};
use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
use crate::arrow::buffer::{dictionary_buffer::DictionaryBuffer, offset_buffer::OffsetBuffer};
use crate::arrow::record_reader::GenericRecordReader;
use crate::arrow::schema::parquet_to_arrow_field;
Expand Down
6 changes: 3 additions & 3 deletions parquet/src/arrow/array_reader/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
use crate::arrow::buffer::view_buffer::ViewBuffer;
use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
use crate::arrow::record_reader::GenericRecordReader;
Expand All @@ -28,7 +28,7 @@ use crate::encodings::decoding::{Decoder, DeltaBitPackDecoder};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::utf8::check_valid_utf8;
use arrow_array::{builder::make_view, ArrayRef};
use arrow_array::{ArrayRef, builder::make_view};
use arrow_buffer::Buffer;
use arrow_data::ByteView;
use arrow_schema::DataType as ArrowType;
Expand Down Expand Up @@ -248,7 +248,7 @@ impl ByteViewArrayDecoder {
return Err(general_err!(
"unsupported encoding for byte array: {}",
encoding
))
));
}
};

Expand Down
6 changes: 3 additions & 3 deletions parquet/src/arrow/array_reader/cached_array_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
//! [`CachedArrayReader`] wrapper around [`ArrayReader`]

use crate::arrow::array_reader::row_group_cache::BatchID;
use crate::arrow::array_reader::{row_group_cache::RowGroupCache, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, row_group_cache::RowGroupCache};
use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
use crate::errors::Result;
use arrow_array::{new_empty_array, ArrayRef, BooleanArray};
use arrow_array::{ArrayRef, BooleanArray, new_empty_array};
use arrow_buffer::BooleanBufferBuilder;
use arrow_schema::DataType as ArrowType;
use std::any::Any;
Expand Down Expand Up @@ -351,8 +351,8 @@ impl ArrayReader for CachedArrayReader {
#[cfg(test)]
mod tests {
use super::*;
use crate::arrow::array_reader::row_group_cache::RowGroupCache;
use crate::arrow::array_reader::ArrayReader;
use crate::arrow::array_reader::row_group_cache::RowGroupCache;
use arrow_array::{ArrayRef, Int32Array};
use std::sync::{Arc, Mutex};

Expand Down
16 changes: 8 additions & 8 deletions parquet/src/arrow/array_reader/fixed_len_byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,22 @@
// specific language governing permissions and limitations
// under the License.

use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
use crate::arrow::buffer::bit_util::{iter_set_bits_rev, sign_extend_be};
use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
use crate::arrow::record_reader::buffer::ValuesBuffer;
use crate::arrow::record_reader::GenericRecordReader;
use crate::arrow::record_reader::buffer::ValuesBuffer;
use crate::arrow::schema::parquet_to_arrow_field;
use crate::basic::{Encoding, Type};
use crate::column::page::PageIterator;
use crate::column::reader::decoder::ColumnValueDecoder;
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use arrow_array::{
ArrayRef, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
FixedSizeBinaryArray, Float16Array, IntervalDayTimeArray, IntervalYearMonthArray,
};
use arrow_buffer::{i256, Buffer, IntervalDayTime};
use arrow_buffer::{Buffer, IntervalDayTime, i256};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, IntervalUnit};
use bytes::Bytes;
Expand Down Expand Up @@ -59,7 +59,7 @@ pub fn make_fixed_len_byte_array_reader(
return Err(general_err!(
"invalid physical type for fixed length byte array reader - {}",
t
))
));
}
};
match &data_type {
Expand Down Expand Up @@ -117,7 +117,7 @@ pub fn make_fixed_len_byte_array_reader(
return Err(general_err!(
"invalid data type for fixed length byte array reader - {}",
data_type
))
));
}
}

Expand Down Expand Up @@ -394,7 +394,7 @@ impl ColumnValueDecoder for ValueDecoder {
return Err(general_err!(
"unsupported encoding for fixed length byte array: {}",
encoding
))
));
}
});
Ok(())
Expand Down Expand Up @@ -518,8 +518,8 @@ enum Decoder {
#[cfg(test)]
mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use arrow::datatypes::Field;
use arrow::error::Result as ArrowResult;
use arrow_array::{Array, ListArray};
Expand Down
10 changes: 5 additions & 5 deletions parquet/src/arrow/array_reader/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use crate::arrow::array_reader::ArrayReader;
use crate::errors::ParquetError;
use crate::errors::Result;
use arrow_array::FixedSizeListArray;
use arrow_array::{builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef};
use arrow_data::{transform::MutableArrayData, ArrayData};
use arrow_array::{Array, ArrayRef, builder::BooleanBufferBuilder, new_empty_array};
use arrow_data::{ArrayData, transform::MutableArrayData};
use arrow_schema::DataType as ArrowType;

/// Implementation of fixed-size list array reader.
Expand Down Expand Up @@ -225,15 +225,15 @@ impl ArrayReader for FixedSizeListArrayReader {
mod tests {
use super::*;
use crate::arrow::{
array_reader::{test_util::InMemoryArrayReader, ListArrayReader},
arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
ArrowWriter,
array_reader::{ListArrayReader, test_util::InMemoryArrayReader},
arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
};
use arrow::datatypes::{Field, Int32Type};
use arrow_array::{
FixedSizeListArray, ListArray, PrimitiveArray, RecordBatch,
builder::{FixedSizeListBuilder, Int32Builder, ListBuilder},
cast::AsArray,
FixedSizeListArray, ListArray, PrimitiveArray, RecordBatch,
};
use arrow_buffer::Buffer;
use arrow_data::ArrayDataBuilder;
Expand Down
10 changes: 5 additions & 5 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ use crate::arrow::array_reader::ArrayReader;
use crate::errors::ParquetError;
use crate::errors::Result;
use arrow_array::{
builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef, GenericListArray,
OffsetSizeTrait,
Array, ArrayRef, GenericListArray, OffsetSizeTrait, builder::BooleanBufferBuilder,
new_empty_array,
};
use arrow_buffer::Buffer;
use arrow_buffer::ToByteSlice;
use arrow_data::{transform::MutableArrayData, ArrayData};
use arrow_data::{ArrayData, transform::MutableArrayData};
use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::cmp::Ordering;
Expand Down Expand Up @@ -246,12 +246,12 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
#[cfg(test)]
mod tests {
use super::*;
use crate::arrow::array_reader::ArrayReaderBuilder;
use crate::arrow::array_reader::list_array::ListArrayReader;
use crate::arrow::array_reader::test_util::InMemoryArrayReader;
use crate::arrow::array_reader::ArrayReaderBuilder;
use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
use crate::arrow::schema::parquet_to_arrow_schema_and_fields;
use crate::arrow::{parquet_to_arrow_schema, ArrowWriter, ProjectionMask};
use crate::arrow::{ArrowWriter, ProjectionMask, parquet_to_arrow_schema};
use crate::file::properties::WriterProperties;
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::schema::parser::parse_message_type;
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ impl ArrayReader for MapArrayReader {
#[cfg(test)]
mod tests {
use super::*;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use crate::arrow::ArrowWriter;
use crate::arrow::arrow_reader::ParquetRecordBatchReader;
use arrow::datatypes::{Field, Int32Type, Schema};
use arrow_array::RecordBatch;
use arrow_array::builder::{MapBuilder, PrimitiveBuilder, StringBuilder};
use arrow_array::cast::*;
use arrow_array::RecordBatch;
use arrow_schema::Fields;
use bytes::Bytes;

Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use arrow_schema::DataType as ArrowType;
use std::any::Any;
use std::sync::Arc;

use crate::arrow::record_reader::buffer::ValuesBuffer;
use crate::arrow::record_reader::GenericRecordReader;
use crate::arrow::record_reader::buffer::ValuesBuffer;
use crate::column::page::PageIterator;
use crate::column::reader::decoder::ColumnValueDecoder;
use crate::file::reader::{FilePageIterator, FileReader};
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
use crate::arrow::record_reader::RecordReader;
use crate::column::page::PageIterator;
use crate::data_type::DataType;
Expand Down
Loading
Loading