Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
377 changes: 15 additions & 362 deletions parquet/src/arrow/async_reader/metadata.rs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion parquet/src/arrow/async_reader/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl ParquetObjectReader {
}

/// Provide a hint as to the size of the parquet file's footer,
/// see [fetch_parquet_metadata](crate::arrow::async_reader::fetch_parquet_metadata)
/// see [`ParquetMetaDataReader::with_prefetch_hint`]
pub fn with_footer_size_hint(self, hint: usize) -> Self {
Self {
metadata_size_hint: Some(hint),
Expand Down
3 changes: 0 additions & 3 deletions parquet/src/arrow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,6 @@ pub use self::async_reader::ParquetRecordBatchStreamBuilder;
pub use self::async_writer::AsyncArrowWriter;
use crate::schema::types::{SchemaDescriptor, Type};
use arrow_schema::{FieldRef, Schema};
// continue to export deprecated methods until they are removed
#[allow(deprecated)]
pub use self::schema::arrow_to_parquet_schema;

pub use self::schema::{
add_encoded_arrow_schema_to_metadata, encode_arrow_schema, parquet_to_arrow_field_levels,
Expand Down
9 changes: 0 additions & 9 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,15 +358,6 @@ impl<'a> ArrowSchemaConverter<'a> {
}
}

/// Converts an Arrow [`Schema`] into a Parquet [`SchemaDescriptor`].
///
/// This is a thin convenience wrapper: it creates a converter with
/// `ArrowSchemaConverter::new()` and returns the result of calling `convert`
/// on it with `schema`, unchanged (including any error).
///
/// The name of the root schema element defaults to `"arrow_schema"`; this can be
/// overridden by using [`ArrowSchemaConverter`] directly.
///
/// Deprecated since 54.0.0 in favour of [`ArrowSchemaConverter`].
#[deprecated(since = "54.0.0", note = "Use `ArrowSchemaConverter` instead")]
pub fn arrow_to_parquet_schema(schema: &Schema) -> Result<SchemaDescriptor> {
ArrowSchemaConverter::new().convert(schema)
}

fn parse_key_value_metadata(
key_value_metadata: Option<&Vec<KeyValue>>,
) -> Option<HashMap<String, String>> {
Expand Down
10 changes: 0 additions & 10 deletions parquet/src/column/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,6 @@ impl ColumnWriter<'_> {
}
}

/// Deprecated two-variant enum (`Page`, `Column`).
///
/// NOTE(review): no use of this type is visible in this excerpt and the
/// deprecation note itself states that its purpose is unknown. Presumably it
/// once distinguished page-level from column-level handling — confirm before
/// relying on it.
#[deprecated(
since = "54.0.0",
note = "Seems like a stray and nobody knows what's it for. Will be removed in the next release."
)]
#[allow(missing_docs)]
pub enum Level {
Page,
Column,
}

/// Gets a specific column writer corresponding to column descriptor `descr`.
pub fn get_column_writer<'a>(
descr: ColumnDescPtr,
Expand Down
26 changes: 0 additions & 26 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,6 @@ impl Int96 {
self.value = [elem0, elem1, elem2];
}

/// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch.
///
/// Deprecated alias: this forwards to `to_millis` and returns its result
/// unchanged, so both methods always agree.
#[deprecated(since = "54.0.0", note = "Use `to_millis` instead")]
pub fn to_i64(&self) -> i64 {
self.to_millis()
}

/// Converts this INT96 into an i64 representing the number of SECONDS since EPOCH
///
/// Will wrap around on overflow
Expand Down Expand Up @@ -1214,26 +1208,6 @@ pub trait DataType: 'static + Send {
Self: Sized;
}

// Workaround bug in specialization
//
// Marker trait with no items of its own: it only restates, as a supertrait
// bound plus a `where` clause, that a `DataType`'s associated type `T`
// implements `SliceAsBytes`.
//
// NOTE(review): the specialization bug this was working around is not
// identified anywhere in this excerpt — confirm before reviving the trait.
#[deprecated(
since = "54.0.0",
note = "Seems like a stray and nobody knows what's it for. Will be removed in 55.0.0"
)]
#[allow(missing_docs)]
pub trait SliceAsBytesDataType: DataType
where
Self::T: SliceAsBytes,
{
}

// Blanket implementation: every `DataType` whose associated type `T`
// implements `SliceAsBytes` automatically implements `SliceAsBytesDataType`.
// The impl body is empty because the trait declares no items.
// `allow(deprecated)` silences the warning for naming the deprecated trait here.
#[allow(deprecated)]
impl<T> SliceAsBytesDataType for T
where
T: DataType,
<T as DataType>::T: SliceAsBytes,
{
}

macro_rules! make_type {
($name:ident, $reader_ident: ident, $writer_ident: ident, $native_ty:ty, $size:expr) => {
#[doc = concat!("Parquet physical type: ", stringify!($name))]
Expand Down
81 changes: 0 additions & 81 deletions parquet/src/file/footer.rs

This file was deleted.

30 changes: 0 additions & 30 deletions parquet/src/file/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,22 +208,6 @@ impl ParquetMetaData {
self.file_decryptor = file_decryptor;
}

/// Builds Parquet metadata from file-level metadata, the list of row group
/// metadata, and the optional column index / offset index structures.
///
/// Each argument is handed to the matching setter of a
/// [`ParquetMetaDataBuilder`] seeded with `file_metadata`, and the built
/// value is returned.
#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataBuilder")]
pub fn new_with_page_index(
    file_metadata: FileMetaData,
    row_groups: Vec<RowGroupMetaData>,
    column_index: Option<ParquetColumnIndex>,
    offset_index: Option<ParquetOffsetIndex>,
) -> Self {
    // Apply the setters one at a time, rebinding the builder after each step.
    let builder = ParquetMetaDataBuilder::new(file_metadata);
    let builder = builder.set_row_groups(row_groups);
    let builder = builder.set_column_index(column_index);
    let builder = builder.set_offset_index(offset_index);
    builder.build()
}

/// Convert this ParquetMetaData into a [`ParquetMetaDataBuilder`]
pub fn into_builder(self) -> ParquetMetaDataBuilder {
self.into()
Expand Down Expand Up @@ -1397,20 +1381,6 @@ impl ColumnChunkMetaDataBuilder {
self
}

/// Sets file offset in bytes.
///
/// This field was meant to provide an alternate to storing `ColumnMetadata` directly in
/// the `ColumnChunkMetadata`. However, most Parquet readers assume the `ColumnMetadata`
/// is stored inline and ignore this field.
///
/// Takes the builder by value, overwrites `file_offset` on the wrapped value
/// (`self.0`) with `value`, and returns the builder so calls can be chained.
/// No validation of `value` is performed here.
#[deprecated(
since = "53.0.0",
note = "The Parquet specification requires this field to be 0"
)]
pub fn set_file_offset(mut self, value: i64) -> Self {
self.0.file_offset = value;
self
}

/// Sets number of values.
pub fn set_num_values(mut self, value: i64) -> Self {
self.0.num_values = value;
Expand Down
1 change: 0 additions & 1 deletion parquet/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@
//! ```
#[cfg(feature = "encryption")]
pub mod column_crypto_metadata;
pub mod footer;
pub mod metadata;
pub mod page_encoding_stats;
pub mod page_index;
Expand Down
49 changes: 2 additions & 47 deletions parquet/src/file/page_index/index_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! Support for reading [`Index`] and [`PageLocation`] from parquet metadata.
//! Support for reading [`Index`] and [`OffsetIndex`] from parquet metadata.

use crate::basic::Type;
use crate::data_type::Int96;
Expand All @@ -24,7 +24,7 @@ use crate::file::metadata::ColumnChunkMetaData;
use crate::file::page_index::index::{Index, NativeIndex};
use crate::file::page_index::offset_index::OffsetIndexMetaData;
use crate::file::reader::ChunkReader;
use crate::format::{ColumnIndex, OffsetIndex, PageLocation};
use crate::format::{ColumnIndex, OffsetIndex};
use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
use std::ops::Range;

Expand Down Expand Up @@ -83,45 +83,6 @@ pub fn read_columns_indexes<R: ChunkReader>(
.transpose()
}

/// Reads the [`OffsetIndex`] of every column chunk of a row group and returns
/// the per-page [`PageLocation`]s stored in each of them.
///
/// The result is laid out as `location[column_number][page_number]`.
///
/// Returns an empty vector if this row group does not contain an
/// [`OffsetIndex`], i.e. when accumulating the chunks' offset index ranges
/// yields no range at all.
///
/// # Errors
///
/// Returns an error if an accumulated range exists but an individual chunk
/// reports no offset index range (`"missing offset index"`), if an integer
/// conversion of a range bound fails, or if reading or decoding the bytes
/// fails.
///
/// See [Page Index Documentation] for more details.
///
/// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
#[deprecated(since = "53.0.0", note = "Use read_offset_indexes")]
pub fn read_pages_locations<R: ChunkReader>(
    reader: &R,
    chunks: &[ColumnChunkMetaData],
) -> Result<Vec<Vec<PageLocation>>, ParquetError> {
    // Accumulate a single range from the offset index ranges of all chunks,
    // so the reader is asked for bytes only once.
    let mut span = None;
    for chunk in chunks {
        span = acc_range(span, chunk.offset_index_range());
    }

    let Some(span) = span else {
        return Ok(vec![]);
    };

    let buffer = reader.get_bytes(span.start as _, (span.end - span.start).try_into()?)?;

    let mut locations = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let Some(range) = chunk.offset_index_range() else {
            return Err(general_err!("missing offset index"));
        };
        // Translate the chunk's absolute range into offsets within `buffer`.
        let begin = usize::try_from(range.start - span.start)?;
        let end = usize::try_from(range.end - span.start)?;
        locations.push(decode_page_locations(&buffer[begin..end])?);
    }
    Ok(locations)
}

/// Reads per-column [`OffsetIndexMetaData`] for all columns of a row group by
/// decoding [`OffsetIndex`].
///
Expand Down Expand Up @@ -172,12 +133,6 @@ pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData, Pa
OffsetIndexMetaData::try_new(offset)
}

/// Decodes an [`OffsetIndex`] from `data` through a
/// `TCompactSliceInputProtocol` and returns only its `page_locations`.
///
/// # Errors
///
/// Propagates any error raised while reading the [`OffsetIndex`] from `data`.
pub(crate) fn decode_page_locations(data: &[u8]) -> Result<Vec<PageLocation>, ParquetError> {
    let mut protocol = TCompactSliceInputProtocol::new(data);
    Ok(OffsetIndex::read_from_in_protocol(&mut protocol)?.page_locations)
}

pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result<Index, ParquetError> {
let mut prot = TCompactSliceInputProtocol::new(data);

Expand Down
Loading
Loading