Skip to content

Commit bffecb6

Browse files
committed
Merge branch 'restrict-decoder' into delta-packed-reader
2 parents 9b456fa + c618952 commit bffecb6

File tree

2 files changed

+126
-17
lines changed

2 files changed

+126
-17
lines changed

parquet/src/data_type.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ pub(crate) mod private {
599599
+ super::SliceAsBytes
600600
+ PartialOrd
601601
+ Send
602+
+ crate::encodings::decoding::private::GetDecoder
602603
{
603604
/// Encode the value directly from a higher level encoder
604605
fn encode<W: std::io::Write>(

parquet/src/encodings/decoding.rs

Lines changed: 125 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::{cmp, marker::PhantomData, mem};
2222
use super::rle::RleDecoder;
2323

2424
use crate::basic::*;
25-
use crate::data_type::private::*;
25+
use crate::data_type::private::ParquetValueType;
2626
use crate::data_type::*;
2727
use crate::errors::{ParquetError, Result};
2828
use crate::schema::types::ColumnDescPtr;
@@ -31,6 +31,111 @@ use crate::util::{
3131
memory::{ByteBuffer, ByteBufferPtr},
3232
};
3333

34+
pub(crate) mod private {
35+
use super::*;
36+
37+
/// A trait that allows getting a [`Decoder`] implementation for a [`DataType`] with
38+
/// the corresponding [`ParquetValueType`]. This is necessary to support
39+
/// [`Decoder`] implementations that may not be applicable for all [`DataType`]
40+
/// and by extension all [`ParquetValueType`]
41+
pub trait GetDecoder {
42+
fn get_decoder<T: DataType<T = Self>>(
43+
descr: ColumnDescPtr,
44+
encoding: Encoding,
45+
) -> Result<Box<dyn Decoder<T>>> {
46+
get_decoder_default(descr, encoding)
47+
}
48+
}
49+
50+
fn get_decoder_default<T: DataType>(
51+
descr: ColumnDescPtr,
52+
encoding: Encoding,
53+
) -> Result<Box<dyn Decoder<T>>> {
54+
match encoding {
55+
Encoding::PLAIN => Ok(Box::new(PlainDecoder::new(descr.type_length()))),
56+
Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => Err(general_err!(
57+
"Cannot initialize this encoding through this function"
58+
)),
59+
Encoding::RLE
60+
| Encoding::DELTA_BINARY_PACKED
61+
| Encoding::DELTA_BYTE_ARRAY
62+
| Encoding::DELTA_LENGTH_BYTE_ARRAY => Err(general_err!(
63+
"Encoding {} is not supported for type",
64+
encoding
65+
)),
66+
e => Err(nyi_err!("Encoding {} is not supported", e)),
67+
}
68+
}
69+
70+
impl GetDecoder for bool {
71+
fn get_decoder<T: DataType<T = Self>>(
72+
descr: ColumnDescPtr,
73+
encoding: Encoding,
74+
) -> Result<Box<dyn Decoder<T>>> {
75+
match encoding {
76+
Encoding::RLE => Ok(Box::new(RleValueDecoder::new())),
77+
_ => get_decoder_default(descr, encoding),
78+
}
79+
}
80+
}
81+
82+
impl GetDecoder for i32 {
83+
fn get_decoder<T: DataType<T = Self>>(
84+
descr: ColumnDescPtr,
85+
encoding: Encoding,
86+
) -> Result<Box<dyn Decoder<T>>> {
87+
match encoding {
88+
Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())),
89+
_ => get_decoder_default(descr, encoding),
90+
}
91+
}
92+
}
93+
94+
impl GetDecoder for i64 {
95+
fn get_decoder<T: DataType<T = Self>>(
96+
descr: ColumnDescPtr,
97+
encoding: Encoding,
98+
) -> Result<Box<dyn Decoder<T>>> {
99+
match encoding {
100+
Encoding::DELTA_BINARY_PACKED => Ok(Box::new(DeltaBitPackDecoder::new())),
101+
_ => get_decoder_default(descr, encoding),
102+
}
103+
}
104+
}
105+
106+
impl GetDecoder for f32 {}
107+
impl GetDecoder for f64 {}
108+
109+
impl GetDecoder for ByteArray {
110+
fn get_decoder<T: DataType<T = Self>>(
111+
descr: ColumnDescPtr,
112+
encoding: Encoding,
113+
) -> Result<Box<dyn Decoder<T>>> {
114+
match encoding {
115+
Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())),
116+
Encoding::DELTA_LENGTH_BYTE_ARRAY => {
117+
Ok(Box::new(DeltaLengthByteArrayDecoder::new()))
118+
}
119+
_ => get_decoder_default(descr, encoding),
120+
}
121+
}
122+
}
123+
124+
impl GetDecoder for FixedLenByteArray {
125+
fn get_decoder<T: DataType<T = Self>>(
126+
descr: ColumnDescPtr,
127+
encoding: Encoding,
128+
) -> Result<Box<dyn Decoder<T>>> {
129+
match encoding {
130+
Encoding::DELTA_BYTE_ARRAY => Ok(Box::new(DeltaByteArrayDecoder::new())),
131+
_ => get_decoder_default(descr, encoding),
132+
}
133+
}
134+
}
135+
136+
impl GetDecoder for Int96 {}
137+
}
138+
34139
// ----------------------------------------------------------------------
35140
// Decoders
36141

@@ -109,20 +214,8 @@ pub fn get_decoder<T: DataType>(
109214
descr: ColumnDescPtr,
110215
encoding: Encoding,
111216
) -> Result<Box<dyn Decoder<T>>> {
112-
let decoder: Box<dyn Decoder<T>> = match encoding {
113-
Encoding::PLAIN => Box::new(PlainDecoder::new(descr.type_length())),
114-
Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
115-
return Err(general_err!(
116-
"Cannot initialize this encoding through this function"
117-
));
118-
}
119-
Encoding::RLE => Box::new(RleValueDecoder::new()),
120-
Encoding::DELTA_BINARY_PACKED => Box::new(DeltaBitPackDecoder::new()),
121-
Encoding::DELTA_LENGTH_BYTE_ARRAY => Box::new(DeltaLengthByteArrayDecoder::new()),
122-
Encoding::DELTA_BYTE_ARRAY => Box::new(DeltaByteArrayDecoder::new()),
123-
e => return Err(nyi_err!("Encoding {} is not supported", e)),
124-
};
125-
Ok(decoder)
217+
use self::private::GetDecoder;
218+
T::T::get_decoder(descr, encoding)
126219
}
127220

128221
// ----------------------------------------------------------------------
@@ -817,8 +910,11 @@ mod tests {
817910
// supported encodings
818911
create_and_check_decoder::<Int32Type>(Encoding::PLAIN, None);
819912
create_and_check_decoder::<Int32Type>(Encoding::DELTA_BINARY_PACKED, None);
820-
create_and_check_decoder::<Int32Type>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
821-
create_and_check_decoder::<Int32Type>(Encoding::DELTA_BYTE_ARRAY, None);
913+
create_and_check_decoder::<ByteArrayType>(
914+
Encoding::DELTA_LENGTH_BYTE_ARRAY,
915+
None,
916+
);
917+
create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY, None);
822918
create_and_check_decoder::<BoolType>(Encoding::RLE, None);
823919

824920
// error when initializing
@@ -834,6 +930,18 @@ mod tests {
834930
"Cannot initialize this encoding through this function"
835931
)),
836932
);
933+
create_and_check_decoder::<Int32Type>(
934+
Encoding::DELTA_LENGTH_BYTE_ARRAY,
935+
Some(general_err!(
936+
"Encoding DELTA_LENGTH_BYTE_ARRAY is not supported for type"
937+
)),
938+
);
939+
create_and_check_decoder::<Int32Type>(
940+
Encoding::DELTA_BYTE_ARRAY,
941+
Some(general_err!(
942+
"Encoding DELTA_BYTE_ARRAY is not supported for type"
943+
)),
944+
);
837945

838946
// unsupported
839947
create_and_check_decoder::<Int32Type>(

0 commit comments

Comments
 (0)