Skip to content

Commit e8ff5cb

Browse files
committed
Move state machine into ParquetMetadataDecoder
1 parent d46cc53 commit e8ff5cb

File tree

3 files changed

+457
-161
lines changed

3 files changed

+457
-161
lines changed

parquet/src/file/metadata/parser.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,86 @@ use crate::encryption::{
4343
#[cfg(feature = "encryption")]
4444
use crate::format::EncryptionAlgorithm;
4545

46+
/// Helper struct for metadata parsing
47+
///
48+
/// This structure parses thrift-encoded bytes into the correct Rust structs,
49+
/// such as [`ParquetMetaData`], handling decryption if necessary.
50+
//
51+
// Note this structure is used to minimize the number of
52+
// places that need `#[cfg(feature = "encryption")]` checks.
53+
pub(crate) use inner::MetadataParser;
54+
55+
#[cfg(feature = "encryption")]
56+
// Variant of `inner` that is compiled only when the `encryption` feature is
// enabled; its `MetadataParser` carries optional decryption properties.
mod inner {
57+
use super::*;
58+
use crate::encryption::decrypt::FileDecryptionProperties;
59+
use crate::errors::Result;
60+
61+
/// API for decoding metadata that may be encrypted.
///
/// Stores optional [`FileDecryptionProperties`] and forwards them to
/// `decode_metadata_with_encryption` when decoding.
62+
#[derive(Debug, Default)]
63+
pub(crate) struct MetadataParser {
64+
// The credentials and keys needed to decrypt metadata.
// `None` when constructed via `new()` / `Default` (derived default of `Option`).
65+
file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
66+
}
67+
68+
impl MetadataParser {
69+
/// Creates a parser with no decryption properties set
/// (equivalent to `MetadataParser::default()`).
// NOTE(review): declared `pub` while the other methods here are
// `pub(crate)`; the struct itself is `pub(crate)`.
pub fn new() -> Self {
70+
MetadataParser::default()
71+
}
72+
73+
/// Builder-style setter: consumes `self`, replaces the stored decryption
/// properties with the given value (passing `None` clears them), and
/// returns the updated parser.
pub(crate) fn with_file_decryption_properties(
74+
mut self,
75+
file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
76+
) -> Self {
77+
self.file_decryption_properties = file_decryption_properties;
78+
self
79+
}
80+
81+
/// Decodes `buf` into [`ParquetMetaData`].
///
/// Delegates to `decode_metadata_with_encryption`, passing `buf`,
/// `encrypted_footer`, and the stored decryption properties (if any)
/// through unchanged. Any error from that function is returned as-is.
pub(crate) fn decode_metadata(
82+
&self,
83+
buf: &[u8],
84+
encrypted_footer: bool,
85+
) -> Result<ParquetMetaData> {
86+
decode_metadata_with_encryption(
87+
buf,
88+
encrypted_footer,
89+
// `as_deref` converts `Option<Arc<T>>` into the `Option<&T>` the callee takes.
self.file_decryption_properties.as_deref(),
90+
)
91+
}
92+
}
93+
}
94+
95+
#[cfg(not(feature = "encryption"))]
96+
// Variant of `inner` that is compiled only when the `encryption` feature is
// disabled; its `MetadataParser` holds no state.
mod inner {
97+
use super::*;
98+
use crate::errors::Result;
99+
/// Parallel implementation used when the `encryption` feature is not enabled.
100+
///
101+
/// This exposes the same `new` and `decode_metadata` methods as the
/// encryption-enabled version (there is no `with_file_decryption_properties`
/// here), so callers of those two methods need no `cfg` checks.
102+
#[derive(Debug, Default)]
103+
pub(crate) struct MetadataParser;
104+
105+
impl MetadataParser {
106+
/// Creates the (stateless) parser.
pub(crate) fn new() -> Self {
107+
MetadataParser
108+
}
109+
110+
/// Decodes `buf` into [`ParquetMetaData`].
///
/// Returns an error without touching `buf` when `encrypted_footer` is
/// `true`, since this build cannot decrypt; otherwise delegates to
/// `decode_metadata`.
pub(crate) fn decode_metadata(
111+
&self,
112+
buf: &[u8],
113+
encrypted_footer: bool,
114+
) -> Result<ParquetMetaData> {
115+
if encrypted_footer {
116+
Err(general_err!(
117+
"Parquet file has an encrypted footer but the encryption feature is disabled"
118+
))
119+
} else {
120+
// Plain (unencrypted) footer: use the regular decoder.
decode_metadata(buf)
121+
}
122+
}
123+
}
124+
}
125+
46126
/// Decodes [`ParquetMetaData`] from the provided bytes.
47127
///
48128
/// Typically this is used to decode the metadata from the end of a parquet
@@ -288,7 +368,7 @@ fn parse_single_offset_index(
288368
/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
289369
/// [Parquet Encryption Spec]: https://parquet.apache.org/docs/file-format/data-pages/encryption/
290370
#[cfg(feature = "encryption")]
291-
pub(crate) fn decode_metadata_with_encryption(
371+
fn decode_metadata_with_encryption(
292372
buf: &[u8],
293373
encrypted_footer: bool,
294374
file_decryption_properties: Option<&FileDecryptionProperties>,

0 commit comments

Comments
 (0)