Skip to content

Commit c5ad794

Browse files
committed
refactor: require the arrow_schema key to be present in the kv_metadata, if it is required by the configuration
1 parent f2f9b00 commit c5ad794

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::sync::Arc;
2222

2323
use crate::{
2424
config::{ParquetOptions, TableParquetOptions},
25-
DataFusionError, Result,
25+
DataFusionError, Result, _internal_datafusion_err,
2626
};
2727

2828
use arrow_schema::Schema;
@@ -97,6 +97,13 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
9797

9898
let mut builder = global.into_writer_properties_builder()?;
9999

100+
// check that the arrow schema is present in the kv_metadata, if configured to do so
101+
if !global.skip_arrow_metadata
102+
&& !key_value_metadata.contains_key(ARROW_SCHEMA_META_KEY)
103+
{
104+
return Err(_internal_datafusion_err!("arrow schema was not added to the kv_metadata, even though it is required by configuration settings"));
105+
}
106+
100107
// add kv_meta, if any
101108
if !key_value_metadata.is_empty() {
102109
builder = builder.set_key_value_metadata(Some(
@@ -190,6 +197,8 @@ impl ParquetOptions {
190197
///
191198
/// The returned [`WriterPropertiesBuilder`] can then be further modified with additional options
192199
/// applied per column; a customization which is not applicable for [`ParquetOptions`].
200+
///
201+
/// Note that this method does not include the key_value_metadata from [`TableParquetOptions`].
193202
pub fn into_writer_properties_builder(&self) -> Result<WriterPropertiesBuilder> {
194203
let ParquetOptions {
195204
data_pagesize_limit,

0 commit comments

Comments
 (0)