diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 89341ebe90c9..d9771838ada6 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -120,7 +120,8 @@ impl ArrowWriter { arrow_schema: SchemaRef, props: Option, ) -> Result { - Self::try_new_with_options(writer, arrow_schema, props, Default::default()) + let options = ArrowWriterOptions::new().with_properties(props.unwrap_or_default()); + Self::try_new_with_options(writer, arrow_schema, options) } /// Try to create a new Arrow writer with [`ArrowWriterOptions`]. @@ -131,11 +132,10 @@ impl ArrowWriter { pub fn try_new_with_options( writer: W, arrow_schema: SchemaRef, - props: Option, options: ArrowWriterOptions, ) -> Result { let schema = arrow_to_parquet_schema(&arrow_schema)?; - let mut props = props.unwrap_or_default(); + let mut props = options.properties; if !options.skip_arrow_metadata { // add serialized arrow schema add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props); @@ -266,6 +266,7 @@ impl RecordBatchWriter for ArrowWriter { /// See [`ArrowWriter`] for how to configure the writer. #[derive(Debug, Clone, Default)] pub struct ArrowWriterOptions { + properties: WriterProperties, skip_arrow_metadata: bool, } @@ -275,13 +276,20 @@ impl ArrowWriterOptions { Self::default() } + /// Sets the [`WriterProperties`] for writing parquet files. + pub fn with_properties(self, properties: WriterProperties) -> Self { + Self { properties, ..self } + } + /// Parquet files generated by the [`ArrowWriter`] contain embedded arrow schema /// by default. /// /// Set `skip_arrow_metadata` to true, to skip encoding this. - pub fn with_skip_arrow_metadata(mut self, skip_arrow_metadata: bool) -> Self { - self.skip_arrow_metadata = skip_arrow_metadata; - self + pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self { + Self { + skip_arrow_metadata, + ..self + } } } @@ -2938,8 +2946,7 @@ mod tests { let mut buf = Vec::with_capacity(1024); let mut writer = - ArrowWriter::try_new_with_options(&mut buf, file_schema.clone(), None, skip_options) - .unwrap(); + ArrowWriter::try_new_with_options(&mut buf, file_schema.clone(), skip_options).unwrap(); writer.write(&batch).unwrap(); writer.close().unwrap(); diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs index d9710219c1e7..3f3da1a5f9b9 100644 --- a/parquet/src/arrow/async_writer/mod.rs +++ b/parquet/src/arrow/async_writer/mod.rs @@ -99,7 +99,8 @@ impl AsyncArrowWriter { buffer_size: usize, props: Option, ) -> Result { - Self::try_new_with_options(writer, arrow_schema, buffer_size, props, Default::default()) + let options = ArrowWriterOptions::new().with_properties(props.unwrap_or_default()); + Self::try_new_with_options(writer, arrow_schema, buffer_size, options) } /// Try to create a new Async Arrow Writer with [`ArrowWriterOptions`]. @@ -115,12 +116,11 @@ impl AsyncArrowWriter { writer: W, arrow_schema: SchemaRef, buffer_size: usize, - props: Option, options: ArrowWriterOptions, ) -> Result { let shared_buffer = SharedBuffer::new(buffer_size); let sync_writer = - ArrowWriter::try_new_with_options(shared_buffer.clone(), arrow_schema, props, options)?; + ArrowWriter::try_new_with_options(shared_buffer.clone(), arrow_schema, options)?; Ok(Self { sync_writer,