Skip to content

Commit

Permalink
refactor: move WriterProperties to ArrowWriterOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
evenyag committed Jan 21, 2024
1 parent 0ea5554 commit 78b81c9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
23 changes: 15 additions & 8 deletions parquet/src/arrow/arrow_writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ impl<W: Write + Send> ArrowWriter<W> {
arrow_schema: SchemaRef,
props: Option<WriterProperties>,
) -> Result<Self> {
Self::try_new_with_options(writer, arrow_schema, props, Default::default())
let options = ArrowWriterOptions::new().with_properties(props.unwrap_or_default());
Self::try_new_with_options(writer, arrow_schema, options)
}

/// Try to create a new Arrow writer with [`ArrowWriterOptions`].
Expand All @@ -131,11 +132,10 @@ impl<W: Write + Send> ArrowWriter<W> {
pub fn try_new_with_options(
writer: W,
arrow_schema: SchemaRef,
props: Option<WriterProperties>,
options: ArrowWriterOptions,
) -> Result<Self> {
let schema = arrow_to_parquet_schema(&arrow_schema)?;
let mut props = props.unwrap_or_default();
let mut props = options.properties;
if !options.skip_arrow_metadata {
// add serialized arrow schema
add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props);
Expand Down Expand Up @@ -266,6 +266,7 @@ impl<W: Write + Send> RecordBatchWriter for ArrowWriter<W> {
/// See [`ArrowWriter`] for how to configure the writer.
#[derive(Debug, Clone, Default)]
pub struct ArrowWriterOptions {
properties: WriterProperties,
skip_arrow_metadata: bool,
}

Expand All @@ -275,13 +276,20 @@ impl ArrowWriterOptions {
Self::default()
}

/// Sets the [`WriterProperties`] for writing parquet files.
pub fn with_properties(self, properties: WriterProperties) -> Self {
Self { properties, ..self }
}

/// Parquet files generated by the [`ArrowWriter`] contain embedded arrow schema
/// by default.
///
/// Set `skip_arrow_metadata` to true, to skip encoding this.
pub fn with_skip_arrow_metadata(mut self, skip_arrow_metadata: bool) -> Self {
self.skip_arrow_metadata = skip_arrow_metadata;
self
pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self {
Self {
skip_arrow_metadata,
..self
}
}
}

Expand Down Expand Up @@ -2938,8 +2946,7 @@ mod tests {

let mut buf = Vec::with_capacity(1024);
let mut writer =
ArrowWriter::try_new_with_options(&mut buf, file_schema.clone(), None, skip_options)
.unwrap();
ArrowWriter::try_new_with_options(&mut buf, file_schema.clone(), skip_options).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();

Expand Down
6 changes: 3 additions & 3 deletions parquet/src/arrow/async_writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ impl<W: AsyncWrite + Unpin + Send> AsyncArrowWriter<W> {
buffer_size: usize,
props: Option<WriterProperties>,
) -> Result<Self> {
Self::try_new_with_options(writer, arrow_schema, buffer_size, props, Default::default())
let options = ArrowWriterOptions::new().with_properties(props.unwrap_or_default());
Self::try_new_with_options(writer, arrow_schema, buffer_size, options)
}

/// Try to create a new Async Arrow Writer with [`ArrowWriterOptions`].
Expand All @@ -115,12 +116,11 @@ impl<W: AsyncWrite + Unpin + Send> AsyncArrowWriter<W> {
writer: W,
arrow_schema: SchemaRef,
buffer_size: usize,
props: Option<WriterProperties>,
options: ArrowWriterOptions,
) -> Result<Self> {
let shared_buffer = SharedBuffer::new(buffer_size);
let sync_writer =
ArrowWriter::try_new_with_options(shared_buffer.clone(), arrow_schema, props, options)?;
ArrowWriter::try_new_with_options(shared_buffer.clone(), arrow_schema, options)?;

Ok(Self {
sync_writer,
Expand Down

0 comments on commit 78b81c9

Please sign in to comment.