Skip to content

Commit

Permalink
refactor(rust): Remove top-level metadata from ArrowSchema (#18527)
Browse files (browse the repository at this point in the history)
  • Loading branch information
nameexhaustion authored Sep 3, 2024
1 parent f5edd91 commit 04d0978
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 63 deletions.
30 changes: 5 additions & 25 deletions crates/polars-arrow/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use polars_error::{polars_bail, PolarsResult};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use super::{Field, Metadata};
use super::Field;

/// An ordered sequence of [`Field`]s with associated [`Metadata`].
/// An ordered sequence of [`Field`]s
///
/// [`ArrowSchema`] is an abstraction used to read from, and write to, Arrow IPC format,
/// Apache Parquet, and Apache Avro. All these formats have a concept of a schema
Expand All @@ -16,22 +16,11 @@ use super::{Field, Metadata};
pub struct ArrowSchema {
/// The fields composing this schema.
pub fields: Vec<Field>,
/// Optional metadata.
pub metadata: Metadata,
}

pub type ArrowSchemaRef = Arc<ArrowSchema>;

impl ArrowSchema {
/// Attaches a [`Metadata`] to [`ArrowSchema`]
#[inline]
pub fn with_metadata(self, metadata: Metadata) -> Self {
Self {
fields: self.fields,
metadata,
}
}

#[inline]
pub fn len(&self) -> usize {
self.fields.len()
Expand All @@ -58,10 +47,7 @@ impl ArrowSchema {
})
.collect();

ArrowSchema {
fields,
metadata: self.metadata,
}
ArrowSchema { fields }
}

pub fn try_project(&self, indices: &[usize]) -> PolarsResult<Self> {
Expand All @@ -76,18 +62,12 @@ impl ArrowSchema {
Ok(out.clone())
}).collect::<PolarsResult<Vec<_>>>()?;

Ok(ArrowSchema {
fields,
metadata: self.metadata.clone(),
})
Ok(ArrowSchema { fields })
}
}

impl From<Vec<Field>> for ArrowSchema {
fn from(fields: Vec<Field>) -> Self {
Self {
fields,
..Default::default()
}
Self { fields }
}
}
5 changes: 1 addition & 4 deletions crates/polars-arrow/src/io/ipc/read/file_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@ impl<'a> FileStream<'a> {
{
let (projection, schema) = if let Some(projection) = projection {
let (p, h, fields) = prepare_projection(&metadata.schema.fields, projection);
let schema = ArrowSchema {
fields,
metadata: metadata.schema.metadata.clone(),
};
let schema = ArrowSchema { fields };
(Some((p, h)), Some(schema))
} else {
(None, None)
Expand Down
5 changes: 1 addition & 4 deletions crates/polars-arrow/src/io/ipc/read/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ impl<R: Read + Seek> FileReader<R> {
) -> Self {
let projection = projection.map(|projection| {
let (p, h, fields) = prepare_projection(&metadata.schema.fields, projection);
let schema = ArrowSchema {
fields,
metadata: metadata.schema.metadata.clone(),
};
let schema = ArrowSchema { fields };
(p, h, schema)
});
Self {
Expand Down
16 changes: 1 addition & 15 deletions crates/polars-arrow/src/io/ipc/read/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,8 @@ pub(super) fn fb_to_schema(
arrow_format::ipc::Endianness::Big => false,
};

let mut metadata = Metadata::default();
if let Some(md_fields) = schema.custom_metadata()? {
for kv in md_fields {
let kv = kv?;
let k_str = kv.key()?;
let v_str = kv.value()?;
if let Some(k) = k_str {
if let Some(v) = v_str {
metadata.insert(PlSmallStr::from_str(k), PlSmallStr::from_str(v));
}
}
}
}

Ok((
ArrowSchema { fields, metadata },
ArrowSchema { fields },
IpcSchema {
fields: ipc_fields,
is_little_endian,
Expand Down
5 changes: 1 addition & 4 deletions crates/polars-arrow/src/io/ipc/read/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,7 @@ impl<R: Read> StreamReader<R> {
pub fn new(reader: R, metadata: StreamMetadata, projection: Option<Vec<usize>>) -> Self {
let projection = projection.map(|projection| {
let (p, h, fields) = prepare_projection(&metadata.schema.fields, projection);
let schema = ArrowSchema {
fields,
metadata: metadata.schema.metadata.clone(),
};
let schema = ArrowSchema { fields };
(p, h, schema)
});

Expand Down
8 changes: 1 addition & 7 deletions crates/polars-arrow/src/io/ipc/write/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,7 @@ pub fn serialize_schema(
.map(|(field, ipc_field)| serialize_field(field, ipc_field))
.collect::<Vec<_>>();

let custom_metadata = schema
.metadata
.iter()
.map(|(k, v)| key_value(k.clone().into_string(), v.clone().into_string()))
.collect::<Vec<_>>();

let custom_metadata = (!custom_metadata.is_empty()).then_some(custom_metadata);
let custom_metadata = None;

arrow_format::ipc::Schema {
endianness,
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-parquet/src/arrow/read/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ pub fn infer_schema_with_options(
let schema = read_schema_from_metadata(&mut metadata)?;
Ok(schema.unwrap_or_else(|| {
let fields = parquet_to_arrow_schema_with_options(file_metadata.schema().fields(), options);
ArrowSchema { fields, metadata }
ArrowSchema { fields }
}))
}
3 changes: 0 additions & 3 deletions crates/polars/tests/it/io/parquet/arrow/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ fn all_types_chunked() -> PolarsResult<()> {

#[test]
fn read_int96_timestamps() -> PolarsResult<()> {
use std::collections::BTreeMap;

let timestamp_data = &[
0x50, 0x41, 0x52, 0x31, 0x15, 0x04, 0x15, 0x48, 0x15, 0x3c, 0x4c, 0x15, 0x06, 0x15, 0x00,
0x12, 0x00, 0x00, 0x24, 0x00, 0x00, 0x0d, 0x01, 0x08, 0x9f, 0xd5, 0x1f, 0x0d, 0x0a, 0x44,
Expand Down Expand Up @@ -131,7 +129,6 @@ fn read_int96_timestamps() -> PolarsResult<()> {
arrow::datatypes::ArrowDataType::Timestamp(time_unit, None),
false,
)],
metadata: BTreeMap::new(),
};
let reader = FileReader::new(reader, metadata.row_groups, schema, None);
reader.collect::<PolarsResult<Vec<_>>>()
Expand Down

0 comments on commit 04d0978

Please sign in to comment.