Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5048,10 +5048,13 @@ mod tests {
.expect("Error creating reader builder");

let schema = builder.metadata().file_metadata().schema_descr();
assert_eq!(schema.column(0).logical_type(), Some(LogicalType::String));
assert_eq!(
schema.column(1).logical_type(),
Some(LogicalType::_Unknown { field_id: 2555 })
schema.column(0).logical_type_ref(),
Some(&LogicalType::String)
);
assert_eq!(
schema.column(1).logical_type_ref(),
Some(&LogicalType::_Unknown { field_id: 2555 })
);
assert_eq!(schema.column(1).physical_type(), PhysicalType::BYTE_ARRAY);

Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/schema/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub(crate) fn try_add_extension_type(
mut arrow_field: Field,
parquet_type: &Type,
) -> Result<Field, ParquetError> {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type() else {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type_ref() else {
return Ok(arrow_field);
};
match parquet_logical_type {
Expand All @@ -65,7 +65,7 @@ pub(crate) fn try_add_extension_type(
///
/// This is used to preallocate the metadata hashmap size
pub(crate) fn has_extension_type(parquet_type: &Type) -> bool {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type() else {
let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type_ref() else {
return false;
};
match parquet_logical_type {
Expand Down
10 changes: 5 additions & 5 deletions parquet/src/arrow/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ mod tests {
// This is because the Arrow conversion always sets logical type,
// even if there wasn't originally one.
// This is not an issue, but is an inconvenience for this test.
match a.logical_type() {
match a.logical_type_ref() {
Some(_) => {
assert_eq!(a, b)
}
Expand Down Expand Up @@ -2216,8 +2216,8 @@ mod tests {
let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;

assert_eq!(
parquet_schema.column(0).logical_type(),
Some(LogicalType::Uuid)
parquet_schema.column(0).logical_type_ref(),
Some(&LogicalType::Uuid)
);

let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
Expand All @@ -2237,8 +2237,8 @@ mod tests {
let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;

assert_eq!(
parquet_schema.column(0).logical_type(),
Some(LogicalType::Json)
parquet_schema.column(0).logical_type_ref(),
Some(&LogicalType::Json)
);

let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
Expand Down
24 changes: 14 additions & 10 deletions parquet/src/arrow/schema/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ fn decimal_256_type(scale: i32, precision: i32) -> Result<DataType> {
}

fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(None, ConvertedType::NONE) => Ok(DataType::Int32),
(
Some(
Expand All @@ -183,7 +183,9 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
(32, false) => Ok(DataType::UInt32),
_ => Err(arrow_err!("Cannot create INT32 physical type from {:?}", t)),
},
(Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
(Some(LogicalType::Decimal { scale, precision }), _) => {
decimal_128_type(*scale, *precision)
}
(Some(LogicalType::Date), _) => Ok(DataType::Date32),
(Some(LogicalType::Time { unit, .. }), _) => match unit {
ParquetTimeUnit::MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)),
Expand Down Expand Up @@ -212,7 +214,7 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
}

fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(None, ConvertedType::NONE) => Ok(DataType::Int64),
(
Some(LogicalType::Integer {
Expand Down Expand Up @@ -243,7 +245,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
ParquetTimeUnit::MICROS => TimeUnit::Microsecond,
ParquetTimeUnit::NANOS => TimeUnit::Nanosecond,
},
if is_adjusted_to_u_t_c {
if *is_adjusted_to_u_t_c {
Some("UTC".into())
} else {
None
Expand All @@ -260,7 +262,9 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
TimeUnit::Microsecond,
Some("UTC".into()),
)),
(Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
(Some(LogicalType::Decimal { scale, precision }), _) => {
decimal_128_type(*scale, *precision)
}
(None, ConvertedType::DECIMAL) => decimal_128_type(scale, precision),
(logical, converted) => Err(arrow_err!(
"Unable to convert parquet INT64 logical type {:?} or converted type {}",
Expand All @@ -271,7 +275,7 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
}

fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> {
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(Some(LogicalType::String), _) => Ok(DataType::Utf8),
(Some(LogicalType::Json), _) => Ok(DataType::Utf8),
(Some(LogicalType::Bson), _) => Ok(DataType::Binary),
Expand All @@ -290,7 +294,7 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<D
precision: p,
}),
_,
) => decimal_type(s, p),
) => decimal_type(*s, *p),
(None, ConvertedType::DECIMAL) => decimal_type(scale, precision),
(logical, converted) => Err(arrow_err!(
"Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}",
Expand All @@ -307,12 +311,12 @@ fn from_fixed_len_byte_array(
type_length: i32,
) -> Result<DataType> {
// TODO: This should check the type length for the decimal and interval types
match (info.logical_type(), info.converted_type()) {
match (info.logical_type_ref(), info.converted_type()) {
(Some(LogicalType::Decimal { scale, precision }), _) => {
if type_length <= 16 {
decimal_128_type(scale, precision)
decimal_128_type(*scale, *precision)
} else {
decimal_256_type(scale, precision)
decimal_256_type(*scale, *precision)
}
}
(None, ConvertedType::DECIMAL) => {
Expand Down
14 changes: 13 additions & 1 deletion parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1132,12 +1132,24 @@ pub enum ColumnOrder {

impl ColumnOrder {
/// Returns sort order for a physical/logical type.
#[deprecated(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here is one deprecation

since = "57.1.0",
note = "use `ColumnOrder::sort_order_for_type` instead"
)]
pub fn get_sort_order(
logical_type: Option<LogicalType>,
converted_type: ConvertedType,
physical_type: Type,
) -> SortOrder {
// TODO: Should this take converted and logical type, for compatibility?
Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
}

/// Returns sort order for a physical/logical type.
pub fn sort_order_for_type(
logical_type: Option<&LogicalType>,
converted_type: ConvertedType,
physical_type: Type,
) -> SortOrder {
match logical_type {
Some(logical) => match logical {
LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/file/metadata/thrift/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -784,8 +784,8 @@ pub(crate) fn parquet_metadata_from_bytes(buf: &[u8]) -> Result<ParquetMetaData>
let column_orders = column_orders.map(|mut cos| {
for (i, column) in schema_descr.columns().iter().enumerate() {
if let ColumnOrder::TYPE_DEFINED_ORDER(_) = cos[i] {
let sort_order = ColumnOrder::get_sort_order(
column.logical_type(),
let sort_order = ColumnOrder::sort_order_for_type(
column.logical_type_ref(),
column.converted_type(),
column.physical_type(),
);
Expand Down Expand Up @@ -1357,7 +1357,7 @@ fn write_schema_helper<W: Write>(
} else {
None
},
logical_type: basic_info.logical_type(),
logical_type: basic_info.logical_type_ref().cloned(),
};
element.write_thrift(writer)
}
Expand Down Expand Up @@ -1385,7 +1385,7 @@ fn write_schema_helper<W: Write>(
} else {
None
},
logical_type: basic_info.logical_type(),
logical_type: basic_info.logical_type_ref().cloned(),
};

element.write_thrift(writer)?;
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/file/metadata/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
.columns()
.iter()
.map(|col| {
let sort_order = ColumnOrder::get_sort_order(
col.logical_type(),
let sort_order = ColumnOrder::sort_order_for_type(
col.logical_type_ref(),
col.converted_type(),
col.physical_type(),
);
Expand Down
8 changes: 4 additions & 4 deletions parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2704,12 +2704,12 @@ mod tests {

let schema = reader.metadata().file_metadata().schema_descr();
assert_eq!(
schema.column(0).logical_type(),
Some(basic::LogicalType::String)
schema.column(0).logical_type_ref(),
Some(&basic::LogicalType::String)
);
assert_eq!(
schema.column(1).logical_type(),
Some(basic::LogicalType::_Unknown { field_id: 2555 })
schema.column(1).logical_type_ref(),
Some(&basic::LogicalType::_Unknown { field_id: 2555 })
);
assert_eq!(schema.column(1).physical_type(), Type::BYTE_ARRAY);

Expand Down
2 changes: 1 addition & 1 deletion parquet/src/geospatial/accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub struct DefaultGeoStatsAccumulatorFactory {}
impl GeoStatsAccumulatorFactory for DefaultGeoStatsAccumulatorFactory {
fn new_accumulator(&self, _descr: &ColumnDescPtr) -> Box<dyn GeoStatsAccumulator> {
#[cfg(feature = "geospatial")]
if let Some(crate::basic::LogicalType::Geometry { .. }) = _descr.logical_type() {
if let Some(crate::basic::LogicalType::Geometry { .. }) = _descr.logical_type_ref() {
Box::new(ParquetGeoStatsAccumulator::default())
} else {
Box::new(VoidGeoStatsAccumulator::default())
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/schema/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ impl Printer<'_> {
// Also print logical type if it is available
// If there is a logical type, do not print converted type
let logical_type_str = print_logical_and_converted(
basic_info.logical_type().as_ref(),
basic_info.logical_type_ref(),
basic_info.converted_type(),
precision,
scale,
Expand All @@ -426,7 +426,7 @@ impl Printer<'_> {
write!(self.output, "[{}] ", basic_info.id());
}
let logical_str = print_logical_and_converted(
basic_info.logical_type().as_ref(),
basic_info.logical_type_ref(),
basic_info.converted_type(),
0,
0,
Expand Down
27 changes: 19 additions & 8 deletions parquet/src/schema/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ impl Type {
pub(crate) fn is_list(&self) -> bool {
if self.is_group() {
let basic_info = self.get_basic_info();
if let Some(logical_type) = basic_info.logical_type() {
return logical_type == LogicalType::List;
if let Some(logical_type) = basic_info.logical_type_ref() {
return logical_type == &LogicalType::List;
}
return basic_info.converted_type() == ConvertedType::LIST;
}
Expand Down Expand Up @@ -710,6 +710,10 @@ impl BasicTypeInfo {
///
/// Note that this function will clone the `LogicalType`. If performance is a concern,
/// use [`Self::logical_type_ref`] instead.
#[deprecated(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just straight up deprecated the non reference version

since = "57.1.0",
note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
)]
pub fn logical_type(&self) -> Option<LogicalType> {
// Unlike ConvertedType, LogicalType cannot implement Copy, thus we clone it
self.logical_type.clone()
Expand Down Expand Up @@ -919,8 +923,15 @@ impl ColumnDescriptor {
///
/// Note that this function will clone the `LogicalType`. If performance is a concern,
/// use [`Self::logical_type_ref`] instead.
#[deprecated(
since = "57.1.0",
note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
)]
pub fn logical_type(&self) -> Option<LogicalType> {
self.primitive_type.get_basic_info().logical_type()
self.primitive_type
.get_basic_info()
.logical_type_ref()
.cloned()
}

/// Returns a reference to the [`LogicalType`] for this column.
Expand Down Expand Up @@ -966,8 +977,8 @@ impl ColumnDescriptor {

/// Returns the sort order for this column
pub fn sort_order(&self) -> SortOrder {
ColumnOrder::get_sort_order(
self.logical_type(),
ColumnOrder::sort_order_for_type(
self.logical_type_ref(),
self.converted_type(),
self.physical_type(),
)
Expand Down Expand Up @@ -1417,8 +1428,8 @@ mod tests {
let basic_info = tp.get_basic_info();
assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
assert_eq!(
basic_info.logical_type(),
Some(LogicalType::Integer {
basic_info.logical_type_ref(),
Some(&LogicalType::Integer {
bit_width: 32,
is_signed: true
})
Expand Down Expand Up @@ -1768,7 +1779,7 @@ mod tests {
assert!(tp.is_group());
assert!(!tp.is_primitive());
assert_eq!(basic_info.repetition(), Repetition::REPEATED);
assert_eq!(basic_info.logical_type(), Some(LogicalType::List));
assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
assert_eq!(basic_info.id(), 1);
assert_eq!(tp.get_fields().len(), 2);
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ mod tests {
assert_eq!(field.name(), "data");
// data should have been written with the Variant logical type
assert_eq!(
field.get_basic_info().logical_type(),
Some(crate::basic::LogicalType::Variant {
field.get_basic_info().logical_type_ref(),
Some(&crate::basic::LogicalType::Variant {
specification_version: None
})
);
Expand Down
27 changes: 10 additions & 17 deletions parquet/tests/geospatial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,28 +66,21 @@ fn test_read_logical_type() {

for (geospatial_file, expected_type) in expected_logical_type {
let metadata = read_metadata(geospatial_file);
let logical_type = metadata
.file_metadata()
.schema_descr()
.column(1)
.logical_type()
.unwrap();

assert_eq!(logical_type, expected_type);
let column_descr = metadata.file_metadata().schema_descr().column(1);
let logical_type = column_descr.logical_type_ref().unwrap();

assert_eq!(logical_type, &expected_type);
}

// The crs value may also contain arbitrary values (in this case some JSON
// a bit too lengthy to type out)
let metadata = read_metadata("crs-arbitrary-value.parquet");
let logical_type = metadata
.file_metadata()
.schema_descr()
.column(1)
.logical_type()
.unwrap();
let column_descr = metadata.file_metadata().schema_descr().column(1);
let logical_type = column_descr.logical_type_ref().unwrap();

if let LogicalType::Geometry { crs } = logical_type {
let crs_parsed: Value = serde_json::from_str(&crs.unwrap()).unwrap();
let crs = crs.as_ref();
let crs_parsed: Value = serde_json::from_str(crs.unwrap()).unwrap();
assert_eq!(crs_parsed.get("id").unwrap().get("code").unwrap(), 5070);
} else {
panic!("Expected geometry type but got {logical_type:?}");
Expand All @@ -103,8 +96,8 @@ fn test_read_geospatial_statistics() {
// optional binary field_id=-1 wkt (String);
// optional binary field_id=-1 geometry (Geometry(crs=));
let fields = metadata.file_metadata().schema().get_fields();
let logical_type = fields[2].get_basic_info().logical_type().unwrap();
assert_eq!(logical_type, LogicalType::Geometry { crs: None });
let logical_type = fields[2].get_basic_info().logical_type_ref().unwrap();
assert_eq!(logical_type, &LogicalType::Geometry { crs: None });

let geo_statistics = metadata.row_group(0).column(2).geo_statistics();
assert!(geo_statistics.is_some());
Expand Down
Loading