Skip to content

Commit

Permalink
refactor(rust): Unify internal string type (#18425)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Aug 29, 2024
1 parent 9eb9678 commit 4aa619d
Show file tree
Hide file tree
Showing 602 changed files with 6,568 additions and 4,772 deletions.
26 changes: 2 additions & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,11 @@ recursive = "0.1"
regex = "1.9"
reqwest = { version = "0.12", default-features = false }
ryu = "1.0.13"
serde = { version = "1.0.188", features = ["derive"] }
serde = { version = "1.0.188", features = ["derive", "rc"] }
serde_json = "1"
simd-json = { version = "0.13", features = ["known-key"] }
simdutf8 = "0.1.4"
slotmap = "1"
smartstring = "1"
sqlparser = "0.49"
stacker = "0.1"
streaming-iterator = "0.1.9"
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod iterator;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// Arrow's equivalent of an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
Expand Down Expand Up @@ -199,7 +200,7 @@ impl FixedSizeListArray {

/// Returns an [`ArrowDataType`] consistent with [`FixedSizeListArray`].
pub fn default_datatype(data_type: ArrowDataType, size: usize) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
ArrowDataType::FixedSizeList(field, size)
}
}
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::FixedSizeListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -35,7 +36,7 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}

/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new_with_field(values: M, name: &str, nullable: bool, size: usize) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool, size: usize) -> Self {
let data_type = ArrowDataType::FixedSizeList(
Box::new(Field::new(name, values.data_type().clone(), nullable)),
size,
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use iterator::*;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory representation.
#[derive(Clone)]
Expand Down Expand Up @@ -185,7 +186,7 @@ impl<O: Offset> ListArray<O> {
impl<O: Offset> ListArray<O> {
/// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
pub fn default_datatype(data_type: ArrowDataType) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
if O::IS_LARGE {
ArrowDataType::LargeList(field)
} else {
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_err, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::ListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -122,7 +123,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}

/// Creates a new [`MutableListArray`] from a [`MutableArray`].
pub fn new_with_field(values: M, name: &str, nullable: bool) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool) -> Self {
let field = Box::new(Field::new(name, values.data_type().clone(), nullable));
let data_type = if O::IS_LARGE {
ArrowDataType::LargeList(field)
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Time64(_) => unreachable!(), // remaining are not valid
Timestamp(time_unit, tz) => {
if let Some(tz) = tz {
let timezone = temporal_conversions::parse_offset(tz);
let timezone = temporal_conversions::parse_offset(tz.as_str());
match timezone {
Ok(timezone) => {
dyn_primitive!(array, i64, |time| {
Expand All @@ -65,7 +65,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
},
#[cfg(feature = "chrono-tz")]
Err(_) => {
let timezone = temporal_conversions::parse_offset_tz(tz);
let timezone = temporal_conversions::parse_offset_tz(tz.as_str());
match timezone {
Ok(timezone) => dyn_primitive!(array, i64, |time| {
temporal_conversions::timestamp_to_datetime(
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crate::compute::utils::combine_validities_and;
/// let int = Int32Array::from_slice(&[42, 28, 19, 31]).boxed();
///
/// let fields = vec![
/// Field::new("b", ArrowDataType::Boolean, false),
/// Field::new("c", ArrowDataType::Int32, false),
/// Field::new("b".into(), ArrowDataType::Boolean, false),
/// Field::new("c".into(), ArrowDataType::Int32, false),
/// ];
///
/// let array = StructArray::new(ArrowDataType::Struct(fields), vec![boolean, int], None);
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::hash::Hash;

use num_traits::{AsPrimitive, Float, ToPrimitive};
use polars_error::PolarsResult;
use polars_utils::pl_str::PlSmallStr;

use super::CastOptionsImpl;
use crate::array::*;
Expand Down Expand Up @@ -434,7 +435,7 @@ pub fn timestamp_to_timestamp(
from: &PrimitiveArray<i64>,
from_unit: TimeUnit,
to_unit: TimeUnit,
tz: &Option<String>,
tz: &Option<PlSmallStr>,
) -> PrimitiveArray<i64> {
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/compute/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ macro_rules! date_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down Expand Up @@ -129,12 +129,12 @@ macro_rules! time_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down
30 changes: 23 additions & 7 deletions crates/polars-arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand All @@ -15,7 +16,7 @@ use super::{ArrowDataType, Metadata};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Field {
/// Its name
pub name: String,
pub name: PlSmallStr,
/// Its logical [`ArrowDataType`]
pub data_type: ArrowDataType,
/// Its nullability
Expand All @@ -26,9 +27,9 @@ pub struct Field {

impl Field {
/// Creates a new [`Field`].
pub fn new<T: Into<String>>(name: T, data_type: ArrowDataType, is_nullable: bool) -> Self {
pub fn new(name: PlSmallStr, data_type: ArrowDataType, is_nullable: bool) -> Self {
Field {
name: name.into(),
name,
data_type,
is_nullable,
metadata: Default::default(),
Expand Down Expand Up @@ -56,8 +57,18 @@ impl Field {
#[cfg(feature = "arrow_rs")]
impl From<Field> for arrow_schema::Field {
fn from(value: Field) -> Self {
Self::new(value.name, value.data_type.into(), value.is_nullable)
.with_metadata(value.metadata.into_iter().collect())
Self::new(
value.name.to_string(),
value.data_type.into(),
value.is_nullable,
)
.with_metadata(
value
.metadata
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
)
}
}

Expand All @@ -75,9 +86,14 @@ impl From<&arrow_schema::Field> for Field {
let metadata = value
.metadata()
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.map(|(k, v)| (PlSmallStr::from_str(k), PlSmallStr::from_str(v)))
.collect();
Self::new(value.name(), data_type, value.is_nullable()).with_metadata(metadata)
Self::new(
PlSmallStr::from_str(value.name().as_str()),
data_type,
value.is_nullable(),
)
.with_metadata(metadata)
}
}

Expand Down
Loading

0 comments on commit 4aa619d

Please sign in to comment.