Skip to content

Commit

Permalink
wip: Support to_string for Duration dtype
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 7, 2024
1 parent 4b03406 commit 4d8fdeb
Show file tree
Hide file tree
Showing 13 changed files with 404 additions and 161 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ either = { workspace = true }
hashbrown = { workspace = true }
hashbrown_old_nightly_hack = { workspace = true }
indexmap = { workspace = true }
itoa = { workspace = true }
ndarray = { workspace = true, optional = true }
num-traits = { workspace = true }
once_cell = { workspace = true }
Expand Down
10 changes: 3 additions & 7 deletions crates/polars-core/src/chunked_array/temporal/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use chrono::*;

use crate::prelude::*;

/// Number of nanoseconds in a day
pub(crate) const NS_IN_DAY: i64 = 86_400_000_000_000;
/// Number of microseconds in a day
pub(crate) const US_IN_DAY: i64 = 86_400_000_000;
/// Number of milliseconds in a day
pub(crate) const MS_IN_DAY: i64 = 86_400_000;
/// Number of seconds in a day
pub(crate) const SECONDS_IN_DAY: i64 = 86_400;

impl From<&AnyValue<'_>> for NaiveDateTime {
Expand Down Expand Up @@ -37,12 +39,10 @@ pub fn datetime_to_timestamp_ns(v: NaiveDateTime) -> i64 {
v.and_utc().timestamp_nanos_opt().unwrap()
}

// Used by lazy for literal conversion
/// Convert a [`NaiveDateTime`] (interpreted as UTC) into a millisecond timestamp.
pub fn datetime_to_timestamp_ms(v: NaiveDateTime) -> i64 {
    let utc = v.and_utc();
    utc.timestamp_millis()
}

// Used by lazy for literal conversion
pub fn datetime_to_timestamp_us(v: NaiveDateTime) -> i64 {
let us = v.and_utc().timestamp() * 1_000_000;
us + v.and_utc().timestamp_subsec_micros() as i64
Expand All @@ -51,7 +51,3 @@ pub fn datetime_to_timestamp_us(v: NaiveDateTime) -> i64 {
/// Convert a [`NaiveDateTime`] into a whole number of days, derived from its
/// millisecond timestamp (integer division, so the time-of-day part is dropped).
pub(crate) fn naive_datetime_to_date(v: NaiveDateTime) -> i32 {
    let ms_per_day = MILLISECONDS * SECONDS_IN_DAY;
    let days = datetime_to_timestamp_ms(v) / ms_per_day;
    days as i32
}

pub(crate) const NS_IN_DAY: i64 = 86_400_000_000_000;
pub(crate) const US_IN_DAY: i64 = 86_400_000_000;
pub(crate) const MS_IN_DAY: i64 = 86_400_000;
19 changes: 19 additions & 0 deletions crates/polars-core/src/chunked_array/temporal/duration.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::export::chrono::Duration as ChronoDuration;
use crate::fmt::fmt_duration_string;
use crate::prelude::DataType::Duration;
use crate::prelude::*;

Expand Down Expand Up @@ -60,6 +61,24 @@ impl DurationChunked {
self.2 = Some(Duration(tu))
}

/// Render every non-null [`Duration`] value as a String.
///
/// `format` is not a `strftime` pattern: only the two specifiers `"iso"`
/// and `"polars"` are accepted. Any other value yields an
/// `InvalidOperation` error.
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
    // Resolve the specifier up front so unsupported formats fail before any work is done.
    let iso = match format {
        "iso" => true,
        "polars" => false,
        _ => {
            return Err(PolarsError::InvalidOperation(
                format!("format {:?} not supported for Duration type (expected one of 'iso' or 'polars')", format).into(),
            ))
        },
    };
    // The time unit is the same for the whole array; read it once.
    let unit = self.time_unit();
    let rendered: StringChunked = self
        .0
        .apply_nonnull_values_generic(DataType::String, |v: i64| {
            fmt_duration_string(v, unit, iso)
        });
    Ok(rendered)
}

/// Construct a new [`DurationChunked`] from an iterator over [`ChronoDuration`].
pub fn from_duration<I: IntoIterator<Item = ChronoDuration>>(
name: PlSmallStr,
Expand Down
147 changes: 92 additions & 55 deletions crates/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use comfy_table::modifiers::*;
use comfy_table::presets::*;
#[cfg(any(feature = "fmt", feature = "fmt_no_tty"))]
use comfy_table::*;
#[cfg(feature = "dtype-duration")]
use itoa::Buffer;
use num_traits::{Num, NumCast};

use crate::config::*;
Expand Down Expand Up @@ -966,7 +968,9 @@ fn fmt_datetime(
}

#[cfg(feature = "dtype-duration")]
const NAMES: [&str; 4] = ["d", "h", "m", "s"];
const DURATION_PARTS: [&str; 4] = ["d", "h", "m", "s"];
#[cfg(feature = "dtype-duration")]
const ISO_DURATION_PARTS: [&str; 4] = ["D", "H", "M", "S"];
#[cfg(feature = "dtype-duration")]
const SIZES_NS: [i64; 4] = [
86_400_000_000_000,
Expand All @@ -980,63 +984,100 @@ const SIZES_US: [i64; 4] = [86_400_000_000, 3_600_000_000, 60_000_000, 1_000_000
const SIZES_MS: [i64; 4] = [86_400_000, 3_600_000, 60_000, 1_000];

#[cfg(feature = "dtype-duration")]
/// Format a physical duration value as a human-readable string.
///
/// * `v` - the duration, as an integer count of `unit`.
/// * `unit` - the time unit `v` is expressed in.
/// * `iso` - when `true`, produce an ISO 8601 style duration
///   (e.g. `P1DT2H3M4.5S`, `PT0.5S`, `-PT1H`); when `false`, produce the
///   polars display form (e.g. `1d 2h 3m 4s 500ms`).
///
/// A zero duration is rendered as `PT0S` (iso) or `0ns` / `0µs` / `0ms`.
pub fn fmt_duration_string(v: i64, unit: TimeUnit, iso: bool) -> String {
    if v == 0 {
        return if iso {
            "PT0S".to_string()
        } else {
            match unit {
                TimeUnit::Nanoseconds => "0ns".to_string(),
                TimeUnit::Microseconds => "0µs".to_string(),
                TimeUnit::Milliseconds => "0ms".to_string(),
            }
        };
    }
    // Number of `unit` ticks per day / hour / minute / second.
    let sizes = match unit {
        TimeUnit::Nanoseconds => SIZES_NS.as_slice(),
        TimeUnit::Microseconds => SIZES_US.as_slice(),
        TimeUnit::Milliseconds => SIZES_MS.as_slice(),
    };

    let mut s = String::with_capacity(32);
    let mut buffer = Buffer::new();

    if iso {
        // The sign is written once, in front of the whole duration; the parts
        // are then computed on the magnitude. `unsigned_abs` (unlike `abs`)
        // cannot overflow for `i64::MIN`.
        if v < 0 {
            s.push('-');
        }
        s.push('P');
        let magnitude = v.unsigned_abs();

        // Whole days, hours, minutes and seconds.
        let mut parts = [0u64; 4];
        for (i, (part, &size)) in parts.iter_mut().zip(sizes).enumerate() {
            let size = size as u64;
            *part = if i == 0 {
                magnitude / size
            } else {
                (magnitude % sizes[i - 1] as u64) / size
            };
        }
        // Sub-second remainder, still expressed in `unit` ticks.
        let fraction = magnitude % sizes[3] as u64;

        if parts[0] != 0 {
            s.push_str(buffer.format(parts[0]));
            s.push_str(ISO_DURATION_PARTS[0]);
        }
        // The 'T' designator introduces the time section and must be present
        // whenever any hour/minute/second component is written, whether or not
        // a day component precedes it.
        if fraction != 0 || parts[1..].iter().any(|&p| p != 0) {
            s.push('T');
            for (&part, designator) in parts[1..3].iter().zip(&ISO_DURATION_PARTS[1..3]) {
                if part != 0 {
                    s.push_str(buffer.format(part));
                    s.push_str(designator);
                }
            }
            // Seconds are written when there are whole seconds *or* only a
            // fraction (e.g. 500ms -> "PT0.5S").
            if parts[3] != 0 || fraction != 0 {
                s.push_str(buffer.format(parts[3]));
                if fraction != 0 {
                    let width = match unit {
                        TimeUnit::Nanoseconds => 9,
                        TimeUnit::Microseconds => 6,
                        TimeUnit::Milliseconds => 3,
                    };
                    // Zero-pad to the unit's precision, then drop trailing
                    // zeros; `fraction != 0` guarantees a digit remains.
                    let digits = format!("{:0width$}", fraction, width = width);
                    s.push('.');
                    s.push_str(digits.trim_end_matches('0'));
                }
                s.push_str(ISO_DURATION_PARTS[3]);
            }
        }
        return s;
    }

    // Polars display form: space-separated day/hour/minute/second parts ...
    for (i, &size) in sizes.iter().enumerate() {
        let whole_num = if i == 0 {
            v / size
        } else {
            (v % sizes[i - 1]) / size
        };
        if whole_num != 0 {
            s.push_str(buffer.format(whole_num));
            s.push_str(DURATION_PARTS[i]);
            // Only add a separator when a smaller component follows.
            if v % size != 0 {
                s.push(' ');
            }
        }
    }
    // ... followed by the sub-second remainder, written as an integer in the
    // coarsest sub-second unit that represents it exactly.
    let suffixes: &[&str] = match unit {
        TimeUnit::Nanoseconds => &["ns", "µs", "ms"],
        TimeUnit::Microseconds => &["µs", "ms"],
        TimeUnit::Milliseconds => &["ms"],
    };
    let mut sub = v % sizes[3];
    if sub != 0 {
        let mut idx = 0;
        while idx + 1 < suffixes.len() && sub % 1_000 == 0 {
            sub /= 1_000;
            idx += 1;
        }
        s.push_str(buffer.format(sub));
        s.push_str(suffixes[idx]);
    }
    s
}

fn format_blob(f: &mut Formatter<'_>, bytes: &[u8]) -> fmt::Result {
Expand Down Expand Up @@ -1087,11 +1128,7 @@ impl Display for AnyValue<'_> {
fmt_datetime(f, *v, *tu, tz.as_ref().map(|v| v.as_ref()))
},
#[cfg(feature = "dtype-duration")]
AnyValue::Duration(v, tu) => match tu {
TimeUnit::Nanoseconds => fmt_duration_ns(f, *v),
TimeUnit::Microseconds => fmt_duration_us(f, *v),
TimeUnit::Milliseconds => fmt_duration_ms(f, *v),
},
AnyValue::Duration(v, tu) => write!(f, "{}", fmt_duration_string(*v, *tu, false)),
#[cfg(feature = "dtype-time")]
AnyValue::Time(_) => {
let nt: chrono::NaiveTime = self.into();
Expand Down Expand Up @@ -1221,7 +1258,7 @@ impl Series {

#[inline]
#[cfg(feature = "dtype-decimal")]
pub fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result {
fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result {
use arrow::compute::decimal::format_decimal;

let trim_zeros = get_trim_decimal_zeros();
Expand Down
12 changes: 2 additions & 10 deletions crates/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,16 +254,8 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
}

fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult<Series> {
match (dtype, self.0.time_unit()) {
(DataType::String, TimeUnit::Milliseconds) => {
Ok(self.0.to_string("%F %T%.3f")?.into_series())
},
(DataType::String, TimeUnit::Microseconds) => {
Ok(self.0.to_string("%F %T%.6f")?.into_series())
},
(DataType::String, TimeUnit::Nanoseconds) => {
Ok(self.0.to_string("%F %T%.9f")?.into_series())
},
match dtype {
DataType::String => Ok(self.0.to_string("iso")?.into_series()),
_ => self.0.cast_with_options(dtype, cast_options),
}
}
Expand Down
39 changes: 32 additions & 7 deletions crates/polars-time/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,17 +253,42 @@ pub trait TemporalMethods: AsSeries {

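/// Convert a temporal Series (Date, Datetime, Time or Duration) into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
/// The special format `"iso"` selects a dtype-appropriate default pattern; Duration
/// does not accept strftime patterns, only the specifiers `"iso"` and `"polars"`.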
fn to_string(&self, format: &str) -> PolarsResult<Series> {
fn to_string(&self, mut format: &str) -> PolarsResult<Series> {
let s = self.as_series();
match s.dtype() {
#[cfg(feature = "dtype-date")]
DataType::Date => s.date().map(|ca| Ok(ca.to_string(format)?.into_series()))?,
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => s
.datetime()
.map(|ca| Ok(ca.to_string(format)?.into_series()))?,
DataType::Datetime(tu, tz) => {
    // "iso" is an alias: expand it to a strftime pattern whose sub-second
    // precision matches the time unit, appending the UTC offset (`%:z`)
    // only when the dtype carries a time zone.
    if format == "iso" {
        format = match (tu, tz.is_some()) {
            (TimeUnit::Milliseconds, true) => "%F %T%.3f%:z",
            (TimeUnit::Milliseconds, false) => "%F %T%.3f",
            (TimeUnit::Microseconds, true) => "%F %T%.6f%:z",
            (TimeUnit::Microseconds, false) => "%F %T%.6f",
            (TimeUnit::Nanoseconds, true) => "%F %T%.9f%:z",
            (TimeUnit::Nanoseconds, false) => "%F %T%.9f",
        }
    };
    s.datetime()
        .map(|ca| Ok(ca.to_string(format)?.into_series()))?
},
#[cfg(feature = "dtype-date")]
DataType::Date => {
if format == "iso" {
format = "%Y-%m-%d"
};
s.date().map(|ca| Ok(ca.to_string(format)?.into_series()))?
},
#[cfg(feature = "dtype-time")]
DataType::Time => s.time().map(|ca| ca.to_string(format).into_series()),
DataType::Time => {
if format == "iso" {
format = "%T%.f"
};
s.time().map(|ca| ca.to_string(format).into_series())
},
#[cfg(feature = "dtype-duration")]
DataType::Duration(_) => s
.duration()
.map(|ca| Ok(ca.to_string(format)?.into_series()))?,
dt => polars_bail!(opq = to_string, dt),
}
}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4547,7 +4547,7 @@ def insert_column(self, index: int, column: IntoExprColumn) -> DataFrame:
Parameters
----------
index
Index at which to insert the new `Series` column.
Index at which to insert the new column.
column
`Series` or expression to insert.
Expand Down
Loading

0 comments on commit 4d8fdeb

Please sign in to comment.