Skip to content

Commit

Permalink
feat: Support cast from Duration to String
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 6, 2024
1 parent 04c4acb commit a3de5d0
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 106 deletions.
7 changes: 7 additions & 0 deletions crates/polars-core/src/chunked_array/logical/duration.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use crate::fmt::fmt_duration_string;
use crate::prelude::*;

pub type DurationChunked = Logical<DurationType, Int64Type>;
Expand Down Expand Up @@ -54,6 +55,12 @@ impl LogicalType for DurationChunked {
};
Ok(out.into_duration(to_unit).into_series())
},
String => {
let out: StringChunked = self.0.apply_nonnull_values_generic(String, |v: i64| {
fmt_duration_string(v, self.time_unit())
});
Ok(out.into())
},
dt if dt.is_numeric() => self.0.cast_with_options(dtype, cast_options),
dt => {
polars_bail!(
Expand Down
106 changes: 51 additions & 55 deletions crates/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,63 +980,63 @@ const SIZES_US: [i64; 4] = [86_400_000_000, 3_600_000_000, 60_000_000, 1_000_000
const SIZES_MS: [i64; 4] = [86_400_000, 3_600_000, 60_000, 1_000];

#[cfg(feature = "dtype-duration")]
fn fmt_duration_ns(f: &mut Formatter<'_>, v: i64) -> fmt::Result {
pub fn fmt_duration_string(v: i64, unit: TimeUnit) -> String {
if v == 0 {
return write!(f, "0ns");
}
format_duration(f, v, SIZES_NS.as_slice(), NAMES.as_slice())?;
if v % 1000 != 0 {
write!(f, "{}ns", v % 1_000_000_000)?;
} else if v % 1_000_000 != 0 {
write!(f, "{}µs", (v % 1_000_000_000) / 1000)?;
} else if v % 1_000_000_000 != 0 {
write!(f, "{}ms", (v % 1_000_000_000) / 1_000_000)?;
}
Ok(())
}

#[cfg(feature = "dtype-duration")]
fn fmt_duration_us(f: &mut Formatter<'_>, v: i64) -> fmt::Result {
if v == 0 {
return write!(f, "0µs");
}
format_duration(f, v, SIZES_US.as_slice(), NAMES.as_slice())?;
if v % 1000 != 0 {
write!(f, "{}µs", (v % 1_000_000))?;
} else if v % 1_000_000 != 0 {
write!(f, "{}ms", (v % 1_000_000) / 1_000)?;
}
Ok(())
}

#[cfg(feature = "dtype-duration")]
fn fmt_duration_ms(f: &mut Formatter<'_>, v: i64) -> fmt::Result {
if v == 0 {
return write!(f, "0ms");
}
format_duration(f, v, SIZES_MS.as_slice(), NAMES.as_slice())?;
if v % 1_000 != 0 {
write!(f, "{}ms", (v % 1_000))?;
return match unit {
TimeUnit::Nanoseconds => "0ns".to_string(),
TimeUnit::Microseconds => "0µs".to_string(),
TimeUnit::Milliseconds => "0ms".to_string(),
};
}
Ok(())
}

#[cfg(feature = "dtype-duration")]
fn format_duration(f: &mut Formatter, v: i64, sizes: &[i64], names: &[&str]) -> fmt::Result {
for i in 0..4 {
let sizes = match unit {
TimeUnit::Nanoseconds => SIZES_NS.as_slice(),
TimeUnit::Microseconds => SIZES_US.as_slice(),
TimeUnit::Milliseconds => SIZES_MS.as_slice(),
};
let mut s = String::with_capacity(32);
for (i, &size) in sizes.iter().enumerate() {
let whole_num = if i == 0 {
v / sizes[i]
v / size
} else {
(v % sizes[i - 1]) / sizes[i]
(v % sizes[i - 1]) / size
};
if whole_num <= -1 || whole_num >= 1 {
write!(f, "{}{}", whole_num, names[i])?;
if v % sizes[i] != 0 {
write!(f, " ")?;
if whole_num != 0 {
s.push_str(&format!("{}{}", whole_num, NAMES[i]));
if v % size != 0 {
s.push(' ');
}
}
}
Ok(())
match unit {
TimeUnit::Nanoseconds => {
let ns = v % 1_000_000_000;
if ns != 0 {
s.push_str(&format!("{}ns", ns));
} else {
let us = ns / 1_000;
if us != 0 {
s.push_str(&format!("{}µs", us));
} else {
s.push_str(&format!("{}ms", ns / 1_000_000));
}
}
},
TimeUnit::Microseconds => {
let us = v % 1_000_000;
if us != 0 {
s.push_str(&format!("{}µs", us));
} else {
s.push_str(&format!("{}ms", us / 1_000));
}
},
TimeUnit::Milliseconds => {
let ms = v % 1_000;
if ms != 0 {
s.push_str(&format!("{}ms", ms));
}
},
}
s
}

fn format_blob(f: &mut Formatter<'_>, bytes: &[u8]) -> fmt::Result {
Expand Down Expand Up @@ -1087,11 +1087,7 @@ impl Display for AnyValue<'_> {
fmt_datetime(f, *v, *tu, tz.as_ref().map(|v| v.as_ref()))
},
#[cfg(feature = "dtype-duration")]
AnyValue::Duration(v, tu) => match tu {
TimeUnit::Nanoseconds => fmt_duration_ns(f, *v),
TimeUnit::Microseconds => fmt_duration_us(f, *v),
TimeUnit::Milliseconds => fmt_duration_ms(f, *v),
},
AnyValue::Duration(v, tu) => write!(f, "{}", fmt_duration_string(*v, *tu)),
#[cfg(feature = "dtype-time")]
AnyValue::Time(_) => {
let nt: chrono::NaiveTime = self.into();
Expand Down Expand Up @@ -1221,7 +1217,7 @@ impl Series {

#[inline]
#[cfg(feature = "dtype-decimal")]
pub fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result {
fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result {
use arrow::compute::decimal::format_decimal;

let trim_zeros = get_trim_decimal_zeros();
Expand Down
54 changes: 54 additions & 0 deletions py-polars/tests/unit/datatypes/test_duration.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,60 @@ def test_duration_cum_sum() -> None:
assert df.schema["A"].is_(duration_dtype) is False


def test_duration_cast() -> None:
    """Cast a microsecond Duration column to Duration("ms"), String and Int64.

    Covers positive, sub-second, negative and null values, and checks both
    the resulting schema and the exact values of each cast.
    """
    values = [
        timedelta(days=180, seconds=56789, microseconds=987654),
        timedelta(days=0, seconds=64875, microseconds=8884),
        timedelta(days=2, hours=23, seconds=4975, milliseconds=1),
        timedelta(hours=1, seconds=1, milliseconds=1, microseconds=1),
        timedelta(seconds=-42, milliseconds=-42),
        None,
    ]
    frame = pl.DataFrame({"td": values}, schema={"td": pl.Duration("us")})

    td = pl.col("td")
    result = frame.select(
        td_ms=td.cast(pl.Duration("ms")),
        td_str=td.cast(pl.String),
        td_int=td.cast(pl.Int64),
    )
    assert result.schema == {
        "td_ms": pl.Duration(time_unit="ms"),
        "td_str": pl.Utf8,
        "td_int": pl.Int64,
    }

    # Casting to "ms" drops the sub-millisecond part of each value.
    expected_ms = [
        timedelta(days=180, seconds=56789, milliseconds=987),
        timedelta(days=0, seconds=64875, milliseconds=8),
        timedelta(days=2, hours=23, seconds=4975, milliseconds=1),
        timedelta(hours=1, seconds=1, milliseconds=1),
        timedelta(seconds=-42, milliseconds=-42),
        None,
    ]
    # String form: whole d/h/m/s parts, then the remainder in microseconds.
    expected_str = [
        "180d 15h 46m 29s 987654µs",
        "18h 1m 15s 8884µs",
        "3d 22m 55s 1000µs",
        "1h 1s 1001µs",
        "-42s -42000µs",
        None,
    ]
    # Integer form: the physical microsecond count.
    expected_int = [
        15608789987654,
        64875008884,
        260575001000,
        3601001001,
        -42042000,
        None,
    ]
    expected = pl.DataFrame(
        {
            "td_ms": expected_ms,
            "td_str": expected_str,
            "td_int": expected_int,
        },
        schema_overrides={"td_ms": pl.Duration(time_unit="ms")},
    )
    assert_frame_equal(expected, result)


def test_duration_std_var() -> None:
df = pl.DataFrame(
{"duration": [1000, 5000, 3000]}, schema={"duration": pl.Duration}
Expand Down
102 changes: 51 additions & 51 deletions py-polars/tests/unit/interop/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,13 +412,13 @@ def test_dataframe_from_repr() -> None:
pl.DataFrame,
pl.from_repr(
"""
┌─────┬─────┬─────┬─────┬─────┬───────┐
│ id ┆ q1 ┆ q2 ┆ q3 ┆ q4 ┆ total │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i8 ┆ i16 ┆ i32 ┆ i64 ┆ f64 │
╞═════╪═════╪═════╪═════╪═════╪═══════╡
└─────┴─────┴─────┴─────┴─────┴───────┘
"""
┌─────┬─────┬─────┬─────┬─────┬───────┐
│ id ┆ q1 ┆ q2 ┆ q3 ┆ q4 ┆ total │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ i8 ┆ i16 ┆ i32 ┆ i64 ┆ f64 │
╞═════╪═════╪═════╪═════╪═════╪═══════╡
└─────┴─────┴─────┴─────┴─────┴───────┘
"""
),
)
assert df.shape == (0, 6)
Expand All @@ -437,11 +437,11 @@ def test_dataframe_from_repr() -> None:
pl.DataFrame,
pl.from_repr(
"""
┌──────┬───────┐
│ misc ┆ other │
╞══════╪═══════╡
└──────┴───────┘
"""
┌──────┬───────┐
│ misc ┆ other │
╞══════╪═══════╡
└──────┴───────┘
"""
),
)
assert_frame_equal(df, pl.DataFrame(schema={"misc": pl.String, "other": pl.String}))
Expand Down Expand Up @@ -472,17 +472,17 @@ def test_dataframe_from_repr() -> None:
pl.DataFrame,
pl.from_repr(
"""
# >>> Missing cols with old-style ellipsis, nulls, commented out
# ┌────────────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬──────┐
# │ dt ┆ c1 ┆ c2 ┆ c3 ┆ ... ┆ c96 ┆ c97 ┆ c98 ┆ c99 │
# │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
# │ date ┆ i32 ┆ i32 ┆ i32 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞════════════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪══════╡
# │ 2023-03-25 ┆ 1 ┆ 2 ┆ 3 ┆ ... ┆ 96 ┆ 97 ┆ 98 ┆ 99 │
# │ 1999-12-31 ┆ 3 ┆ 6 ┆ 9 ┆ ... ┆ 288 ┆ 291 ┆ 294 ┆ null │
# │ null ┆ 9 ┆ 18 ┆ 27 ┆ ... ┆ 864 ┆ 873 ┆ 882 ┆ 891 │
# └────────────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴──────┘
"""
# >>> Missing cols with old-style ellipsis, nulls, commented out
# ┌────────────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬──────┐
# │ dt ┆ c1 ┆ c2 ┆ c3 ┆ ... ┆ c96 ┆ c97 ┆ c98 ┆ c99 │
# │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
# │ date ┆ i32 ┆ i32 ┆ i32 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞════════════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪══════╡
# │ 2023-03-25 ┆ 1 ┆ 2 ┆ 3 ┆ ... ┆ 96 ┆ 97 ┆ 98 ┆ 99 │
# │ 1999-12-31 ┆ 3 ┆ 6 ┆ 9 ┆ ... ┆ 288 ┆ 291 ┆ 294 ┆ null │
# │ null ┆ 9 ┆ 18 ┆ 27 ┆ ... ┆ 864 ┆ 873 ┆ 882 ┆ 891 │
# └────────────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴──────┘
"""
),
)
assert df.schema == {
Expand All @@ -505,15 +505,15 @@ def test_dataframe_from_repr() -> None:
pl.DataFrame,
pl.from_repr(
"""
# >>> no dtypes:
# ┌────────────┬──────┐
# │ dt ┆ c99 │
# ╞════════════╪══════╡
# │ 2023-03-25 ┆ 99 │
# │ 1999-12-31 ┆ null │
# │ null ┆ 891 │
# └────────────┴──────┘
"""
# >>> no dtypes:
# ┌────────────┬──────┐
# │ dt ┆ c99 │
# ╞════════════╪══════╡
# │ 2023-03-25 ┆ 99 │
# │ 1999-12-31 ┆ null │
# │ null ┆ 891 │
# └────────────┴──────┘
"""
),
)
assert df.schema == {"dt": pl.Date, "c99": pl.Int64}
Expand All @@ -527,25 +527,25 @@ def test_dataframe_from_repr() -> None:
pl.DataFrame,
pl.from_repr(
"""
In [2]: with pl.Config() as cfg:
...: pl.Config.set_tbl_formatting("UTF8_FULL", rounded_corners=True)
...: print(df)
...:
shape: (1, 5)
╭───────────┬────────────┬───┬───────┬────────────────────────────────╮
│ source_ac ┆ source_cha ┆ … ┆ ident ┆ timestamp │
│ tor_id ┆ nnel_id ┆ ┆ --- ┆ --- │
│ --- ┆ --- ┆ ┆ str ┆ datetime[μs, Asia/Tokyo] │
│ i32 ┆ i64 ┆ ┆ ┆ │
╞═══════════╪════════════╪═══╪═══════╪════════════════════════════════╡
│ 123456780 ┆ 9876543210 ┆ … ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ … ┆ … ┆ … ┆ … ┆ … │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 803065983 ┆ 2055938745 ┆ … ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │
╰───────────┴────────────┴───┴───────┴────────────────────────────────╯
# "Een fluitje van een cent..." :)
"""
In [2]: with pl.Config() as cfg:
...: pl.Config.set_tbl_formatting("UTF8_FULL", rounded_corners=True)
...: print(df)
...:
shape: (1, 5)
╭───────────┬────────────┬───┬───────┬────────────────────────────────╮
│ source_ac ┆ source_cha ┆ … ┆ ident ┆ timestamp │
│ tor_id ┆ nnel_id ┆ ┆ --- ┆ --- │
│ --- ┆ --- ┆ ┆ str ┆ datetime[μs, Asia/Tokyo] │
│ i32 ┆ i64 ┆ ┆ ┆ │
╞═══════════╪════════════╪═══╪═══════╪════════════════════════════════╡
│ 123456780 ┆ 9876543210 ┆ … ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ … ┆ … ┆ … ┆ … ┆ … │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 803065983 ┆ 2055938745 ┆ … ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │
╰───────────┴────────────┴───┴───────┴────────────────────────────────╯
# "Een fluitje van een cent..." :)
"""
),
)
assert df.shape == (2, 4)
Expand Down

0 comments on commit a3de5d0

Please sign in to comment.