From 646cbf63339fd5a5a0a1628389f059fed4d7ad40 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Fri, 8 Nov 2024 18:40:30 +0400 Subject: [PATCH] further optimise duration format func, add explanatory comments --- crates/polars-core/src/fmt.rs | 67 ++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index 95c07e9191259..9b0829eccf374 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -973,10 +973,10 @@ const DURATION_PARTS: [&str; 4] = ["d", "h", "m", "s"]; const ISO_DURATION_PARTS: [&str; 4] = ["D", "H", "M", "S"]; #[cfg(feature = "dtype-duration")] const SIZES_NS: [i64; 4] = [ - 86_400_000_000_000, - 3_600_000_000_000, - 60_000_000_000, - 1_000_000_000, + 86_400_000_000_000, // per day + 3_600_000_000_000, // per hour + 60_000_000_000, // per minute + 1_000_000_000, // per second ]; #[cfg(feature = "dtype-duration")] const SIZES_US: [i64; 4] = [86_400_000_000, 3_600_000_000, 60_000_000, 1_000_000]; @@ -985,6 +985,17 @@ const SIZES_MS: [i64; 4] = [86_400_000, 3_600_000, 60_000, 1_000]; #[cfg(feature = "dtype-duration")] pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String { + // take the physical/integer duration value and return either a human-readable + // version of the duration or an ISO8601 duration string. + // + // Polars: "3d 22m 55s 1ms" + // ISO: "P3DT22M55.001S" + // + // The parts (days, hours, minutes, seconds) occur in the same order in + // each string, so we use the same code to generate each of them, with + // only the separators and the 'seconds' part differing. 
+ // + // Ref: https://en.wikipedia.org/wiki/ISO_8601#Durations if v == 0 { return if iso { "PT0S".to_string() @@ -1006,13 +1017,17 @@ pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String { let mut buffer = itoa::Buffer::new(); if iso { if v < 0 { - s.push_str("-P"); + // negative sign before "P" indicates that the entire ISO duration is negative. + // the Polars version applies a negative sign to each *individual* part. + write!(s, "-P").unwrap(); v = v.abs() } else { - s.push('P'); + write!(s, "P").unwrap(); } }; + // iterate over dtype-specific sizes to appropriately scale + // and extract days, hours, minutes, seconds parts. for (i, &size) in sizes.iter().enumerate() { let whole_num = if i == 0 { v / size @@ -1020,61 +1035,63 @@ pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String { (v % sizes[i - 1]) / size }; if whole_num != 0 || (iso && i == 3) { - s.push_str(buffer.format(whole_num)); + write!(s, "{}", buffer.format(whole_num)).unwrap(); if iso { + // (index 3 => 'seconds' part): the ISO version writes + // fractional seconds, not nano/micro/milliseconds if i == 3 { let secs = match unit { TimeUnit::Nanoseconds => format!(".{:09}", v % size), TimeUnit::Microseconds => format!(".{:06}", v % size), TimeUnit::Milliseconds => format!(".{:03}", v % size), }; - s.push_str(secs.trim_end_matches('0')); + write!(s, "{}", secs.trim_end_matches('0')).unwrap(); } - s.push_str(ISO_DURATION_PARTS[i]); + write!(s, "{}", ISO_DURATION_PARTS[i]).unwrap(); + + // (index 0 => 'days' part): after writing days above (if non-zero) + // the ISO duration string requires a `T` before the time part if i == 0 { - s.push('T'); + write!(s, "T").unwrap(); } } else { - s.push_str(DURATION_PARTS[i]); + write!(s, "{}", DURATION_PARTS[i]).unwrap(); if v % size != 0 { - s.push(' '); + write!(s, " ").unwrap(); } } } else if iso && i == 0 { - s.push('T'); + write!(s, "T").unwrap(); } } if iso { + // ISO version has already written fractional seconds 
(if non-zero) if s.ends_with('T') { s.pop(); } } else { + // Polars version writes out fractional seconds separately as + // integer nano/micro/milliseconds match unit { TimeUnit::Nanoseconds => { if v % 1000 != 0 { - s.push_str(buffer.format(v % 1_000_000_000)); - s.push_str("ns"); + write!(s, "{}ns", buffer.format(v % 1_000_000_000)).unwrap(); } else if v % 1_000_000 != 0 { - s.push_str(buffer.format((v % 1_000_000_000) / 1000)); - s.push_str("µs"); + write!(s, "{}µs", buffer.format((v % 1_000_000_000) / 1000)).unwrap(); } else if v % 1_000_000_000 != 0 { - s.push_str(buffer.format((v % 1_000_000_000) / 1_000_000)); - s.push_str("ms"); + write!(s, "{}ms", buffer.format((v % 1_000_000_000) / 1_000_000)).unwrap(); } }, TimeUnit::Microseconds => { if v % 1000 != 0 { - s.push_str(buffer.format(v % 1_000_000)); - s.push_str("µs"); + write!(s, "{}µs", buffer.format(v % 1_000_000)).unwrap(); } else if v % 1_000_000 != 0 { - s.push_str(buffer.format((v % 1_000_000) / 1_000)); - s.push_str("ms"); + write!(s, "{}ms", buffer.format((v % 1_000_000) / 1_000)).unwrap(); } }, TimeUnit::Milliseconds => { if v % 1000 != 0 { - s.push_str(buffer.format(v % 1_000)); - s.push_str("ms"); + write!(s, "{}ms", buffer.format(v % 1_000)).unwrap(); } }, }