Skip to content

Commit

Permalink
feat(rust, python): let "ambiguous" take "null" value too
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Mar 10, 2024
1 parent 06116df commit 80b07d8
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 43 deletions.
24 changes: 15 additions & 9 deletions crates/polars-arrow/src/legacy/kernels/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use polars_error::{polars_bail, PolarsError};
/// How to resolve a local datetime that maps to two candidate datetimes
/// (an earliest and a latest one) in the target time zone.
///
/// Parsed from the strings `"earliest"`, `"latest"`, `"null"` and `"raise"`
/// via the `FromStr` implementation.
pub enum Ambiguous {
/// Use the earliest of the two candidate datetimes.
Earliest,
/// Use the latest of the two candidate datetimes.
Latest,
/// Produce no datetime (`None`) for the ambiguous value.
Null,
/// Treat the ambiguity as a failure (`convert_to_naive_local` returns an error).
Raise,
}
impl FromStr for Ambiguous {
Expand All @@ -21,8 +22,9 @@ impl FromStr for Ambiguous {
"earliest" => Ok(Ambiguous::Earliest),
"latest" => Ok(Ambiguous::Latest),
"raise" => Ok(Ambiguous::Raise),
"null" => Ok(Ambiguous::Null),
s => polars_bail!(InvalidOperation:
"Invalid argument {}, expected one of: \"earliest\", \"latest\", \"raise\"", s
"Invalid argument {}, expected one of: \"earliest\", \"latest\", \"null\", \"raise\"", s
),
}
}
Expand All @@ -34,13 +36,14 @@ pub fn convert_to_naive_local(
to_tz: &Tz,
ndt: NaiveDateTime,
ambiguous: Ambiguous,
) -> PolarsResult<NaiveDateTime> {
) -> PolarsResult<Option<NaiveDateTime>> {
let ndt = from_tz.from_utc_datetime(&ndt).naive_local();
match to_tz.from_local_datetime(&ndt) {
LocalResult::Single(dt) => Ok(dt.naive_utc()),
LocalResult::Single(dt) => Ok(Some(dt.naive_utc())),
LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous {
Ambiguous::Earliest => Ok(dt_earliest.naive_utc()),
Ambiguous::Latest => Ok(dt_latest.naive_utc()),
Ambiguous::Earliest => Ok(Some(dt_earliest.naive_utc())),
Ambiguous::Latest => Ok(Some(dt_latest.naive_utc())),
Ambiguous::Null => Ok(None),
Ambiguous::Raise => {
polars_bail!(ComputeError: "datetime '{}' is ambiguous in time zone '{}'. Please use `ambiguous` to tell how it should be localized.", ndt, to_tz)
},
Expand All @@ -52,19 +55,22 @@ pub fn convert_to_naive_local(
}
}

/// Same as `convert_to_naive_local`, but returns `None` instead of
/// raising - in some cases this can be used to save a string allocation.
#[cfg(feature = "timezones")]
pub fn convert_to_naive_local_opt(
from_tz: &Tz,
to_tz: &Tz,
ndt: NaiveDateTime,
ambiguous: Ambiguous,
) -> Option<NaiveDateTime> {
) -> Option<Option<NaiveDateTime>> {
let ndt = from_tz.from_utc_datetime(&ndt).naive_local();
match to_tz.from_local_datetime(&ndt) {
LocalResult::Single(dt) => Some(dt.naive_utc()),
LocalResult::Single(dt) => Some(Some(dt.naive_utc())),
LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous {
Ambiguous::Earliest => Some(dt_earliest.naive_utc()),
Ambiguous::Latest => Some(dt_latest.naive_utc()),
Ambiguous::Earliest => Some(Some(dt_earliest.naive_utc())),
Ambiguous::Latest => Some(Some(dt_latest.naive_utc())),
Ambiguous::Null => Some(None),
Ambiguous::Raise => None,
},
LocalResult::None => None,
Expand Down
33 changes: 22 additions & 11 deletions crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,27 +41,38 @@ pub fn replace_time_zone(
};
let out = match ambiguous.len() {
1 => match unsafe { ambiguous.get_unchecked(0) } {
Some(ambiguous) => datetime.0.try_apply(|timestamp| {
let ndt = timestamp_to_datetime(timestamp);
Ok(datetime_to_timestamp(convert_to_naive_local(
&from_tz,
&to_tz,
ndt,
Ambiguous::from_str(ambiguous)?,
)?))
}),
Some(ambiguous) => {
let iter = datetime.0.downcast_iter().map(|arr| {
let element_iter = arr.iter().map(|timestamp_opt| match timestamp_opt {
Some(timestamp) => {
let ndt = timestamp_to_datetime(*timestamp);
let res = convert_to_naive_local(
&from_tz,
&to_tz,
ndt,
Ambiguous::from_str(ambiguous)?,
)?;
Ok::<_, PolarsError>(res.map(datetime_to_timestamp))
},
None => Ok(None),
});
element_iter.try_collect_arr()
});
ChunkedArray::try_from_chunk_iter(datetime.0.name(), iter)
},
_ => Ok(datetime.0.apply(|_| None)),
},
_ => try_binary_elementwise(datetime, ambiguous, |timestamp_opt, ambiguous_opt| {
match (timestamp_opt, ambiguous_opt) {
(Some(timestamp), Some(ambiguous)) => {
let ndt = timestamp_to_datetime(timestamp);
Ok(Some(datetime_to_timestamp(convert_to_naive_local(
Ok(convert_to_naive_local(
&from_tz,
&to_tz,
ndt,
Ambiguous::from_str(ambiguous)?,
)?)))
)?
.map(datetime_to_timestamp))
},
_ => Ok(None),
}
Expand Down
4 changes: 3 additions & 1 deletion crates/polars-time/src/month_start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ pub(crate) fn roll_backward(
let ndt = NaiveDateTime::new(date, time);
let t = match tz {
#[cfg(feature = "timezones")]
Some(tz) => datetime_to_timestamp(try_localize_datetime(ndt, tz, Ambiguous::Raise)?),
Some(tz) => datetime_to_timestamp(
try_localize_datetime(ndt, tz, Ambiguous::Raise)?.expect("we didn't use Ambiguous::Null"),
),
_ => datetime_to_timestamp(ndt),
};
Ok(t)
Expand Down
13 changes: 10 additions & 3 deletions crates/polars-time/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,20 @@ use chrono::TimeZone;
#[cfg(feature = "timezones")]
use polars_core::prelude::PolarsResult;

/// Localize datetime according to given time zone.
///
/// e.g. '2021-01-01 03:00' -> '2021-01-01 03:00CDT'
///
/// Note: this may only return `Ok(None)` if `ambiguous` is `Ambiguous::Null`.
/// Otherwise, it will either return `Ok(Some(NaiveDateTime))` or `Err(PolarsError)`.
/// Therefore, calling `try_localize_datetime(..., Ambiguous::Raise)?.unwrap()`
/// is safe, and will never panic.
#[cfg(feature = "timezones")]
pub(crate) fn try_localize_datetime(
ndt: NaiveDateTime,
tz: &Tz,
ambiguous: Ambiguous,
) -> PolarsResult<NaiveDateTime> {
// e.g. '2021-01-01 03:00' -> '2021-01-01 03:00CDT'
) -> PolarsResult<Option<NaiveDateTime>> {
convert_to_naive_local(&chrono_tz::UTC, tz, ndt, ambiguous)
}

Expand All @@ -24,7 +31,7 @@ pub(crate) fn localize_datetime_opt(
ndt: NaiveDateTime,
tz: &Tz,
ambiguous: Ambiguous,
) -> Option<NaiveDateTime> {
) -> Option<Option<NaiveDateTime>> {
// e.g. '2021-01-01 03:00' -> '2021-01-01 03:00CDT'
convert_to_naive_local_opt(&chrono_tz::UTC, tz, ndt, ambiguous)
}
Expand Down
41 changes: 24 additions & 17 deletions crates/polars-time/src/windows/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ impl Duration {
)
}

/// Localize result to given time zone., respecting DST fold of original datetime.
/// Localize result to given time zone, respecting DST fold of original datetime.
/// For example, 2022-11-06 01:30:00 CST truncated by 1 hour becomes 2022-11-06 01:00:00 CST,
/// whereas 2022-11-06 01:30:00 CDT truncated by 1 hour becomes 2022-11-06 01:00:00 CDT.
///
Expand All @@ -460,16 +460,24 @@ impl Duration {
tz: &Tz,
) -> PolarsResult<NaiveDateTime> {
match localize_datetime_opt(result_dt_local, tz, Ambiguous::Raise) {
Some(dt) => Ok(dt),
Some(dt) => Ok(dt.expect("we didn't use Ambiguous::Null")),
None => {
if try_localize_datetime(original_dt_local, tz, Ambiguous::Earliest)?
.expect("we didn't use Ambiguous::Null")
== original_dt_utc
{
try_localize_datetime(result_dt_local, tz, Ambiguous::Earliest)
Ok(
try_localize_datetime(result_dt_local, tz, Ambiguous::Earliest)?
.expect("we didn't use Ambiguous::Null"),
)
} else if try_localize_datetime(original_dt_local, tz, Ambiguous::Latest)?
.expect("we didn't use Ambiguous::Null")
== original_dt_utc
{
try_localize_datetime(result_dt_local, tz, Ambiguous::Latest)
Ok(
try_localize_datetime(result_dt_local, tz, Ambiguous::Latest)?
.expect("we didn't use Ambiguous::Null"),
)
} else {
unreachable!()
}
Expand Down Expand Up @@ -785,9 +793,10 @@ impl Duration {
new_t = match tz {
#[cfg(feature = "timezones")]
// for UTC, use fastpath below (same as naive)
Some(tz) if tz != &chrono_tz::UTC => {
datetime_to_timestamp(try_localize_datetime(dt, tz, Ambiguous::Raise)?)
},
Some(tz) if tz != &chrono_tz::UTC => datetime_to_timestamp(
try_localize_datetime(dt, tz, Ambiguous::Raise)?
.expect("we didn't use Ambiguous::Null"),
),
_ => datetime_to_timestamp(dt),
};
}
Expand All @@ -801,11 +810,10 @@ impl Duration {
new_t =
datetime_to_timestamp(unlocalize_datetime(timestamp_to_datetime(t), tz));
new_t += if d.negative { -t_weeks } else { t_weeks };
new_t = datetime_to_timestamp(try_localize_datetime(
timestamp_to_datetime(new_t),
tz,
Ambiguous::Raise,
)?);
new_t = datetime_to_timestamp(
try_localize_datetime(timestamp_to_datetime(new_t), tz, Ambiguous::Raise)?
.expect("we didn't use Ambiguous::Null"),
);
},
_ => new_t += if d.negative { -t_weeks } else { t_weeks },
};
Expand All @@ -820,11 +828,10 @@ impl Duration {
new_t =
datetime_to_timestamp(unlocalize_datetime(timestamp_to_datetime(t), tz));
new_t += if d.negative { -t_days } else { t_days };
new_t = datetime_to_timestamp(try_localize_datetime(
timestamp_to_datetime(new_t),
tz,
Ambiguous::Raise,
)?);
new_t = datetime_to_timestamp(
try_localize_datetime(timestamp_to_datetime(new_t), tz, Ambiguous::Raise)?
.expect("we didn't use Ambiguous::Null"),
);
},
_ => new_t += if d.negative { -t_days } else { t_days },
};
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1620,6 +1620,7 @@ def replace_time_zone(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Examples
--------
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def to_datetime(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Examples
--------
Expand Down Expand Up @@ -253,6 +254,7 @@ def strptime(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/functions/as_datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def datetime_(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Returns
-------
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,7 @@ def replace_time_zone(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Examples
--------
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def to_datetime(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Examples
--------
Expand Down Expand Up @@ -237,6 +238,7 @@ def strptime(
- `'raise'` (default): raise
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
- `'null'`: set to null
Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/type_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@
] # ListToStructWidthStrategy

# The following have no equivalent on the Rust side
Ambiguous: TypeAlias = Literal["earliest", "latest", "raise"]
Ambiguous: TypeAlias = Literal["earliest", "latest", "raise", "null"]
ConcatMethod = Literal[
"vertical",
"vertical_relaxed",
Expand Down
29 changes: 29 additions & 0 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,6 +1670,35 @@ def test_replace_time_zone_sortedness_expressions(
assert result["ts"].flags["SORTED_ASC"] == expected_sortedness


def test_replace_time_zone_ambiguous_null() -> None:
    # The wall-clock time below is treated as ambiguous in Europe/London:
    # "earliest"/"latest" are expected to yield the fold=0/fold=1 variants,
    # while "null" is expected to yield a missing value.
    frame = pl.DataFrame(
        {
            "a": [datetime(2020, 10, 25, 1)] * 3,
            "b": ["earliest", "latest", "null"],
        }
    )

    # expression containing 'null'
    per_row = frame.select(
        pl.col("a").dt.replace_time_zone("Europe/London", ambiguous=pl.col("b"))
    )["a"]
    wanted = [
        datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")),
        datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")),
        None,
    ]
    for idx, value in enumerate(wanted):
        assert per_row[idx] == value

    # single 'null' value: every row shares the same ambiguous timestamp,
    # so every row must come back as null
    all_null = frame.select(
        pl.col("a").dt.replace_time_zone("Europe/London", ambiguous="null")
    )["a"]
    for idx in range(3):
        assert all_null[idx] is None


def test_use_earliest_deprecation() -> None:
# strptime
with pytest.warns(
Expand Down

0 comments on commit 80b07d8

Please sign in to comment.