Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial timezone support #903

Merged
merged 25 commits into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5baf5a3
starting to work
billylanchantin Apr 19, 2024
a5702dc
sort features
billylanchantin Apr 20, 2024
14f2f76
pass in atom
billylanchantin Apr 21, 2024
e13ebfa
better timeunit handling
billylanchantin Apr 21, 2024
9efcfbb
timezones feature
billylanchantin Apr 21, 2024
b9223fd
phase out `precision_to_timeunit`
billylanchantin Apr 21, 2024
0f9b239
datetime key additions/shuffling
billylanchantin Apr 21, 2024
763be59
formatting/linting
billylanchantin Apr 21, 2024
17e510b
start to move to a naive_datetime dtype
billylanchantin Apr 27, 2024
839e141
get datetimes back out (incorrectly)
billylanchantin Apr 27, 2024
0e07aa9
start to get timezones working
billylanchantin Apr 27, 2024
d982f13
offsets can be negative
billylanchantin Apr 27, 2024
159c166
add tz dep
billylanchantin Apr 27, 2024
d9f4564
working non-UTC timezone test
billylanchantin Apr 27, 2024
8acfcd6
get duration tests passing
billylanchantin Apr 27, 2024
dc8b624
majority of tests passing
billylanchantin Apr 27, 2024
fab6028
fix unexpected datetime parsing issue
billylanchantin Apr 28, 2024
d21baf8
make enforce_highest_precision work with different sized tuples
billylanchantin Apr 28, 2024
a06716e
add a datetime section to some duration tests
billylanchantin Apr 28, 2024
9114904
test mismatched timezones
billylanchantin Apr 28, 2024
306c789
make tz a test-only dep
billylanchantin Apr 28, 2024
5740aa9
save progress on From traits (not working!)
billylanchantin Apr 29, 2024
6c715f9
rename to s_from_list_naive_datetime
billylanchantin Apr 30, 2024
8f5be24
revert to `&'a str` type and add TODOs
billylanchantin Apr 30, 2024
c8ac63d
didn't mean to remove this
billylanchantin Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
starting to work
  • Loading branch information
billylanchantin committed Apr 19, 2024
commit 5baf5a35194da14b82f9b9875d2e07f99613306b
2 changes: 1 addition & 1 deletion lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ defmodule Explorer.PolarsBackend.Native do
def s_from_list_bool(_name, _val), do: err()
def s_from_list_date(_name, _val), do: err()
def s_from_list_time(_name, _val), do: err()
def s_from_list_datetime(_name, _val, _precision), do: err()
def s_from_list_datetime(_name, _val, _precision, _tz), do: err()
def s_from_list_duration(_name, _val, _precision), do: err()
def s_from_list_f32(_name, _val), do: err()
def s_from_list_f64(_name, _val), do: err()
Expand Down
78 changes: 59 additions & 19 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -170,27 +170,67 @@ defmodule Explorer.PolarsBackend.Shared do
def from_list(list, dtype, name) when is_list(list) do
case dtype do
# Signed integers
{:s, 8} -> Native.s_from_list_s8(name, list)
{:s, 16} -> Native.s_from_list_s16(name, list)
{:s, 32} -> Native.s_from_list_s32(name, list)
{:s, 64} -> Native.s_from_list_s64(name, list)
{:s, 8} ->
Native.s_from_list_s8(name, list)

{:s, 16} ->
Native.s_from_list_s16(name, list)

{:s, 32} ->
Native.s_from_list_s32(name, list)

{:s, 64} ->
Native.s_from_list_s64(name, list)

# Unsigned integers
{:u, 8} -> Native.s_from_list_u8(name, list)
{:u, 16} -> Native.s_from_list_u16(name, list)
{:u, 32} -> Native.s_from_list_u32(name, list)
{:u, 64} -> Native.s_from_list_u64(name, list)
{:u, 8} ->
Native.s_from_list_u8(name, list)

{:u, 16} ->
Native.s_from_list_u16(name, list)

{:u, 32} ->
Native.s_from_list_u32(name, list)

{:u, 64} ->
Native.s_from_list_u64(name, list)

# Floats
{:f, 32} -> Native.s_from_list_f32(name, list)
{:f, 64} -> Native.s_from_list_f64(name, list)
:boolean -> Native.s_from_list_bool(name, list)
:string -> Native.s_from_list_str(name, list)
:category -> Native.s_from_list_categories(name, list)
:date -> Native.s_from_list_date(name, list)
:time -> Native.s_from_list_time(name, list)
{:datetime, precision} -> Native.s_from_list_datetime(name, list, Atom.to_string(precision))
{:duration, precision} -> Native.s_from_list_duration(name, list, Atom.to_string(precision))
:binary -> Native.s_from_list_binary(name, list)
:null -> Native.s_from_list_null(name, length(list))
{:f, 32} ->
Native.s_from_list_f32(name, list)

{:f, 64} ->
Native.s_from_list_f64(name, list)

:boolean ->
Native.s_from_list_bool(name, list)

:string ->
Native.s_from_list_str(name, list)

:category ->
Native.s_from_list_categories(name, list)

:date ->
Native.s_from_list_date(name, list)

:time ->
Native.s_from_list_time(name, list)

{:datetime, precision} ->
Native.s_from_list_datetime(name, list, Atom.to_string(precision), nil)

{:datetime, precision, tz} ->
Native.s_from_list_datetime(name, list, Atom.to_string(precision), tz)

{:duration, precision} ->
Native.s_from_list_duration(name, list, Atom.to_string(precision))

:binary ->
Native.s_from_list_binary(name, list)

:null ->
Native.s_from_list_null(name, length(list))
end
end

Expand Down
15 changes: 9 additions & 6 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,10 @@ defmodule Explorer.Shared do

defp infer_type(%Date{} = _item), do: :date
defp infer_type(%Time{} = _item), do: :time
defp infer_type(%DateTime{time_zone: tz} = _item), do: {:datetime, :microsecond, tz}
defp infer_type(%NaiveDateTime{} = _item), do: {:datetime, :microsecond}
defp infer_type(%Explorer.Duration{precision: precision} = _item), do: {:duration, precision}
defp infer_type(%_{} = item), do: raise(ArgumentError, "unsupported datatype: #{inspect(item)}")
defp infer_type(item) when is_integer(item), do: {:s, 64}
defp infer_type(item) when is_float(item) or item in @non_finite, do: {:f, 64}
defp infer_type(item) when is_boolean(item), do: :boolean
Expand Down Expand Up @@ -559,19 +561,20 @@ defmodule Explorer.Shared do
@doc """
Converts dtype to its string representation.
"""
def dtype_to_string({:datetime, :millisecond}), do: "datetime[ms]"
def dtype_to_string({:datetime, :microsecond}), do: "datetime[μs]"
def dtype_to_string({:datetime, :nanosecond}), do: "datetime[ns]"
def dtype_to_string({:duration, :millisecond}), do: "duration[ms]"
def dtype_to_string({:duration, :microsecond}), do: "duration[μs]"
def dtype_to_string({:duration, :nanosecond}), do: "duration[ns]"
def dtype_to_string({:datetime, p}), do: "datetime[#{precision(p)}]"
def dtype_to_string({:datetime, p, tz}), do: "datetime[#{precision(p)}, #{tz}]"
def dtype_to_string({:duration, p}), do: "duration[#{precision(p)}]"
def dtype_to_string({:list, dtype}), do: "list[" <> dtype_to_string(dtype) <> "]"
def dtype_to_string({:struct, fields}), do: "struct[#{length(fields)}]"
def dtype_to_string({:f, size}), do: "f" <> Integer.to_string(size)
def dtype_to_string({:s, size}), do: "s" <> Integer.to_string(size)
def dtype_to_string({:u, size}), do: "u" <> Integer.to_string(size)
def dtype_to_string(other) when is_atom(other), do: Atom.to_string(other)

# Maps a time-unit atom to the short suffix that `dtype_to_string/1` interpolates
# into "datetime[...]" and "duration[...]". Only the three atoms below are
# handled; any other argument matches no clause.
defp precision(:millisecond), do: "ms"
defp precision(:microsecond), do: "μs"
defp precision(:nanosecond), do: "ns"

@threshold 0.77
@max_suggestions 5

Expand Down
116 changes: 99 additions & 17 deletions native/explorer/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ use std::str::FromStr;
#[cfg(feature = "aws")]
use polars::prelude::cloud::AmazonS3ConfigKey as S3Key;

#[cfg(feature = "timezones")]
use chrono_tz::Tz;

pub use ex_dtypes::*;

pub struct ExDataFrameRef(pub DataFrame);
Expand Down Expand Up @@ -257,7 +260,7 @@ fn time_unit_of_ex_duration(duration: &ExDuration) -> TimeUnit {

#[derive(NifStruct, Copy, Clone, Debug)]
#[module = "NaiveDateTime"]
pub struct ExDateTime {
pub struct ExNaiveDateTime {
pub calendar: Atom,
pub day: u32,
pub month: u32,
Expand All @@ -268,11 +271,23 @@ pub struct ExDateTime {
pub microsecond: (u32, u32),
}

/// Rust-side counterpart of an Elixir `DateTime` struct, mapped through the
/// `NifStruct` derive and the `#[module = "DateTime"]` attribute below.
///
/// Unlike `ExNaiveDateTime`, this type carries a `time_zone` name. The name is
/// borrowed (`&'a str`), so a value cannot outlive the string it points at.
// NOTE(review): Elixir's `DateTime` also has `zone_abbr`, `utc_offset` and
// `std_offset` fields, none of which are declared here - confirm whether they
// are needed before values of this type are encoded back to Elixir.
#[derive(NifStruct, Copy, Clone, Debug)]
#[module = "DateTime"]
pub struct ExDateTime<'a> {
pub calendar: Atom,
pub day: u32,
pub hour: u32,
// Two-element tuple; element `.0` is what the conversions in this file pass to
// `and_hms_micro_opt` as the microsecond count.
// NOTE(review): element `.1` is presumably the precision in digits - confirm.
pub microsecond: (u32, u32),
pub minute: u32,
pub month: u32,
pub second: u32,
// Time zone name as a borrowed string slice.
pub time_zone: &'a str,
pub year: i32,
}

pub use polars::export::arrow::temporal_conversions::date32_to_date as days_to_date;

/// Converts a microsecond i64 to a `NaiveDateTime`.
/// This is because when getting a timestamp, it might have negative values.
pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime {
pub fn timestamp_to_datetime_utc(microseconds: i64) -> DateTime<Utc> {
let sign = microseconds.signum();
let seconds = match sign {
-1 => microseconds / 1_000_000 - 1,
Expand All @@ -283,9 +298,13 @@ pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime {
_ => microseconds % 1_000_000,
};
let nanoseconds = remainder.abs() * 1_000;
DateTime::<Utc>::from_timestamp(seconds, nanoseconds.try_into().unwrap())
.expect("construct a UTC")
.naive_utc()
DateTime::<Utc>::from_timestamp(seconds, nanoseconds.try_into().unwrap()).expect("construct a UTC")
}

/// Converts a microsecond timestamp into a timezone-less `NaiveDateTime`.
///
/// The value is first turned into a `DateTime<Utc>` by `timestamp_to_datetime_utc`
/// (which also deals with negative timestamps), and the UTC marker is then dropped.
pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime {
    let utc_datetime = timestamp_to_datetime_utc(microseconds);
    utc_datetime.naive_utc()
}

// Limit the number of digits in the microsecond part of a timestamp to 6.
Expand All @@ -299,14 +318,14 @@ fn microseconds_six_digits(microseconds: u32) -> u32 {
}
}

impl From<i64> for ExDateTime {
impl From<i64> for ExNaiveDateTime {
fn from(microseconds: i64) -> Self {
timestamp_to_datetime(microseconds).into()
}
}

impl From<ExDateTime> for i64 {
fn from(dt: ExDateTime) -> i64 {
impl From<ExNaiveDateTime> for i64 {
fn from(dt: ExNaiveDateTime) -> i64 {
let duration = NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day)
.unwrap()
.and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0)
Expand All @@ -325,18 +344,18 @@ impl From<ExDateTime> for i64 {
}
}

impl From<ExDateTime> for NaiveDateTime {
fn from(dt: ExDateTime) -> NaiveDateTime {
/// Builds a chrono `NaiveDateTime` from the calendar and clock fields of an
/// `ExNaiveDateTime`.
///
/// # Panics
///
/// Panics when the fields do not describe a valid date or a valid time of day,
/// because both `Option` results are unwrapped.
impl From<ExNaiveDateTime> for NaiveDateTime {
    fn from(dt: ExNaiveDateTime) -> NaiveDateTime {
        let date = NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day).unwrap();
        // Only the first tuple element (the microsecond count) is used here.
        let micros = dt.microsecond.0;
        date.and_hms_micro_opt(dt.hour, dt.minute, dt.second, micros)
            .unwrap()
    }
}

impl From<NaiveDateTime> for ExDateTime {
impl From<NaiveDateTime> for ExNaiveDateTime {
fn from(dt: NaiveDateTime) -> Self {
ExDateTime {
ExNaiveDateTime {
calendar: atoms::calendar_iso_module(),
day: dt.day(),
month: dt.month(),
Expand All @@ -352,12 +371,75 @@ impl From<NaiveDateTime> for ExDateTime {
}
}

impl Literal for ExDateTime {
impl Literal for ExNaiveDateTime {
fn lit(self) -> Expr {
NaiveDateTime::from(self).lit()
}
}

/// Converts a microsecond timestamp into an `ExDateTime`.
///
/// The timestamp is turned into a `DateTime<Utc>` by `timestamp_to_datetime_utc`,
/// and that value is then handed to the `From<DateTime<Utc>>` conversion.
impl From<i64> for ExDateTime<'_> {
    fn from(microseconds: i64) -> Self {
        let utc_datetime = timestamp_to_datetime_utc(microseconds);
        utc_datetime.into()
    }
}

/// Converts an `ExDateTime` into the number of microseconds between
/// 1970-01-01 00:00:00 and the date/time spelled out by its fields.
///
/// Only the calendar and clock fields are read. `time_zone` is not consulted, so
/// the fields are measured against the epoch exactly as given.
///
/// If the span is too large for `num_microseconds` to represent, the value falls
/// back to millisecond resolution scaled by 1000.
///
/// # Panics
///
/// Panics when the fields do not describe a valid date or time of day.
impl From<ExDateTime<'_>> for i64 {
    fn from(dt: ExDateTime<'_>) -> i64 {
        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1)
            .unwrap()
            .and_hms_opt(0, 0, 0)
            .unwrap();
        let field_datetime = NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day)
            .unwrap()
            .and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0)
            .unwrap();
        let elapsed = field_datetime.signed_duration_since(epoch);

        elapsed
            .num_microseconds()
            .unwrap_or_else(|| elapsed.num_milliseconds() * 1_000)
    }
}

impl From<chrono::DateTime<chrono::Utc>> for ExDateTime<'static> {
fn from(dt: chrono::DateTime<chrono::Utc>) -> ExDateTime<'static> {
dt.timestamp_micros().into()
}
}

// impl<Tz: chrono::TimeZone + std::str::FromStr> From<ExDateTime<'_>> for DateTime<Tz> {
// fn from(dt: ExDateTime<'_>) -> DateTime<Tz> where <Tz as FromStr>::Err: core::fmt::Debug {
// let tz: Tz = dt.time_zone.parse().unwrap();
// tz.ymd(dt.year, dt.month, dt.day)
// .and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0)
// .unwrap()
// }
// }

// impl From<DateTime<T=Tz>> for ExDateTime<'_> {
// fn from(dt: DateTime<T=Tz>) -> Self {
// ExDateTime {
// calendar: atoms::calendar_iso_module(),
// day: dt.day(),
// hour: dt.hour(),
// microsecond: (microseconds_six_digits(dt.and_utc().timestamp_subsec_micros()), 6),
// minute: dt.minute(),
// month: dt.month(),
// second: dt.second(),
// time_zone: dt.time_zone().to_string(),
// year: dt.year(),
// }
// }
// }

// impl Literal for ExDateTime<'_> {
// fn lit(self) -> Expr {
// DateTime::from(self).lit()
// }
// }

#[derive(NifStruct, Copy, Clone, Debug)]
#[module = "Time"]
pub struct ExTime {
Expand Down Expand Up @@ -433,7 +515,7 @@ pub enum ExValidValue<'a> {
Str(&'a str),
Date(ExDate),
Time(ExTime),
DateTime(ExDateTime),
DateTime(ExNaiveDateTime),
Duration(ExDuration),
}

Expand Down Expand Up @@ -476,7 +558,7 @@ impl<'a> rustler::Decoder<'a> for ExValidValue<'a> {
Ok(ExValidValue::Date(date))
} else if let Ok(time) = term.decode::<ExTime>() {
Ok(ExValidValue::Time(time))
} else if let Ok(datetime) = term.decode::<ExDateTime>() {
} else if let Ok(datetime) = term.decode::<ExNaiveDateTime>() {
Ok(ExValidValue::DateTime(datetime))
} else if let Ok(duration) = term.decode::<ExDuration>() {
Ok(ExValidValue::Duration(duration))
Expand Down
4 changes: 2 additions & 2 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use polars::prelude::{GetOutput, IntoSeries, Utf8JsonPathImpl};
use polars::series::Series;

use crate::datatypes::{
ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExValidValue,
ExCorrelationMethod, ExDate, ExNaiveDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExValidValue,
};
use crate::series::{cast_str_to_f64, ewm_opts, rolling_opts};
use crate::{ExDataFrame, ExExpr, ExSeries};
Expand Down Expand Up @@ -71,7 +71,7 @@ pub fn expr_date(date: ExDate) -> ExExpr {
}

#[rustler::nif]
pub fn expr_datetime(datetime: ExDateTime) -> ExExpr {
pub fn expr_datetime(datetime: ExNaiveDateTime) -> ExExpr {
ExExpr::new(datetime.lit())
}

Expand Down
Loading