Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial timezone support #903

Merged
merged 25 commits into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5baf5a3
starting to work
billylanchantin Apr 19, 2024
a5702dc
sort features
billylanchantin Apr 20, 2024
14f2f76
pass in atom
billylanchantin Apr 21, 2024
e13ebfa
better timeunit handling
billylanchantin Apr 21, 2024
9efcfbb
timezones feature
billylanchantin Apr 21, 2024
b9223fd
phase out `precision_to_timeunit`
billylanchantin Apr 21, 2024
0f9b239
datetime key additions/shuffling
billylanchantin Apr 21, 2024
763be59
formatting/linting
billylanchantin Apr 21, 2024
17e510b
start to move to a naive_datetime dtype
billylanchantin Apr 27, 2024
839e141
get datetimes back out (incorrectly)
billylanchantin Apr 27, 2024
0e07aa9
start to get timezones working
billylanchantin Apr 27, 2024
d982f13
offsets can be negative
billylanchantin Apr 27, 2024
159c166
add tz dep
billylanchantin Apr 27, 2024
d9f4564
working non-UTC timezone test
billylanchantin Apr 27, 2024
8acfcd6
get duration tests passing
billylanchantin Apr 27, 2024
dc8b624
majority of tests passing
billylanchantin Apr 27, 2024
fab6028
fix unexpected datetime parsing issue
billylanchantin Apr 28, 2024
d21baf8
make enforce_highest_precision work with different sized tuples
billylanchantin Apr 28, 2024
a06716e
add a datetime section to some duration tests
billylanchantin Apr 28, 2024
9114904
test mismatched timezones
billylanchantin Apr 28, 2024
306c789
make tz a test-only dep
billylanchantin Apr 28, 2024
5740aa9
save progress on From traits (not working!)
billylanchantin Apr 29, 2024
6c715f9
rename to s_from_list_naive_datetime
billylanchantin Apr 30, 2024
8f5be24
revert to `&'a str` type and add TODOs
billylanchantin Apr 30, 2024
c8ac63d
didn't mean to remove this
billylanchantin Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
get datetimes back out (incorrectly)
  • Loading branch information
billylanchantin committed Apr 27, 2024
commit 839e14102aaf6e31c36740f616ad3319839d507e
151 changes: 140 additions & 11 deletions native/explorer/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{mem, slice};

use crate::atoms::{
self, calendar, day, hour, infinity, microsecond, millisecond, minute, month, nan, nanosecond,
neg_infinity, precision, second, value, year,
neg_infinity, precision, second, std_offset, time_zone, utc_offset, value, year, zone_abbr,
};
use crate::datatypes::{
days_to_date, time64ns_to_time, timestamp_to_datetime, ExSeries, ExSeriesRef,
Expand Down Expand Up @@ -103,7 +103,7 @@ fn date_series_to_list<'b>(s: &Series, env: Env<'b>) -> Result<Term<'b>, Explore
))
}

macro_rules! unsafe_encode_datetime {
macro_rules! unsafe_encode_naive_datetime {
($v: expr, $naive_datetime_struct_keys: ident, $calendar_iso_module: ident, $naive_datetime_module: ident, $env: ident) => {{
let dt = timestamp_to_datetime($v);
let microseconds = dt.and_utc().timestamp_subsec_micros();
Expand Down Expand Up @@ -158,7 +158,7 @@ fn naive_datetime_struct_keys(env: Env) -> [NIF_TERM; 9] {
}

#[inline]
fn datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 {
fn naive_datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 {
match time_unit {
TimeUnit::Milliseconds => v * 1000,
TimeUnit::Microseconds => v,
Expand All @@ -167,13 +167,13 @@ fn datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 {
}

#[inline]
pub fn encode_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result<Term, ExplorerError> {
pub fn encode_naive_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result<Term, ExplorerError> {
let naive_datetime_struct_keys = &naive_datetime_struct_keys(env);
let calendar_iso_module = atoms::calendar_iso_module().encode(env).as_c_arg();
let naive_datetime_module = atoms::naive_datetime_module().encode(env).as_c_arg();
let microseconds_time = datetime_to_microseconds(v, time_unit);
let microseconds_time = naive_datetime_to_microseconds(v, time_unit);

Ok(unsafe_encode_datetime!(
Ok(unsafe_encode_naive_datetime!(
microseconds_time,
naive_datetime_struct_keys,
calendar_iso_module,
Expand All @@ -183,7 +183,7 @@ pub fn encode_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result<Term, Ex
}

#[inline]
fn datetime_series_to_list<'b>(
fn naive_datetime_series_to_list<'b>(
s: &Series,
time_unit: TimeUnit,
env: Env<'b>,
Expand All @@ -196,9 +196,9 @@ fn datetime_series_to_list<'b>(
env,
s.datetime()?.into_iter().map(|option| option
.map(|v| {
let microseconds_time = datetime_to_microseconds(v, time_unit);
let microseconds_time = naive_datetime_to_microseconds(v, time_unit);

unsafe_encode_datetime!(
unsafe_encode_naive_datetime!(
microseconds_time,
naive_datetime_struct_keys,
calendar_iso_module,
Expand All @@ -210,6 +210,129 @@ fn datetime_series_to_list<'b>(
))
}

/// Builds an Elixir `DateTime` struct term directly from a microsecond timestamp.
///
/// Arguments:
/// * `$v` - timestamp passed to `timestamp_to_datetime`.
/// * `$time_zone` - value whose `to_string()` is stored under the `time_zone` key.
/// * `$datetime_struct_keys` - identifier bound to the key array; the 13 values
///   assembled below must line up with those keys one-to-one, in the same order.
/// * `$calendar_iso_module`, `$datetime_module` - pre-encoded module atoms.
/// * `$env` - the NIF environment the resulting term is created in.
///
/// NOTE(review): `std_offset` and `utc_offset` are hard-coded to `0` and
/// `zone_abbr` is the literal string "zone_abbr". These look like placeholders
/// rather than values derived from `$time_zone` - confirm before relying on them.
macro_rules! unsafe_encode_datetime {
    (
        $v: expr,
        $time_zone: expr,
        $datetime_struct_keys: ident,
        $calendar_iso_module: ident,
        $datetime_module: ident,
        $env: ident
    ) => {{
        let dt = timestamp_to_datetime($v);

        // Elixir's microsecond field holds at most 6 digits, so cap the
        // sub-second part at 999_999.
        let capped_micros = dt.and_utc().timestamp_subsec_micros().min(999_999);

        // Values in the same order as the struct keys.
        let values = [
            $datetime_module,
            $calendar_iso_module,
            dt.day().encode($env).as_c_arg(),
            dt.hour().encode($env).as_c_arg(),
            (capped_micros, 6).encode($env).as_c_arg(),
            dt.minute().encode($env).as_c_arg(),
            dt.month().encode($env).as_c_arg(),
            dt.second().encode($env).as_c_arg(),
            // std_offset
            0.encode($env).as_c_arg(),
            $time_zone.to_string().encode($env).as_c_arg(),
            // utc_offset
            0.encode($env).as_c_arg(),
            dt.year().encode($env).as_c_arg(),
            "zone_abbr".encode($env).as_c_arg(),
        ];

        unsafe {
            let raw_map =
                map::make_map_from_arrays($env.as_c_arg(), $datetime_struct_keys, &values)
                    .unwrap();

            Term::new($env, raw_map)
        }
    }};
}

// Here we build the DateTime struct manually, as it's much faster than using NifStruct
// This is because we already have the keys (we know this at compile time), and the types,
// so we can build the struct directly.
//
// Returns the 13 struct keys as raw NIF terms encoded in `env`. The order of
// the keys is significant: `unsafe_encode_datetime!` supplies its values
// positionally, so both lists must stay in sync.
fn datetime_struct_keys(env: Env) -> [NIF_TERM; 13] {
    [
        atom::__struct__(),
        calendar(),
        day(),
        hour(),
        microsecond(),
        minute(),
        month(),
        second(),
        std_offset(),
        time_zone(),
        utc_offset(),
        year(),
        zone_abbr(),
    ]
    .map(|key| key.encode(env).as_c_arg())
}

/// Encodes a single timestamp as an Elixir `DateTime` struct term.
///
/// `v` is the raw timestamp expressed in `time_unit`; it is converted to
/// microseconds before encoding. `time_zone` is stored as the struct's
/// `time_zone` value, and the term is created in `env`.
///
/// Always returns `Ok` in the code visible here; the `Result` wrapper matches
/// the other `encode_*` functions.
#[inline]
pub fn encode_datetime(
    v: i64,
    time_unit: TimeUnit,
    time_zone: String,
    env: Env,
) -> Result<Term, ExplorerError> {
    let micros = naive_datetime_to_microseconds(v, time_unit);

    let module_term = atoms::datetime_module().encode(env).as_c_arg();
    let calendar_term = atoms::calendar_iso_module().encode(env).as_c_arg();
    let struct_keys = &datetime_struct_keys(env);

    let encoded = unsafe_encode_datetime!(
        micros,
        time_zone,
        struct_keys,
        calendar_term,
        module_term,
        env
    );

    Ok(encoded)
}

#[inline]
fn datetime_series_to_list<'b>(
s: &Series,
time_unit: TimeUnit,
time_zone: String,
env: Env<'b>,
) -> Result<Term<'b>, ExplorerError> {
let datetime_struct_keys = &datetime_struct_keys(env);
let calendar_iso_module = atoms::calendar_iso_module().encode(env).as_c_arg();
let datetime_module = atoms::datetime_module().encode(env).as_c_arg();

Ok(unsafe_iterator_series_to_list!(
env,
s.datetime()?.into_iter().map(|option| option
.map(|v| {
let microseconds_time = naive_datetime_to_microseconds(v, time_unit);

unsafe_encode_datetime!(
microseconds_time,
time_zone,
datetime_struct_keys,
calendar_iso_module,
datetime_module,
env
)
})
.encode(env))
))
}

fn time_unit_to_atom(time_unit: TimeUnit) -> atom::Atom {
match time_unit {
TimeUnit::Milliseconds => millisecond(),
Expand Down Expand Up @@ -565,7 +688,10 @@ pub fn term_from_value<'b>(v: AnyValue, env: Env<'b>) -> Result<Term<'b>, Explor
AnyValue::Float64(v) => Ok(Some(term_from_float64(v, env)).encode(env)),
AnyValue::Date(v) => encode_date(v, env),
AnyValue::Time(v) => encode_time(v, env),
AnyValue::Datetime(v, time_unit, None) => encode_datetime(v, time_unit, env),
AnyValue::Datetime(v, time_unit, None) => encode_naive_datetime(v, time_unit, env),
AnyValue::Datetime(v, time_unit, Some(time_zone)) => {
encode_datetime(v, time_unit, time_zone.to_string(), env)
}
AnyValue::Duration(v, time_unit) => encode_duration(v, time_unit, env),
AnyValue::Categorical(idx, mapping, _) => Ok(mapping.get(idx).encode(env)),
AnyValue::List(series) => list_from_series(ExSeries::new(series), env),
Expand Down Expand Up @@ -599,7 +725,10 @@ pub fn list_from_series(s: ExSeries, env: Env) -> Result<Term, ExplorerError> {

DataType::Date => date_series_to_list(&s, env),
DataType::Time => time_series_to_list(&s, env),
DataType::Datetime(time_unit, None) => datetime_series_to_list(&s, *time_unit, env),
DataType::Datetime(time_unit, None) => naive_datetime_series_to_list(&s, *time_unit, env),
DataType::Datetime(time_unit, Some(time_zone)) => {
datetime_series_to_list(&s, *time_unit, time_zone.clone().to_string(), env)
}
DataType::Duration(time_unit) => duration_series_to_list(&s, *time_unit, env),
DataType::Binary => generic_binary_series_to_list(&s.resource, &s, env),
DataType::String => generic_string_series_to_list(&s, env),
Expand Down
7 changes: 6 additions & 1 deletion native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ mod atoms {
rustler::atoms! {
calendar_iso_module = "Elixir.Calendar.ISO",
date_module = "Elixir.Date",
datetime_module = "Elixir.DateTime",
duration_module = "Elixir.Explorer.Duration",
naive_datetime_module = "Elixir.NaiveDateTime",
time_module = "Elixir.Time",
Expand All @@ -67,7 +68,11 @@ mod atoms {
calendar,
nan,
infinity,
neg_infinity
neg_infinity,
std_offset,
time_zone,
utc_offset,
zone_abbr,
}
}

Expand Down
9 changes: 5 additions & 4 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use crate::{
encoding, ExDataFrame, ExSeries, ExplorerError,
};

use encoding::encode_datetime;
use encoding::encode_naive_datetime;
// use encoding::encode_datetime;

use polars::prelude::*;
use polars_ops::chunked_array::cov::{cov, pearson_corr};
Expand Down Expand Up @@ -1007,7 +1008,7 @@ pub fn s_min(env: Env, s: ExSeries) -> Result<Term, ExplorerError> {
DataType::Time => Ok(s.min::<i64>()?.map(ExTime::from).encode(env)),
DataType::Datetime(unit, _) => Ok(s
.min::<i64>()?
.map(|v| encode_datetime(v, *unit, env).unwrap())
.map(|v| encode_naive_datetime(v, *unit, env).unwrap())
.encode(env)),
dt => panic!("min/1 not implemented for {dt:?}"),
}
Expand All @@ -1029,7 +1030,7 @@ pub fn s_max(env: Env, s: ExSeries) -> Result<Term, ExplorerError> {
DataType::Time => Ok(s.max::<i64>()?.map(ExTime::from).encode(env)),
DataType::Datetime(unit, _) => Ok(s
.max::<i64>()?
.map(|v| encode_datetime(v, *unit, env).unwrap())
.map(|v| encode_naive_datetime(v, *unit, env).unwrap())
.encode(env)),
dt => panic!("max/1 not implemented for {dt:?}"),
}
Expand Down Expand Up @@ -1230,7 +1231,7 @@ pub fn s_quantile<'a>(
},
DataType::Datetime(unit, None) => match s.datetime()?.quantile(quantile, strategy)? {
None => Ok(None::<ExNaiveDateTime>.encode(env)),
Some(time) => Ok(encode_datetime(time as i64, *unit, env)
Some(time) => Ok(encode_naive_datetime(time as i64, *unit, env)
.unwrap()
.encode(env)),
},
Expand Down
17 changes: 11 additions & 6 deletions test/explorer/series/datetime_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,18 @@ defmodule Explorer.Series.DateTimeTest do

describe "timezones" do
test "UTC" do
datetimes = [~U[2024-01-01T12:00:00Z], ~U[2024-01-01T13:00:00Z], ~U[2024-01-01T14:00:00Z]]
datetimes_in = [
~U[2024-01-01T12:00:00.000000Z],
~U[2024-01-01T13:00:00.000000Z],
~U[2024-01-01T14:00:00.000000Z]
]

datetimes
|> Series.from_list()
|> IO.inspect()
|> Series.to_list()
|> IO.inspect()
datetimes_out =
datetimes_in
|> Series.from_list()
|> Series.to_list()

assert datetimes_out == datetimes_in
end
end
end