Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 176 additions & 1 deletion arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use arrow_schema::{
DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
DataType, IntervalUnit, TimeUnit,
};
use chrono::{Duration, NaiveDate, NaiveDateTime};
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, TimeZone};
use half::f16;
use std::fmt::Debug;
use std::marker::PhantomData;
Expand Down Expand Up @@ -325,6 +325,34 @@ pub trait ArrowTimestampType: ArrowTemporalType<Native = i64> {
///
/// See [`DataType::Timestamp`] for more information on timezone handling
fn make_value(naive: NaiveDateTime) -> Option<i64>;

/// Creates a timestamp value from a [`DateTime`] in any timezone.
///
/// This is a required method: every timestamp type supplies its own
/// implementation, so the unit of the returned value is whatever the
/// implementing type uses.
///
/// Returns `None` if the timestamp value would overflow the i64 range
/// (e.g., for nanosecond precision with extreme datetime values).
///
/// # Arguments
///
/// * `datetime` - The datetime to convert
fn from_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Option<i64>;

/// Converts a wall-clock [`NaiveDateTime`] into a timestamp value, resolving it
/// against an optional timezone.
///
/// # Arguments
///
/// * `naive` - The local datetime to convert
/// * `tz` - Timezone in which `naive` is a local time. With `None` the value is
///   passed straight to [`Self::make_value`].
///
/// # Returns
///
/// * The result of [`Self::from_datetime`] when `naive` maps to exactly one
///   instant in `tz`.
/// * For an ambiguous local time, the result for the first of the two
///   candidate instants.
/// * `None` when `naive` does not exist as a local time in `tz`.
fn from_naive_datetime(naive: NaiveDateTime, tz: Option<&Tz>) -> Option<i64> {
    use chrono::offset::LocalResult;

    // No timezone supplied: fall back to the plain conversion.
    let Some(zone) = tz else {
        return Self::make_value(naive);
    };

    match zone.from_local_datetime(&naive) {
        // A unique mapping and the first candidate of an ambiguous one are
        // converted the same way.
        LocalResult::Single(resolved) | LocalResult::Ambiguous(resolved, _) => {
            Self::from_datetime(resolved)
        }
        LocalResult::None => None,
    }
}
}

impl ArrowTimestampType for TimestampSecondType {
Expand All @@ -333,6 +361,10 @@ impl ArrowTimestampType for TimestampSecondType {
/// Treats `naive` as UTC and returns its `timestamp()` value; always `Some`.
fn make_value(naive: NaiveDateTime) -> Option<i64> {
    let seconds = naive.and_utc().timestamp();
    Some(seconds)
}

/// Returns the `timestamp()` (seconds) value of `datetime`; always `Some`.
fn from_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Option<i64> {
    let seconds = datetime.timestamp();
    Some(seconds)
}
}
impl ArrowTimestampType for TimestampMillisecondType {
const UNIT: TimeUnit = TimeUnit::Millisecond;
Expand All @@ -342,6 +374,10 @@ impl ArrowTimestampType for TimestampMillisecondType {
let millis = utc.timestamp().checked_mul(1_000)?;
millis.checked_add(utc.timestamp_subsec_millis() as i64)
}

/// Returns the `timestamp_millis()` value of `datetime`; always `Some`.
fn from_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Option<i64> {
    let millis = datetime.timestamp_millis();
    Some(millis)
}
}
impl ArrowTimestampType for TimestampMicrosecondType {
const UNIT: TimeUnit = TimeUnit::Microsecond;
Expand All @@ -351,6 +387,10 @@ impl ArrowTimestampType for TimestampMicrosecondType {
let micros = utc.timestamp().checked_mul(1_000_000)?;
micros.checked_add(utc.timestamp_subsec_micros() as i64)
}

/// Returns the `timestamp_micros()` value of `datetime`; always `Some`.
fn from_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Option<i64> {
    let micros = datetime.timestamp_micros();
    Some(micros)
}
}
impl ArrowTimestampType for TimestampNanosecondType {
const UNIT: TimeUnit = TimeUnit::Nanosecond;
Expand All @@ -360,6 +400,10 @@ impl ArrowTimestampType for TimestampNanosecondType {
let nanos = utc.timestamp().checked_mul(1_000_000_000)?;
nanos.checked_add(utc.timestamp_subsec_nanos() as i64)
}

fn from_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Option<i64> {
datetime.timestamp_nanos_opt()
}
}

fn add_year_months<T: ArrowTimestampType>(
Expand Down Expand Up @@ -1887,6 +1931,7 @@ impl ByteViewType for BinaryViewType {
mod tests {
use super::*;
use arrow_data::{BufferSpec, layout};
use chrono::DateTime;

#[test]
fn month_day_nano_should_roundtrip() {
Expand Down Expand Up @@ -1975,4 +2020,134 @@ mod tests {
test_layout::<DurationMillisecondType>();
test_layout::<DurationSecondType>();
}

/// Checks `from_datetime` for every timestamp unit against UTC, fixed-offset
/// and sub-second inputs.
#[test]
fn timestamp_from_datetime() {
    use chrono::{FixedOffset, NaiveDate, NaiveTime, Utc};

    // Seconds since the Unix epoch for 2021-01-01T12:00:00Z.
    const NOON_UTC_SECS: i64 = 1_609_502_400;

    let noon = NaiveDateTime::new(
        NaiveDate::from_ymd_opt(2021, 1, 1).unwrap(),
        NaiveTime::from_hms_opt(12, 0, 0).unwrap(),
    );

    // UTC input: the same instant, scaled to each unit.
    let in_utc = Utc.from_utc_datetime(&noon);
    assert_eq!(
        TimestampSecondType::from_datetime(in_utc),
        Some(NOON_UTC_SECS)
    );
    assert_eq!(
        TimestampMillisecondType::from_datetime(in_utc),
        Some(NOON_UTC_SECS * 1_000)
    );
    assert_eq!(
        TimestampMicrosecondType::from_datetime(in_utc),
        Some(NOON_UTC_SECS * 1_000_000)
    );
    assert_eq!(
        TimestampNanosecondType::from_datetime(in_utc),
        Some(NOON_UTC_SECS * 1_000_000_000)
    );

    // Fixed offset input: noon at +08:00 is 04:00 UTC, eight hours earlier.
    let plus_eight = FixedOffset::east_opt(8 * 3600).unwrap();
    let in_plus_eight = plus_eight.from_local_datetime(&noon).unwrap();
    assert_eq!(
        TimestampSecondType::from_datetime(in_plus_eight),
        Some(NOON_UTC_SECS - 8 * 3600)
    );

    // Sub-second input: digits beyond each unit's resolution are dropped.
    let fractional = DateTime::from_timestamp(1_000_000_000, 123_456_789).unwrap();
    assert_eq!(
        TimestampSecondType::from_datetime(fractional),
        Some(1_000_000_000)
    );
    assert_eq!(
        TimestampMillisecondType::from_datetime(fractional),
        Some(1_000_000_000_123)
    );
    assert_eq!(
        TimestampMicrosecondType::from_datetime(fractional),
        Some(1_000_000_000_123_456)
    );
    assert_eq!(
        TimestampNanosecondType::from_datetime(fractional),
        Some(1_000_000_000_123_456_789)
    );
}

/// Checks `from_naive_datetime` with no timezone, with fixed-offset timezones,
/// and through a round trip back to the local time.
#[test]
fn timestamp_from_naive_datetime() {
    use crate::temporal_conversions::as_datetime_with_timezone;
    use chrono::{NaiveDate, NaiveTime};

    let noon = NaiveDateTime::new(
        NaiveDate::from_ymd_opt(2021, 1, 1).unwrap(),
        NaiveTime::from_hms_opt(12, 0, 0).unwrap(),
    );

    // No timezone: the naive value is read as UTC.
    assert_eq!(
        TimestampSecondType::from_naive_datetime(noon, None),
        Some(1_609_502_400)
    );
    assert_eq!(
        TimestampMillisecondType::from_naive_datetime(noon, None),
        Some(1_609_502_400_000)
    );

    // Noon at -05:00 is 17:00 UTC, five hours later than noon UTC.
    let minus_five: Tz = "-05:00".parse().unwrap();
    assert_eq!(
        TimestampSecondType::from_naive_datetime(noon, Some(&minus_five)),
        Some(1_609_502_400 + 5 * 3600)
    );

    // Round trip: converting the timestamp back in the same timezone must
    // reproduce the original local time.
    let afternoon = NaiveDateTime::new(
        NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
        NaiveTime::from_hms_opt(14, 30, 45).unwrap(),
    );
    let plus_one: Tz = "+01:00".parse().unwrap();

    let micros =
        TimestampMicrosecondType::from_naive_datetime(afternoon, Some(&plus_one)).unwrap();
    let round_tripped =
        as_datetime_with_timezone::<TimestampMicrosecondType>(micros, plus_one).unwrap();
    assert_eq!(round_tripped.naive_local(), afternoon);
}

/// Checks that an ambiguous local time resolves to the first candidate
/// instant rather than failing.
#[test]
#[cfg(feature = "chrono-tz")]
fn timestamp_from_naive_datetime_ambiguous() {
    use chrono::{NaiveDate, NaiveTime};

    // 2024-11-03 01:30:00 in US Eastern Time is ambiguous (daylight saving time ends)
    // It can be either 01:30:00 EDT (UTC-4) or 01:30:00 EST (UTC-5)
    let date = NaiveDate::from_ymd_opt(2024, 11, 3).unwrap();
    let time = NaiveTime::from_hms_opt(1, 30, 0).unwrap();
    let naive = NaiveDateTime::new(date, time);
    let tz: Tz = "America/New_York".parse().unwrap();

    // Should return the first candidate (EDT, UTC-4) = 05:30:00 UTC.
    // 1730611800 is 2024-11-03T05:30:00Z; the EST reading would be 3600 s later.
    let result = TimestampSecondType::from_naive_datetime(naive, Some(&tz));
    assert_eq!(result, Some(1730611800));
}

/// Checks that a local time skipped by a daylight-saving jump yields `None`
/// for every timestamp unit.
#[test]
#[cfg(feature = "chrono-tz")]
fn timestamp_from_naive_datetime_none() {
    use chrono::{NaiveDate, NaiveTime};

    // US Eastern Time jumps from 02:00 straight to 03:00 on 2024-03-10 when
    // daylight saving time starts, so 02:30:00 never occurs on that day.
    let skipped = NaiveDateTime::new(
        NaiveDate::from_ymd_opt(2024, 3, 10).unwrap(),
        NaiveTime::from_hms_opt(2, 30, 0).unwrap(),
    );
    let eastern: Tz = "America/New_York".parse().unwrap();
    let zone = Some(&eastern);

    // Every unit must report the nonexistent local time as `None`.
    assert_eq!(TimestampSecondType::from_naive_datetime(skipped, zone), None);
    assert_eq!(TimestampMillisecondType::from_naive_datetime(skipped, zone), None);
    assert_eq!(TimestampMicrosecondType::from_naive_datetime(skipped, zone), None);
    assert_eq!(TimestampNanosecondType::from_naive_datetime(skipped, zone), None);
}
}
Loading