Skip to content

Commit 48cac54

Browse files
wtn and claude committed
fix(rust): Return null for out-of-range dates in temporal operations
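Change the `to_temporal_unit!`, `to_calendar_value!`, and `to_boolean_temporal_unit!` macros to return null for out-of-range dates. Previously, when the datetime conversion failed, the primitive kernels silently fell back to the raw input value cast to the output type, and the boolean kernel fell back to `false`.
Co-authored-by: Claude <noreply@anthropic.com>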
1 parent 850c943 commit 48cac54

File tree

3 files changed

+79
-72
lines changed

3 files changed

+79
-72
lines changed

crates/polars-time/src/chunkedarray/kernels.rs

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! macros that define kernels for extracting
22
//! `week`, `weekday`, `year`, `hour` etc. from primitive arrays.
33
use arrow::array::{BooleanArray, PrimitiveArray};
4-
use arrow::compute::arity::unary;
54
#[cfg(feature = "dtype-time")]
65
use arrow::temporal_conversions::time64ns_to_time_opt;
76
use arrow::temporal_conversions::{
@@ -42,14 +41,12 @@ macro_rules! to_temporal_unit {
4241
$primitive_out: ty,
4342
$dtype_out:expr) => {
4443
pub(crate) fn $name(arr: &PrimitiveArray<$primitive_in>) -> ArrayRef {
45-
Box::new(unary(
46-
arr,
47-
|value| {
48-
$to_datetime_fn(value)
49-
.map(|dt| dt.$chrono_method() as $primitive_out)
50-
.unwrap_or(value as $primitive_out)
51-
},
52-
$dtype_out,
44+
Box::new(PrimitiveArray::<$primitive_out>::from_trusted_len_iter(
45+
arr.iter().map(|opt_value| {
46+
opt_value.and_then(|&value| {
47+
$to_datetime_fn(value).map(|dt| dt.$chrono_method() as $primitive_out)
48+
})
49+
}),
5350
)) as ArrayRef
5451
}
5552
};
@@ -58,20 +55,13 @@ macro_rules! to_temporal_unit {
5855
macro_rules! to_boolean_temporal_unit {
5956
($name: ident, $chrono_method: ident, $boolean_method: ident, $to_datetime_fn: expr, $dtype_in: ty) => {
6057
pub(crate) fn $name(arr: &PrimitiveArray<$dtype_in>) -> ArrayRef {
61-
let values = arr
62-
.values()
63-
.iter()
64-
.map(|value| {
65-
$to_datetime_fn(*value)
66-
.map(|dt| $boolean_method(dt.$chrono_method()))
67-
.unwrap_or(false)
68-
})
69-
.collect::<Vec<_>>();
70-
Box::new(BooleanArray::new(
71-
ArrowDataType::Boolean,
72-
values.into(),
73-
arr.validity().cloned(),
74-
))
58+
Box::new(BooleanArray::from_trusted_len_iter(arr.iter().map(
59+
|opt_value| {
60+
opt_value.and_then(|&value| {
61+
$to_datetime_fn(value).map(|dt| $boolean_method(dt.$chrono_method()))
62+
})
63+
},
64+
)))
7565
}
7666
};
7767
}
@@ -82,14 +72,12 @@ macro_rules! to_calendar_value {
8272
$primitive_out: ty,
8373
$dtype_out:expr) => {
8474
pub(crate) fn $name(arr: &PrimitiveArray<$primitive_in>) -> ArrayRef {
85-
Box::new(unary(
86-
arr,
87-
|value| {
88-
$to_datetime_fn(value)
89-
.map(|$dt| $expr as $primitive_out)
90-
.unwrap_or(value as $primitive_out)
91-
},
92-
$dtype_out,
75+
Box::new(PrimitiveArray::<$primitive_out>::from_trusted_len_iter(
76+
arr.iter().map(|opt_value| {
77+
opt_value.and_then(|&value| {
78+
$to_datetime_fn(value).map(|$dt| $expr as $primitive_out)
79+
})
80+
}),
9381
)) as ArrayRef
9482
}
9583
};
Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,49 @@
1-
use polars::prelude::*;
1+
#[test]
2+
#[cfg(feature = "dtype-date")]
3+
fn test_date_temporal_operations_11991() {
4+
use polars::prelude::*;
5+
6+
let normal_date = 18628; // 2021-01-01
7+
let s = Int32Chunked::new("".into(), &[normal_date])
8+
.into_date()
9+
.into_series();
10+
11+
let year = s.year().unwrap();
12+
assert_eq!(year.get(0), Some(2021));
13+
14+
let month = s.month().unwrap();
15+
assert_eq!(month.get(0), Some(1));
16+
17+
let day = s.day().unwrap();
18+
assert_eq!(day.get(0), Some(1));
19+
20+
// Null values should remain null (regression test for #15313)
21+
let s_with_null = Int32Chunked::new("".into(), &[Some(18628), None])
22+
.into_date()
23+
.into_series();
24+
25+
let year_with_null = s_with_null.year().unwrap();
26+
assert_eq!(year_with_null.get(0), Some(2021));
27+
assert_eq!(year_with_null.get(1), None);
28+
}
229

330
#[test]
4-
fn test_datetime_parse_overflow_7631() {
5-
let df = df![
6-
"year"=> &[2020, 2021, 2022],
7-
"month"=> &[1, 2, 3],
8-
"day"=> &[1, 2, 3],
9-
]
10-
.unwrap()
11-
.lazy();
12-
13-
let df = df.with_column(
14-
concat_str([col("year"), col("month"), col("day")], "-", false) // produces e.g., `2020-1-1`
15-
.str()
16-
.strptime(
17-
DataType::Datetime(TimeUnit::Milliseconds, None),
18-
StrptimeOptions {
19-
format: Some("%Y-%m-%_d".into()),
20-
..Default::default()
21-
},
22-
lit("latest"),
23-
)
24-
.alias("dt1"),
25-
);
26-
let actual = df.collect().unwrap();
27-
28-
let expected = DataFrame::new(vec![
29-
Column::new("year".into(), &[2020, 2021, 2022]),
30-
Column::new("month".into(), &[1, 2, 3]),
31-
Column::new("day".into(), &[1, 2, 3]),
32-
Column::new(
33-
"dt1".into(),
34-
&[
35-
AnyValue::Datetime(1577836800000, TimeUnit::Milliseconds, None),
36-
AnyValue::Datetime(1612224000000, TimeUnit::Milliseconds, None),
37-
AnyValue::Datetime(1646265600000, TimeUnit::Milliseconds, None),
38-
],
39-
),
40-
])
41-
.unwrap();
42-
43-
assert_eq!(actual, expected);
31+
#[cfg(feature = "dtype-date")]
32+
fn test_out_of_range_date_year_11991() {
33+
use polars::prelude::*;
34+
35+
// Out-of-range dates should return null instead of panicking or returning wrong values
36+
// Regression test for #11991 where out-of-range dates silently returned the input value
37+
let out_of_range_date = -96_465_659;
38+
let s = Int32Chunked::new("".into(), &[out_of_range_date])
39+
.into_date()
40+
.into_series();
41+
42+
let year = s.year().unwrap();
43+
// Should return null, not the input value -96465659
44+
assert_eq!(year.get(0), None);
45+
46+
// is_leap_year should also return null for out-of-range dates
47+
let is_leap = s.is_leap_year().unwrap();
48+
assert_eq!(is_leap.get(0), None);
4449
}

py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,3 +1551,17 @@ def test_literal_from_timedelta(value: time, dtype: pl.Duration | None) -> None:
15511551
out = pl.select(pl.lit(value, dtype=dtype))
15521552
assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")})
15531553
assert out.item() == value
1554+
1555+
1556+
def test_out_of_range_date_year_11991() -> None:
1557+
# Out-of-range dates should return null instead of wrong values or panicking
1558+
# Regression test for #11991 where out-of-range dates silently returned
1559+
# the input value
1560+
s = pl.Series([-96_465_659]).cast(pl.Date)
1561+
result = s.dt.year()
1562+
# Should return null, not the input value -96465659
1563+
assert result[0] is None
1564+
1565+
# is_leap_year should also return null for out-of-range dates
1566+
result_leap = s.dt.is_leap_year()
1567+
assert result_leap[0] is None

0 commit comments

Comments
 (0)