Skip to content

Commit dd08857

Browse files
authored
perf: to_datetime() now avoids caching inputs unless data is inspected to infer format (#1667)
1 parent 86b7504 commit dd08857

File tree

3 files changed

+62
-1
lines changed

bigframes/core/tools/datetimes.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def to_datetime(
5252
f"to datetime is not implemented. {constants.FEEDBACK_LINK}"
5353
)
5454

55-
arg = bigframes.series.Series(arg)._cached()
55+
arg = bigframes.series.Series(arg)
5656

5757
if format and unit and arg.dtype in (bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE): # type: ignore
5858
raise ValueError("cannot specify both format and unit")
@@ -74,6 +74,11 @@ def to_datetime(
7474
)
7575

7676
assert unit is None
77+
78+
# The following operations evaluate individual values to infer a format,
79+
# so cache if needed.
80+
arg = arg._cached(force=False)
81+
7782
as_datetime = arg._apply_unary_op( # type: ignore
7883
ops.ToDatetimeOp(
7984
format=format,

tests/unit/core/tools/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
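tests/unit/core/tools/test_datetimes.py (path inferred; the file name is missing from the extracted page)

Lines changed: 43 additions & 0 deletions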
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from typing import cast
16+
from unittest import mock
17+
18+
import bigframes.core.tools.datetimes
19+
import bigframes.dtypes
20+
import bigframes.pandas
21+
import bigframes.testing.mocks
22+
23+
24+
def test_to_datetime_with_series_and_format_doesnt_cache(monkeypatch):
25+
df = bigframes.testing.mocks.create_dataframe(monkeypatch)
26+
series = mock.Mock(spec=bigframes.pandas.Series, wraps=df["col"])
27+
dt_series = cast(
28+
bigframes.pandas.Series,
29+
bigframes.core.tools.datetimes.to_datetime(series, format="%Y%m%d"),
30+
)
31+
series._cached.assert_not_called()
32+
assert dt_series.dtype == bigframes.dtypes.DATETIME_DTYPE
33+
34+
35+
def test_to_datetime_with_series_and_format_utc_doesnt_cache(monkeypatch):
36+
df = bigframes.testing.mocks.create_dataframe(monkeypatch)
37+
series = mock.Mock(spec=bigframes.pandas.Series, wraps=df["col"])
38+
dt_series = cast(
39+
bigframes.pandas.Series,
40+
bigframes.core.tools.datetimes.to_datetime(series, format="%Y%m%d", utc=True),
41+
)
42+
series._cached.assert_not_called()
43+
assert dt_series.dtype == bigframes.dtypes.TIMESTAMP_DTYPE

0 commit comments

Comments
 (0)