Skip to content

Commit 06f9dbd

Browse files
more test fixes
1 parent 73aff68 commit 06f9dbd

File tree

4 files changed

+74
-52
lines changed

4 files changed

+74
-52
lines changed

bigframes/testing/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
from bigframes import operations as ops
3131
from bigframes.core import expression as ex
32-
import bigframes.dtypes
3332
import bigframes.functions._utils as bff_utils
3433
import bigframes.pandas as bpd
3534

@@ -90,7 +89,10 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
9089

9190

9291
def _normalize_all_nulls(col: pd.Series) -> pd.Series:
93-
if col.dtype in (bigframes.dtypes.FLOAT_DTYPE, bigframes.dtypes.INT_DTYPE):
92+
# TODO: this probably over-normalizes; make it more conservative later.
93+
if col.hasnans and (
94+
pd_types.is_float_dtype(col.dtype) or pd_types.is_integer_dtype(col.dtype)
95+
):
9496
col = col.astype("float64")
9597
if pd_types.is_object_dtype(col):
9698
col = col.fillna(float("nan"))

tests/system/small/operations/test_timedeltas.py

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,13 @@ def temporal_dfs(session):
8383

8484

8585
def _assert_series_equal(actual: pd.Series, expected: pd.Series):
86-
"""Helper function specifically for timedelta testsing. Don't use it outside of this module."""
86+
"""Helper function specifically for timedelta testing. Don't use it outside of this module."""
87+
# expected[expected.select_dtypes('timedelta64').columns] = expected.select_dtypes('timedelta64').astype(dtypes.TIMEDELTA_DTYPE)
8788
bigframes.testing.assert_series_equal(
8889
actual,
89-
expected,
90+
expected, # .convert_dtypes(dtype_backend="pyarrow"),
9091
check_index_type=False,
92+
check_dtype=False,
9193
)
9294

9395

@@ -117,15 +119,15 @@ def test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2):
117119
@pytest.mark.parametrize(
118120
("op", "col", "literal"),
119121
[
120-
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s")),
121-
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s")),
122-
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s")),
123-
(operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s")),
122+
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
123+
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
124+
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
125+
(operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
124126
(operator.truediv, "timedelta_col_1", 3),
125127
(operator.floordiv, "timedelta_col_1", 3),
126128
(operator.mul, "timedelta_col_1", 3),
127-
(operator.mul, "float_col", pd.Timedelta(1, "s")),
128-
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s")),
129+
(operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us")),
130+
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us")),
129131
],
130132
)
131133
def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal):
@@ -140,15 +142,15 @@ def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal)
140142
@pytest.mark.parametrize(
141143
("op", "col", "literal"),
142144
[
143-
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s")),
144-
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s")),
145-
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s")),
146-
(operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s")),
147-
(operator.truediv, "float_col", pd.Timedelta(2, "s")),
148-
(operator.floordiv, "float_col", pd.Timedelta(2, "s")),
145+
(operator.add, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
146+
(operator.sub, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
147+
(operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
148+
(operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s").as_unit("us")),
149+
(operator.truediv, "float_col", pd.Timedelta(2, "s").as_unit("us")),
150+
(operator.floordiv, "float_col", pd.Timedelta(2, "s").as_unit("us")),
149151
(operator.mul, "timedelta_col_1", 3),
150-
(operator.mul, "float_col", pd.Timedelta(1, "s")),
151-
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s")),
152+
(operator.mul, "float_col", pd.Timedelta(1, "s").as_unit("us")),
153+
(operator.mod, "timedelta_col_1", pd.Timedelta(7, "s").as_unit("us")),
152154
],
153155
)
154156
def test_timedelta_binary_ops_literal_and_series(temporal_dfs, op, col, literal):
@@ -201,7 +203,7 @@ def test_timestamp_add__ts_series_plus_td_series__explicit_cast(temporal_dfs, co
201203
@pytest.mark.parametrize(
202204
"literal",
203205
[
204-
pytest.param(pd.Timedelta(1, unit="s"), id="pandas"),
206+
pytest.param(pd.Timedelta(1, unit="s").as_unit("us"), id="pandas"),
205207
pytest.param(datetime.timedelta(seconds=1), id="python-datetime"),
206208
pytest.param(np.timedelta64(1, "s"), id="numpy"),
207209
],
@@ -237,7 +239,7 @@ def test_timestamp_add__td_series_plus_ts_series(temporal_dfs, column, pd_dtype)
237239

238240
def test_timestamp_add__td_literal_plus_ts_series(temporal_dfs):
239241
bf_df, pd_df = temporal_dfs
240-
timedelta = pd.Timedelta(1, unit="s")
242+
timedelta = pd.Timedelta(1, unit="s").as_unit("us")
241243

242244
actual_result = (timedelta + bf_df["datetime_col"]).to_pandas()
243245

@@ -279,7 +281,7 @@ def test_timestamp_add_with_numpy_op(temporal_dfs, column, pd_dtype):
279281

280282
def test_timestamp_add_dataframes(temporal_dfs):
281283
columns = ["datetime_col", "timestamp_col"]
282-
timedelta = pd.Timedelta(1, unit="s")
284+
timedelta = pd.Timedelta(1, unit="s").as_unit("us")
283285
bf_df, pd_df = temporal_dfs
284286

285287
actual_result = (bf_df[columns] + timedelta).to_pandas()
@@ -363,7 +365,7 @@ def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype):
363365

364366
def test_timestamp_sub_dataframes(temporal_dfs):
365367
columns = ["datetime_col", "timestamp_col"]
366-
timedelta = pd.Timedelta(1, unit="s")
368+
timedelta = pd.Timedelta(1, unit="s").as_unit("us")
367369
bf_df, pd_df = temporal_dfs
368370

369371
actual_result = (bf_df[columns] - timedelta).to_pandas()
@@ -490,7 +492,7 @@ def test_timedelta_series_comparison(temporal_dfs, compare_func):
490492
)
491493
def test_timedelta_series_and_literal_comparison(temporal_dfs, compare_func):
492494
bf_df, pd_df = temporal_dfs
493-
literal = pd.Timedelta(3, "s")
495+
literal = pd.Timedelta(3, "s").as_unit("us")
494496

495497
actual_result = compare_func(literal, bf_df["timedelta_col_2"]).to_pandas()
496498

tests/system/small/test_pandas.py

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -817,9 +817,11 @@ def test_to_datetime_iterable(arg, utc, unit, format):
817817
.to_pandas()
818818
.astype("datetime64[ns, UTC]" if utc else "datetime64[ns]")
819819
)
820-
pd_result = pd.Series(
821-
pd.to_datetime(arg, utc=utc, unit=unit, format=format)
822-
).dt.floor("us")
820+
pd_result = (
821+
pd.Series(pd.to_datetime(arg, utc=utc, unit=unit, format=format))
822+
.dt.floor("us")
823+
.astype("datetime64[ns, UTC]" if utc else "datetime64[ns]")
824+
)
823825
bigframes.testing.assert_series_equal(
824826
bf_result, pd_result, check_index_type=False, check_names=False
825827
)
@@ -831,7 +833,9 @@ def test_to_datetime_series(scalars_dfs):
831833
bf_result = (
832834
bpd.to_datetime(scalars_df[col], unit="s").to_pandas().astype("datetime64[s]")
833835
)
834-
pd_result = pd.Series(pd.to_datetime(scalars_pandas_df[col], unit="s"))
836+
pd_result = pd.Series(pd.to_datetime(scalars_pandas_df[col], unit="s")).astype(
837+
"datetime64[s]"
838+
)
835839
bigframes.testing.assert_series_equal(
836840
bf_result, pd_result, check_index_type=False, check_names=False
837841
)
@@ -853,7 +857,11 @@ def test_to_datetime_series(scalars_dfs):
853857
)
854858
def test_to_datetime_unit_param(arg, unit):
855859
bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]")
856-
pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us")
860+
pd_result = (
861+
pd.Series(pd.to_datetime(arg, unit=unit))
862+
.dt.floor("us")
863+
.astype("datetime64[ns]")
864+
)
857865
bigframes.testing.assert_series_equal(
858866
bf_result, pd_result, check_index_type=False, check_names=False
859867
)
@@ -874,7 +882,11 @@ def test_to_datetime_format_param(arg, utc, format):
874882
.to_pandas()
875883
.astype("datetime64[ns, UTC]" if utc else "datetime64[ns]")
876884
)
877-
pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us")
885+
pd_result = (
886+
pd.Series(pd.to_datetime(arg, utc=utc, format=format))
887+
.dt.floor("us")
888+
.astype("datetime64[ns, UTC]" if utc else "datetime64[ns]")
889+
)
878890
bigframes.testing.assert_series_equal(
879891
bf_result, pd_result, check_index_type=False, check_names=False
880892
)
@@ -922,12 +934,17 @@ def test_to_datetime_format_param(arg, utc, format):
922934
],
923935
)
924936
def test_to_datetime_string_inputs(arg, utc, output_in_utc, format):
937+
normalized_type = "datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]"
938+
925939
bf_result = (
926-
bpd.to_datetime(arg, utc=utc, format=format)
927-
.to_pandas()
928-
.astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]")
940+
bpd.to_datetime(arg, utc=utc, format=format).to_pandas().astype(normalized_type)
941+
)
942+
pd_result = (
943+
pd.Series(pd.to_datetime(arg, utc=utc, format=format))
944+
.dt.floor("us")
945+
.astype(normalized_type)
929946
)
930-
pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us")
947+
931948
bigframes.testing.assert_series_equal(
932949
bf_result, pd_result, check_index_type=False, check_names=False
933950
)
@@ -965,12 +982,13 @@ def test_to_datetime_string_inputs(arg, utc, output_in_utc, format):
965982
],
966983
)
967984
def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc):
968-
bf_result = (
969-
bpd.to_datetime(arg, utc=utc)
970-
.to_pandas()
971-
.astype("datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]")
985+
normalized_type = "datetime64[ns, UTC]" if output_in_utc else "datetime64[ns]"
986+
987+
bf_result = bpd.to_datetime(arg, utc=utc).to_pandas().astype(normalized_type)
988+
pd_result = (
989+
pd.Series(pd.to_datetime(arg, utc=utc)).dt.floor("us").astype(normalized_type)
972990
)
973-
pd_result = pd.Series(pd.to_datetime(arg, utc=utc)).dt.floor("us")
991+
974992
bigframes.testing.assert_series_equal(
975993
bf_result, pd_result, check_index_type=False, check_names=False
976994
)
@@ -1087,7 +1105,7 @@ def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs):
10871105
bpd.to_timedelta(bf_series, unit="s").to_pandas().astype("timedelta64[ns]")
10881106
)
10891107

1090-
expected_result = pd.to_timedelta(pd_series, unit="s")
1108+
expected_result = pd.to_timedelta(pd_series, unit="s").astype("timedelta64[ns]")
10911109
bigframes.testing.assert_series_equal(
10921110
actual_result, expected_result, check_index_type=False
10931111
)

tests/unit/core/test_groupby.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import bigframes.core.utils as utils
2020
import bigframes.pandas as bpd
21-
from bigframes.testing.utils import assert_series_equal
21+
import bigframes.testing
2222

2323
pytest.importorskip("polars")
2424
pytest.importorskip("pandas", minversion="2.0.0")
@@ -33,7 +33,7 @@ def test_groupby_df_iter_by_key_singular(polars_session):
3333
bf_result = bf_group_df.to_pandas()
3434
pd_key, pd_result = pd_group
3535
assert bf_key == pd_key
36-
pandas.testing.assert_frame_equal(
36+
bigframes.testing.assert_frame_equal(
3737
bf_result, pd_result, check_dtype=False, check_index_type=False
3838
)
3939

@@ -47,7 +47,7 @@ def test_groupby_df_iter_by_key_list(polars_session):
4747
bf_result = bf_group_df.to_pandas()
4848
pd_key, pd_result = pd_group
4949
assert bf_key == pd_key
50-
pandas.testing.assert_frame_equal(
50+
bigframes.testing.assert_frame_equal(
5151
bf_result, pd_result, check_dtype=False, check_index_type=False
5252
)
5353

@@ -69,7 +69,7 @@ def test_groupby_df_iter_by_key_list_multiple(polars_session):
6969
bf_result = bf_group_df.to_pandas()
7070
pd_key, pd_result = pd_group
7171
assert bf_key == pd_key
72-
pandas.testing.assert_frame_equal(
72+
bigframes.testing.assert_frame_equal(
7373
bf_result, pd_result, check_dtype=False, check_index_type=False
7474
)
7575

@@ -85,7 +85,7 @@ def test_groupby_df_iter_by_level_singular(polars_session):
8585
bf_result = bf_group_df.to_pandas()
8686
pd_key, pd_result = pd_group
8787
assert bf_key == pd_key
88-
pandas.testing.assert_frame_equal(
88+
bigframes.testing.assert_frame_equal(
8989
bf_result, pd_result, check_dtype=False, check_index_type=False
9090
)
9191

@@ -109,7 +109,7 @@ def test_groupby_df_iter_by_level_list_one_item(polars_session):
109109
assert bf_key == tuple(pd_key)
110110
else:
111111
assert bf_key == (pd_key,)
112-
pandas.testing.assert_frame_equal(
112+
bigframes.testing.assert_frame_equal(
113113
bf_result, pd_result, check_dtype=False, check_index_type=False
114114
)
115115

@@ -131,7 +131,7 @@ def test_groupby_df_iter_by_level_list_multiple(polars_session):
131131
bf_result = bf_group_df.to_pandas()
132132
pd_key, pd_result = pd_group
133133
assert bf_key == pd_key
134-
pandas.testing.assert_frame_equal(
134+
bigframes.testing.assert_frame_equal(
135135
bf_result, pd_result, check_dtype=False, check_index_type=False
136136
)
137137

@@ -149,7 +149,7 @@ def test_groupby_series_iter_by_level_singular(polars_session):
149149
bf_result = bf_group_series.to_pandas()
150150
pd_key, pd_result = pd_group
151151
assert bf_key == pd_key
152-
pandas.testing.assert_series_equal(
152+
bigframes.testing.assert_series_equal(
153153
bf_result, pd_result, check_dtype=False, check_index_type=False
154154
)
155155

@@ -175,7 +175,7 @@ def test_groupby_series_iter_by_level_list_one_item(polars_session):
175175
assert bf_key == tuple(pd_key)
176176
else:
177177
assert bf_key == (pd_key,)
178-
pandas.testing.assert_series_equal(
178+
bigframes.testing.assert_series_equal(
179179
bf_result, pd_result, check_dtype=False, check_index_type=False
180180
)
181181

@@ -199,7 +199,7 @@ def test_groupby_series_iter_by_level_list_multiple(polars_session):
199199
bf_result = bf_group_df.to_pandas()
200200
pd_key, pd_result = pd_group
201201
assert bf_key == pd_key
202-
pandas.testing.assert_series_equal(
202+
bigframes.testing.assert_series_equal(
203203
bf_result, pd_result, check_dtype=False, check_index_type=False
204204
)
205205

@@ -218,7 +218,7 @@ def test_groupby_series_iter_by_series(polars_session):
218218
bf_result = bf_group_series.to_pandas()
219219
pd_key, pd_result = pd_group
220220
assert bf_key == pd_key
221-
assert_series_equal(
221+
bigframes.testing.assert_series_equal(
222222
bf_result, pd_result, check_dtype=False, check_index_type=False
223223
)
224224

@@ -237,7 +237,7 @@ def test_groupby_series_iter_by_series_list_one_item(polars_session):
237237
bf_result = bf_group_series.to_pandas()
238238
pd_key, pd_result = pd_group
239239
assert bf_key == pd_key
240-
assert_series_equal(
240+
bigframes.testing.assert_series_equal(
241241
bf_result, pd_result, check_dtype=False, check_index_type=False
242242
)
243243

@@ -259,6 +259,6 @@ def test_groupby_series_iter_by_series_list_multiple(polars_session):
259259
bf_result = bf_group_series.to_pandas()
260260
pd_key, pd_result = pd_group
261261
assert bf_key == pd_key
262-
assert_series_equal(
262+
bigframes.testing.assert_series_equal(
263263
bf_result, pd_result, check_dtype=False, check_index_type=False
264264
)

0 commit comments

Comments
 (0)