Skip to content

Commit 73aff68

Browse files
fix more tests
1 parent 3abb831 commit 73aff68

File tree

11 files changed

+328
-297
lines changed

11 files changed

+328
-297
lines changed

bigframes/testing/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717
These modules are provided for testing the BigQuery DataFrames package. The
1818
interface is not considered stable.
1919
"""
20-
from bigframes.testing.utils import assert_frame_equal, assert_series_equal
20+
from bigframes.testing.utils import (
21+
assert_frame_equal,
22+
assert_index_equal,
23+
assert_series_equal,
24+
)
2125

22-
__all__ = [
23-
"assert_frame_equal",
24-
"assert_series_equal",
25-
]
26+
__all__ = ["assert_frame_equal", "assert_series_equal", "assert_index_equal"]

bigframes/testing/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ def assert_series_equal(
157157
pd.testing.assert_series_equal(left, right, **kwargs)
158158

159159

160+
def assert_index_equal(left, right, **kwargs):
161+
pd.testing.assert_index_equal(left, right, **kwargs)
162+
163+
160164
def _standardize_index(idx):
161165
return pd.Index(list(idx), name=idx.name)
162166

tests/system/small/bigquery/test_struct.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import pandas as pd
1615
import pytest
1716

1817
import bigframes.bigquery as bbq
1918
import bigframes.series as series
19+
import bigframes.testing
2020

2121

2222
@pytest.mark.parametrize(
@@ -53,9 +53,10 @@ def test_struct_from_dataframe(columns_arg):
5353
srs = series.Series(
5454
columns_arg,
5555
)
56-
pd.testing.assert_series_equal(
56+
bigframes.testing.assert_series_equal(
5757
srs.to_pandas(),
5858
bbq.struct(srs.struct.explode()).to_pandas(),
5959
check_index_type=False,
6060
check_dtype=False,
61+
check_names=False, # None vs nan version dependent
6162
)

tests/system/small/ml/test_metrics.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import bigframes
2222
from bigframes.ml import metrics
23+
import bigframes.testing
2324

2425

2526
def test_r2_score_perfect_fit(session):
@@ -161,7 +162,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
161162
pd_tpr = tpr.to_pandas()
162163
pd_thresholds = thresholds.to_pandas()
163164

164-
pd.testing.assert_series_equal(
165+
bigframes.testing.assert_series_equal(
165166
# skip testing the first value, as it is redundant and inconsistent across sklearn versions
166167
pd_thresholds[1:],
167168
pd.Series(
@@ -171,7 +172,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
171172
),
172173
check_index=False,
173174
)
174-
pd.testing.assert_series_equal(
175+
bigframes.testing.assert_series_equal(
175176
pd_fpr,
176177
pd.Series(
177178
[0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0],
@@ -180,7 +181,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
180181
),
181182
check_index_type=False,
182183
)
183-
pd.testing.assert_series_equal(
184+
bigframes.testing.assert_series_equal(
184185
pd_tpr,
185186
pd.Series(
186187
[
@@ -261,7 +262,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
261262
pd_tpr = tpr.to_pandas()
262263
pd_thresholds = thresholds.to_pandas()
263264

264-
pd.testing.assert_series_equal(
265+
bigframes.testing.assert_series_equal(
265266
# skip testing the first value, as it is redundant and inconsistent across sklearn versions
266267
pd_thresholds[1:],
267268
pd.Series(
@@ -271,7 +272,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
271272
),
272273
check_index=False,
273274
)
274-
pd.testing.assert_series_equal(
275+
bigframes.testing.assert_series_equal(
275276
pd_fpr,
276277
pd.Series(
277278
[0.0, 0.0, 1.0],
@@ -280,7 +281,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
280281
),
281282
check_index_type=False,
282283
)
283-
pd.testing.assert_series_equal(
284+
bigframes.testing.assert_series_equal(
284285
pd_tpr,
285286
pd.Series(
286287
[
@@ -353,7 +354,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
353354
pd_tpr = tpr.to_pandas()
354355
pd_thresholds = thresholds.to_pandas()
355356

356-
pd.testing.assert_series_equal(
357+
bigframes.testing.assert_series_equal(
357358
# skip testing the first value, as it is redundant and inconsistent across sklearn versions
358359
pd_thresholds[1:],
359360
pd.Series(
@@ -363,7 +364,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
363364
),
364365
check_index=False,
365366
)
366-
pd.testing.assert_series_equal(
367+
bigframes.testing.assert_series_equal(
367368
pd_fpr,
368369
pd.Series(
369370
[0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0],
@@ -372,7 +373,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
372373
),
373374
check_index_type=False,
374375
)
375-
pd.testing.assert_series_equal(
376+
bigframes.testing.assert_series_equal(
376377
pd_tpr,
377378
pd.Series(
378379
[
@@ -505,7 +506,7 @@ def test_confusion_matrix(session):
505506
2: [0, 1, 2],
506507
}
507508
).astype("int64")
508-
pd.testing.assert_frame_equal(
509+
bigframes.testing.assert_frame_equal(
509510
confusion_matrix, expected_pd_df, check_index_type=False
510511
)
511512

@@ -523,7 +524,7 @@ def test_confusion_matrix_column_index(session):
523524
{1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]},
524525
index=[1, 2, 3, 4],
525526
).astype("int64")
526-
pd.testing.assert_frame_equal(
527+
bigframes.testing.assert_frame_equal(
527528
confusion_matrix, expected_pd_df, check_index_type=False
528529
)
529530

@@ -542,7 +543,7 @@ def test_confusion_matrix_matches_sklearn(session):
542543
pd_df[["y_true"]], pd_df[["y_pred"]]
543544
)
544545
expected_pd_df = pd.DataFrame(expected_confusion_matrix)
545-
pd.testing.assert_frame_equal(
546+
bigframes.testing.assert_frame_equal(
546547
confusion_matrix, expected_pd_df, check_index_type=False
547548
)
548549

@@ -564,7 +565,7 @@ def test_confusion_matrix_str_matches_sklearn(session):
564565
expected_confusion_matrix, index=["ant", "bird", "cat"]
565566
)
566567
expected_pd_df.columns = pd.Index(["ant", "bird", "cat"])
567-
pd.testing.assert_frame_equal(
568+
bigframes.testing.assert_frame_equal(
568569
confusion_matrix, expected_pd_df, check_index_type=False
569570
)
570571

@@ -585,7 +586,7 @@ def test_confusion_matrix_series(session):
585586
2: [0, 1, 2],
586587
}
587588
).astype("int64")
588-
pd.testing.assert_frame_equal(
589+
bigframes.testing.assert_frame_equal(
589590
confusion_matrix, expected_pd_df, check_index_type=False
590591
)
591592

@@ -605,7 +606,9 @@ def test_recall_score(session):
605606
expected_index = [0, 1, 2]
606607
expected_recall = pd.Series(expected_values, index=expected_index)
607608

608-
pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
609+
bigframes.testing.assert_series_equal(
610+
recall, expected_recall, check_index_type=False
611+
)
609612

610613

611614
def test_recall_score_matches_sklearn(session):
@@ -623,7 +626,9 @@ def test_recall_score_matches_sklearn(session):
623626
)
624627
expected_index = [0, 1, 2]
625628
expected_recall = pd.Series(expected_values, index=expected_index)
626-
pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
629+
bigframes.testing.assert_series_equal(
630+
recall, expected_recall, check_index_type=False
631+
)
627632

628633

629634
def test_recall_score_str_matches_sklearn(session):
@@ -641,7 +646,9 @@ def test_recall_score_str_matches_sklearn(session):
641646
)
642647
expected_index = ["ant", "bird", "cat"]
643648
expected_recall = pd.Series(expected_values, index=expected_index)
644-
pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
649+
bigframes.testing.assert_series_equal(
650+
recall, expected_recall, check_index_type=False
651+
)
645652

646653

647654
def test_recall_score_series(session):
@@ -657,7 +664,9 @@ def test_recall_score_series(session):
657664
expected_index = [0, 1, 2]
658665
expected_recall = pd.Series(expected_values, index=expected_index)
659666

660-
pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
667+
bigframes.testing.assert_series_equal(
668+
recall, expected_recall, check_index_type=False
669+
)
661670

662671

663672
def test_precision_score(session):
@@ -675,7 +684,7 @@ def test_precision_score(session):
675684
expected_index = [0, 1, 2]
676685
expected_precision = pd.Series(expected_values, index=expected_index)
677686

678-
pd.testing.assert_series_equal(
687+
bigframes.testing.assert_series_equal(
679688
precision_score, expected_precision, check_index_type=False
680689
)
681690

@@ -698,7 +707,7 @@ def test_precision_score_matches_sklearn(session):
698707
)
699708
expected_index = [0, 1, 2]
700709
expected_precision = pd.Series(expected_values, index=expected_index)
701-
pd.testing.assert_series_equal(
710+
bigframes.testing.assert_series_equal(
702711
precision_score, expected_precision, check_index_type=False
703712
)
704713

@@ -720,7 +729,7 @@ def test_precision_score_str_matches_sklearn(session):
720729
)
721730
expected_index = ["ant", "bird", "cat"]
722731
expected_precision = pd.Series(expected_values, index=expected_index)
723-
pd.testing.assert_series_equal(
732+
bigframes.testing.assert_series_equal(
724733
precision_score, expected_precision, check_index_type=False
725734
)
726735

@@ -738,7 +747,7 @@ def test_precision_score_series(session):
738747
expected_index = [0, 1, 2]
739748
expected_precision = pd.Series(expected_values, index=expected_index)
740749

741-
pd.testing.assert_series_equal(
750+
bigframes.testing.assert_series_equal(
742751
precision_score, expected_precision, check_index_type=False
743752
)
744753

@@ -823,7 +832,7 @@ def test_f1_score(session):
823832
expected_index = [0, 1, 2]
824833
expected_f1 = pd.Series(expected_values, index=expected_index)
825834

826-
pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
835+
bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
827836

828837

829838
def test_f1_score_matches_sklearn(session):
@@ -841,7 +850,7 @@ def test_f1_score_matches_sklearn(session):
841850
)
842851
expected_index = [0, 1, 2]
843852
expected_f1 = pd.Series(expected_values, index=expected_index)
844-
pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
853+
bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
845854

846855

847856
def test_f1_score_str_matches_sklearn(session):
@@ -859,7 +868,7 @@ def test_f1_score_str_matches_sklearn(session):
859868
)
860869
expected_index = ["ant", "bird", "cat"]
861870
expected_f1 = pd.Series(expected_values, index=expected_index)
862-
pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
871+
bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
863872

864873

865874
def test_f1_score_series(session):
@@ -875,7 +884,7 @@ def test_f1_score_series(session):
875884
expected_index = [0, 1, 2]
876885
expected_f1 = pd.Series(expected_values, index=expected_index)
877886

878-
pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
887+
bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
879888

880889

881890
def test_mean_squared_error(session: bigframes.Session):

tests/system/small/operations/test_datetimes.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@
1717

1818
import numpy
1919
from packaging import version
20-
from pandas import testing
2120
import pandas as pd
2221
import pytest
2322

2423
import bigframes.pandas as bpd
2524
import bigframes.series
26-
from bigframes.testing.utils import assert_series_equal
25+
from bigframes.testing.utils import assert_frame_equal, assert_series_equal
2726

2827
DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)]
2928
DATE_COLUMNS = [
@@ -304,7 +303,7 @@ def test_dt_isocalendar(session):
304303
actual_result = bf_s.dt.isocalendar().to_pandas()
305304

306305
expected_result = pd_s.dt.isocalendar()
307-
testing.assert_frame_equal(
306+
assert_frame_equal(
308307
actual_result, expected_result, check_dtype=False, check_index_type=False
309308
)
310309

@@ -353,7 +352,7 @@ def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_for
353352
pytest.importorskip("pandas", minversion="2.0.0")
354353
bf_result = scalars_df_index[column].dt.strftime(date_format).to_pandas()
355354
pd_result = scalars_pandas_df_index[column].dt.strftime(date_format)
356-
pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
355+
assert_series_equal(bf_result, pd_result, check_dtype=False)
357356
assert bf_result.dtype == "string[pyarrow]"
358357

359358

@@ -365,7 +364,7 @@ def test_dt_strftime_date():
365364
expected_result = pd.Series(["08/15/2014", "08/15/2215", "02/29/2016"])
366365
bf_result = bf_series.dt.strftime("%m/%d/%Y").to_pandas()
367366

368-
pd.testing.assert_series_equal(
367+
assert_series_equal(
369368
bf_result, expected_result, check_index_type=False, check_dtype=False
370369
)
371370
assert bf_result.dtype == "string[pyarrow]"
@@ -381,7 +380,7 @@ def test_dt_strftime_time():
381380
)
382381
bf_result = bf_series.dt.strftime("%X").to_pandas()
383382

384-
pd.testing.assert_series_equal(
383+
assert_series_equal(
385384
bf_result, expected_result, check_index_type=False, check_dtype=False
386385
)
387386
assert bf_result.dtype == "string[pyarrow]"
@@ -521,7 +520,7 @@ def test_timestamp_diff_two_dataframes(scalars_dfs):
521520
actual_result = (bf_df - bf_df).to_pandas()
522521

523522
expected_result = pd_df - pd_df
524-
testing.assert_frame_equal(actual_result, expected_result)
523+
assert_frame_equal(actual_result, expected_result)
525524

526525

527526
def test_timestamp_diff_two_series_with_different_types_raise_error(scalars_dfs):
@@ -575,7 +574,12 @@ def test_timestamp_series_diff_agg(scalars_dfs, column):
575574

576575
actual_result = bf_series.diff().to_pandas()
577576

578-
expected_result = pd_series.diff()
577+
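# pd_series.diff() overflows unexpectedly (cause unclear), so forward-fill before diffing and mask rows where either operand is null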
578+
# related? https://github.com/apache/arrow/issues/43031
579+
expected_result = pd_series.ffill().diff()
580+
expected_result = expected_result.mask(
581+
pd_series.isnull() | pd_series.shift(1).isnull()
582+
)
579583
assert_series_equal(actual_result, expected_result)
580584

581585

@@ -630,6 +634,6 @@ def test_to_datetime(scalars_dfs, col):
630634
).to_pandas()
631635

632636
expected_result = pd.Series(pd.to_datetime(pd_df[col]))
633-
testing.assert_series_equal(
637+
assert_series_equal(
634638
actual_result, expected_result, check_dtype=False, check_index_type=False
635639
)

0 commit comments

Comments
 (0)