googleapis
diff --git a/bigframes/testing/__init__.py b/bigframes/testing/__init__.py
Lines changed: 6 additions & 5 deletions
diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py
Lines changed: 4 additions & 0 deletions
diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py
Lines changed: 3 additions & 2 deletions
diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py
Lines changed: 35 additions & 26 deletions
diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py
Lines changed: 13 additions & 9 deletions
@@ -17,9 +17,10 @@
 These modules are provided for testing the BigQuery DataFrames package. The
 interface is not considered stable.
 """
-from bigframes.testing.utils import assert_frame_equal, assert_series_equal
+from bigframes.testing.utils import (
+    assert_frame_equal,
+    assert_index_equal,
+    assert_series_equal,
+)
 
-__all__ = [
-    "assert_frame_equal",
-    "assert_series_equal",
-]
+__all__ = ["assert_frame_equal", "assert_series_equal", "assert_index_equal"]
@@ -157,6 +157,10 @@ def assert_series_equal(
     pd.testing.assert_series_equal(left, right, **kwargs)
 
 
+def assert_index_equal(left, right, **kwargs):
+    pd.testing.assert_index_equal(left, right, **kwargs)
+
+
 def _standardize_index(idx):
     return pd.Index(list(idx), name=idx.name)
 
 
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas as pd
 import pytest
 
 import bigframes.bigquery as bbq
 import bigframes.series as series
+import bigframes.testing
 
 
 @pytest.mark.parametrize(
@@ -53,9 +53,10 @@ def test_struct_from_dataframe(columns_arg):
     srs = series.Series(
         columns_arg,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         srs.to_pandas(),
         bbq.struct(srs.struct.explode()).to_pandas(),
         check_index_type=False,
         check_dtype=False,
+        check_names=False,  # None vs nan version dependent
     )
@@ -20,6 +20,7 @@
 
 import bigframes
 from bigframes.ml import metrics
+import bigframes.testing
 
 
 def test_r2_score_perfect_fit(session):
@@ -161,7 +162,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
     pd_tpr = tpr.to_pandas()
     pd_thresholds = thresholds.to_pandas()
 
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         # skip testing the first value, as it is redundant and inconsistent across sklearn versions
         pd_thresholds[1:],
         pd.Series(
@@ -171,7 +172,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
         ),
         check_index=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_fpr,
         pd.Series(
             [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0],
@@ -180,7 +181,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
         ),
         check_index_type=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_tpr,
         pd.Series(
             [
@@ -261,7 +262,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
     pd_tpr = tpr.to_pandas()
     pd_thresholds = thresholds.to_pandas()
 
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         # skip testing the first value, as it is redundant and inconsistent across sklearn versions
         pd_thresholds[1:],
         pd.Series(
@@ -271,7 +272,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
         ),
         check_index=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_fpr,
         pd.Series(
             [0.0, 0.0, 1.0],
@@ -280,7 +281,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
         ),
         check_index_type=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_tpr,
         pd.Series(
             [
@@ -353,7 +354,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
     pd_tpr = tpr.to_pandas()
     pd_thresholds = thresholds.to_pandas()
 
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         # skip testing the first value, as it is redundant and inconsistent across sklearn versions
         pd_thresholds[1:],
         pd.Series(
@@ -363,7 +364,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
         ),
         check_index=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_fpr,
         pd.Series(
             [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0],
@@ -372,7 +373,7 @@ def test_roc_curve_binary_classification_prediction_series(session):
         ),
         check_index_type=False,
     )
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         pd_tpr,
         pd.Series(
             [
@@ -505,7 +506,7 @@ def test_confusion_matrix(session):
             2: [0, 1, 2],
         }
     ).astype("int64")
-    pd.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         confusion_matrix, expected_pd_df, check_index_type=False
     )
 
@@ -523,7 +524,7 @@ def test_confusion_matrix_column_index(session):
         {1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]},
         index=[1, 2, 3, 4],
     ).astype("int64")
-    pd.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         confusion_matrix, expected_pd_df, check_index_type=False
     )
 
@@ -542,7 +543,7 @@ def test_confusion_matrix_matches_sklearn(session):
         pd_df[["y_true"]], pd_df[["y_pred"]]
     )
     expected_pd_df = pd.DataFrame(expected_confusion_matrix)
-    pd.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         confusion_matrix, expected_pd_df, check_index_type=False
     )
 
@@ -564,7 +565,7 @@ def test_confusion_matrix_str_matches_sklearn(session):
         expected_confusion_matrix, index=["ant", "bird", "cat"]
     )
     expected_pd_df.columns = pd.Index(["ant", "bird", "cat"])
-    pd.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         confusion_matrix, expected_pd_df, check_index_type=False
     )
 
@@ -585,7 +586,7 @@ def test_confusion_matrix_series(session):
             2: [0, 1, 2],
         }
     ).astype("int64")
-    pd.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         confusion_matrix, expected_pd_df, check_index_type=False
     )
 
@@ -605,7 +606,9 @@ def test_recall_score(session):
     expected_index = [0, 1, 2]
     expected_recall = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
+    bigframes.testing.assert_series_equal(
+        recall, expected_recall, check_index_type=False
+    )
 
 
 def test_recall_score_matches_sklearn(session):
@@ -623,7 +626,9 @@ def test_recall_score_matches_sklearn(session):
     )
     expected_index = [0, 1, 2]
     expected_recall = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
+    bigframes.testing.assert_series_equal(
+        recall, expected_recall, check_index_type=False
+    )
 
 
 def test_recall_score_str_matches_sklearn(session):
@@ -641,7 +646,9 @@ def test_recall_score_str_matches_sklearn(session):
     )
     expected_index = ["ant", "bird", "cat"]
     expected_recall = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
+    bigframes.testing.assert_series_equal(
+        recall, expected_recall, check_index_type=False
+    )
 
 
 def test_recall_score_series(session):
@@ -657,7 +664,9 @@ def test_recall_score_series(session):
     expected_index = [0, 1, 2]
     expected_recall = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(recall, expected_recall, check_index_type=False)
+    bigframes.testing.assert_series_equal(
+        recall, expected_recall, check_index_type=False
+    )
 
 
 def test_precision_score(session):
@@ -675,7 +684,7 @@ def test_precision_score(session):
     expected_index = [0, 1, 2]
     expected_precision = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         precision_score, expected_precision, check_index_type=False
     )
 
@@ -698,7 +707,7 @@ def test_precision_score_matches_sklearn(session):
     )
     expected_index = [0, 1, 2]
     expected_precision = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         precision_score, expected_precision, check_index_type=False
     )
 
@@ -720,7 +729,7 @@ def test_precision_score_str_matches_sklearn(session):
     )
     expected_index = ["ant", "bird", "cat"]
     expected_precision = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         precision_score, expected_precision, check_index_type=False
     )
 
@@ -738,7 +747,7 @@ def test_precision_score_series(session):
     expected_index = [0, 1, 2]
     expected_precision = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         precision_score, expected_precision, check_index_type=False
     )
 
@@ -823,7 +832,7 @@ def test_f1_score(session):
     expected_index = [0, 1, 2]
     expected_f1 = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
+    bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
 
 
 def test_f1_score_matches_sklearn(session):
@@ -841,7 +850,7 @@ def test_f1_score_matches_sklearn(session):
     )
     expected_index = [0, 1, 2]
     expected_f1 = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
+    bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
 
 
 def test_f1_score_str_matches_sklearn(session):
@@ -859,7 +868,7 @@ def test_f1_score_str_matches_sklearn(session):
     )
     expected_index = ["ant", "bird", "cat"]
     expected_f1 = pd.Series(expected_values, index=expected_index)
-    pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
+    bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
 
 
 def test_f1_score_series(session):
@@ -875,7 +884,7 @@ def test_f1_score_series(session):
     expected_index = [0, 1, 2]
     expected_f1 = pd.Series(expected_values, index=expected_index)
 
-    pd.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
+    bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False)
 
 
 def test_mean_squared_error(session: bigframes.Session):
 
@@ -17,13 +17,12 @@
 
 import numpy
 from packaging import version
-from pandas import testing
 import pandas as pd
 import pytest
 
 import bigframes.pandas as bpd
 import bigframes.series
-from bigframes.testing.utils import assert_series_equal
+from bigframes.testing.utils import assert_frame_equal, assert_series_equal
 
 DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)]
 DATE_COLUMNS = [
@@ -304,7 +303,7 @@ def test_dt_isocalendar(session):
     actual_result = bf_s.dt.isocalendar().to_pandas()
 
     expected_result = pd_s.dt.isocalendar()
-    testing.assert_frame_equal(
+    assert_frame_equal(
         actual_result, expected_result, check_dtype=False, check_index_type=False
     )
 
@@ -353,7 +352,7 @@ def test_dt_strftime(scalars_df_index, scalars_pandas_df_index, column, date_for
     pytest.importorskip("pandas", minversion="2.0.0")
     bf_result = scalars_df_index[column].dt.strftime(date_format).to_pandas()
     pd_result = scalars_pandas_df_index[column].dt.strftime(date_format)
-    pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+    assert_series_equal(bf_result, pd_result, check_dtype=False)
     assert bf_result.dtype == "string[pyarrow]"
 
 
@@ -365,7 +364,7 @@ def test_dt_strftime_date():
     expected_result = pd.Series(["08/15/2014", "08/15/2215", "02/29/2016"])
     bf_result = bf_series.dt.strftime("%m/%d/%Y").to_pandas()
 
-    pd.testing.assert_series_equal(
+    assert_series_equal(
         bf_result, expected_result, check_index_type=False, check_dtype=False
     )
     assert bf_result.dtype == "string[pyarrow]"
@@ -381,7 +380,7 @@ def test_dt_strftime_time():
     )
     bf_result = bf_series.dt.strftime("%X").to_pandas()
 
-    pd.testing.assert_series_equal(
+    assert_series_equal(
         bf_result, expected_result, check_index_type=False, check_dtype=False
     )
     assert bf_result.dtype == "string[pyarrow]"
@@ -521,7 +520,7 @@ def test_timestamp_diff_two_dataframes(scalars_dfs):
     actual_result = (bf_df - bf_df).to_pandas()
 
     expected_result = pd_df - pd_df
-    testing.assert_frame_equal(actual_result, expected_result)
+    assert_frame_equal(actual_result, expected_result)
 
 
 def test_timestamp_diff_two_series_with_different_types_raise_error(scalars_dfs):
@@ -575,7 +574,12 @@ def test_timestamp_series_diff_agg(scalars_dfs, column):
 
     actual_result = bf_series.diff().to_pandas()
 
-    expected_result = pd_series.diff()
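+    # pd_series.diff() overflows here for unclear reasons; ffill before diffing, then re-mask rows whose current or previous value is null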
+    # related? https://github.com/apache/arrow/issues/43031
+    expected_result = pd_series.ffill().diff()
+    expected_result = expected_result.mask(
+        pd_series.isnull() | pd_series.shift(1).isnull()
+    )
     assert_series_equal(actual_result, expected_result)
 
 
@@ -630,6 +634,6 @@ def test_to_datetime(scalars_dfs, col):
     ).to_pandas()
 
     expected_result = pd.Series(pd.to_datetime(pd_df[col]))
-    testing.assert_series_equal(
+    assert_series_equal(
         actual_result, expected_result, check_dtype=False, check_index_type=False
     )