From 71db310a328a0dfa194ef0fe2b95238817b4f419 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 14 Jan 2023 06:21:24 +0100 Subject: [PATCH] Backport PR #50396 on branch 1.5.x (BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy) (#50739) Backport PR #50396: BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/v1.5.3.rst | 1 + pandas/core/dtypes/missing.py | 4 + pandas/tests/dtypes/test_missing.py | 116 ++++++++++++++++-- pandas/tests/util/test_assert_almost_equal.py | 84 +++++++++++++ 4 files changed, 198 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 51e36359946e7..9d17d3cf7bea9 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -31,6 +31,7 @@ Bug fixes - Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) +- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 189ffc3d485bf..e7f57ae0e6658 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -594,6 +594,10 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo if "boolean value of NA is ambiguous" in str(err): return False raise + except ValueError: + # numpy can raise a ValueError if left and right cannot be + # compared (e.g. nested arrays) + return False return True diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 9f761c505075b..9a242e9491537 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -525,18 +525,120 @@ def test_array_equivalent_str(dtype): ) -def test_array_equivalent_nested(): +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested(strict_nan): # reached in groupby aggregations, make sure we use np.any when checking # if the comparison is truthy - left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) - right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) + right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) - assert array_equivalent(left, right, strict_nan=True) - assert not array_equivalent(left, right[::-1], strict_nan=True) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) - left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) + left = np.empty(2, dtype=object) + left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + right = np.empty(2, dtype=object) + right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object) right = np.array([50, 40]) - assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested2(strict_nan): + # more than one level of nesting + left = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + right = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object) + right = np.array([50]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested_list(strict_nan): + left = np.array([[50, 70, 90], [20, 30]], dtype=object) + right = np.array([[50, 70, 90], [20, 30]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([[50, 50, 50], [40, 40]], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_mixed_list(strict_nan): + # mixed arrays / lists in left and right + # https://github.com/pandas-dev/pandas/issues/50360 + left = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object) + right = np.array([[1, 2, 3], [4, 5]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # multiple levels of nesting + left = np.array( + [ + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object), + ], + dtype=object, + ) + right = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # same-length lists + subarr = np.empty(2, dtype=object) + subarr[:] = [ + np.array([None, "b"], dtype=object), + np.array(["c", "d"], dtype=object), + ] + left = np.array([subarr, None], dtype=object) + right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_dicts(strict_nan): + left = np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object) + right = np.array( + [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object + ) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + right2 = np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object) + assert array_equivalent(left, right2, strict_nan=strict_nan) + assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan) @pytest.mark.parametrize( diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py index ab53707771be6..4e6c420341bc2 100644 --- a/pandas/tests/util/test_assert_almost_equal.py +++ b/pandas/tests/util/test_assert_almost_equal.py @@ -458,3 +458,87 @@ def test_assert_almost_equal_iterable_values_mismatch(): with pytest.raises(AssertionError, match=msg): tm.assert_almost_equal([1, 2], [1, 3]) + + +subarr = np.empty(2, dtype=object) +subarr[:] = [np.array([None, "b"], dtype=object), np.array(["c", "d"], dtype=object)] + +NESTED_CASES = [ + # nested array + ( + np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object), + np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object), + ), + # >1 level of nesting + ( + np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ), + np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ), + ), + # lists + ( + np.array([[50, 70, 90], [20, 30]], dtype=object), + np.array([[50, 70, 90], [20, 30]], dtype=object), + ), + # mixed array/list + ( + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array([[1, 2, 3], [4, 5]], dtype=object), + ), + ( + np.array( + [ + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array( + [np.array([6]), np.array([7, 8]), np.array([9])], dtype=object + ), + ], + dtype=object, + ), + np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object), + ), + # same-length lists + ( + np.array([subarr, None], dtype=object), + np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object), + ), + # dicts + ( + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + ), + ( + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object), + ), + # array/list of dicts + ( + np.array( + [ + np.array( + [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object + ), + np.array([], dtype=object), + ], + dtype=object, + ), + np.array([[{"f1": 1, "f2": ["a", "b"]}], []], dtype=object), + ), +] + + +@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning") +@pytest.mark.parametrize("a,b", NESTED_CASES) +def test_assert_almost_equal_array_nested(a, b): + _assert_almost_equal_both(a, b)