Skip to content

Commit

Permalink
Backport PR #50396 on branch 1.5.x (BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy) (#50739)
Browse files (browse the repository at this point in the history)

Backport PR #50396: BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
meeseeksmachine and jorisvandenbossche authored Jan 14, 2023
1 parent 060345a commit 71db310
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Bug fixes
- Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`)
- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`)
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)
- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,10 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo
if "boolean value of NA is ambiguous" in str(err):
return False
raise
except ValueError:
# numpy can raise a ValueError if left and right cannot be
# compared (e.g. nested arrays)
return False
return True


Expand Down
116 changes: 109 additions & 7 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,18 +525,120 @@ def test_array_equivalent_str(dtype):
)


@pytest.mark.parametrize(
    "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested(strict_nan):
    """Check ``array_equivalent`` on object arrays whose elements are arrays.

    Parametrized over ``strict_nan``; the ``strict_nan=True`` case is marked
    as an expected failure.
    """
    # reached in groupby aggregations, make sure we use np.any when checking
    # if the comparison is truthy

    # ragged sub-arrays (lengths 3 and 2)
    left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)
    right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object)

    assert array_equivalent(left, right, strict_nan=strict_nan)
    assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

    # equal-length sub-arrays, filled into a pre-allocated 1-D object array
    # NOTE(review): presumably np.empty + slice assignment is used so the
    # outer array stays 1-D instead of being stacked into 2-D -- confirm.
    left = np.empty(2, dtype=object)
    left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])]
    right = np.empty(2, dtype=object)
    right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])]
    assert array_equivalent(left, right, strict_nan=strict_nan)
    assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

    # nested array vs. flat integer array must not be reported as equivalent
    left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object)
    right = np.array([50, 40])
    assert not array_equivalent(left, right, strict_nan=strict_nan)


@pytest.mark.parametrize(
    "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested2(strict_nan):
    """Check ``array_equivalent`` with more than one level of nesting.

    Parametrized over ``strict_nan``; the ``strict_nan=True`` case is marked
    as an expected failure.
    """

    def _two_levels():
        # Built from scratch on every call so both operands are distinct
        # objects holding equal contents.
        first = np.array([np.array([50, 70]), np.array([90])], dtype=object)
        second = np.array([np.array([20, 30])], dtype=object)
        return np.array([first, second], dtype=object)

    lhs = _two_levels()
    rhs = _two_levels()
    assert array_equivalent(lhs, rhs, strict_nan=strict_nan)
    flipped = rhs[::-1]
    assert not array_equivalent(lhs, flipped, strict_nan=strict_nan)

    # deeply nested array vs. flat integer array
    innermost = np.array([np.array([50, 50, 50])], dtype=object)
    deep = np.array([innermost], dtype=object)
    flat = np.array([50])
    assert not array_equivalent(deep, flat, strict_nan=strict_nan)


@pytest.mark.parametrize(
    "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False]
)
def test_array_equivalent_nested_list(strict_nan):
    """Check ``array_equivalent`` on object arrays built from nested lists.

    Parametrized over ``strict_nan``; the ``strict_nan=True`` case is marked
    as an expected failure.
    """

    def _ragged_lists():
        # Fresh list objects on every call so the operands never share
        # their elements.
        return np.array([[50, 70, 90], [20, 30]], dtype=object)

    lhs = _ragged_lists()
    rhs = _ragged_lists()
    assert array_equivalent(lhs, rhs, strict_nan=strict_nan)
    assert not array_equivalent(lhs, rhs[::-1], strict_nan=strict_nan)

    # nested lists vs. flat integer array
    nested = np.array([[50, 50, 50], [40, 40]], dtype=object)
    scalars = np.array([50, 40])
    assert not array_equivalent(nested, scalars, strict_nan=strict_nan)


@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_mixed_list(strict_nan):
    """Compare nested ndarrays on one side with nested lists on the other.

    The whole test is currently marked as an expected failure.
    """
    # mixed arrays / lists in left and right
    # https://github.com/pandas-dev/pandas/issues/50360

    def _check_order_sensitive(arrays, lists):
        # equivalent as given, not equivalent once one side is reversed
        assert array_equivalent(arrays, lists, strict_nan=strict_nan)
        assert not array_equivalent(arrays, lists[::-1], strict_nan=strict_nan)

    # one level of nesting
    _check_order_sensitive(
        np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
        np.array([[1, 2, 3], [4, 5]], dtype=object),
    )

    # multiple levels of nesting
    first = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object)
    second = np.array(
        [np.array([6]), np.array([7, 8]), np.array([9])], dtype=object
    )
    _check_order_sensitive(
        np.array([first, second], dtype=object),
        np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object),
    )

    # same-length lists
    pair = np.empty(2, dtype=object)
    pair[:] = [
        np.array([None, "b"], dtype=object),
        np.array(["c", "d"], dtype=object),
    ]
    _check_order_sensitive(
        np.array([pair, None], dtype=object),
        np.array([[[None, "b"], ["c", "d"]], None], dtype=object),
    )


@pytest.mark.xfail(reason="failing")
@pytest.mark.parametrize("strict_nan", [True, False])
def test_array_equivalent_nested_dicts(strict_nan):
    """Check ``array_equivalent`` on object arrays of dicts holding arrays/lists.

    The whole test is currently marked as an expected failure.

    Each array holds two *different* dicts. With a single element, reversing
    the array yields the same contents, so asserting "equivalent" and then
    "not equivalent to the reversed array" could never both succeed.
    """
    left = np.array(
        [
            {"f1": 1, "f2": np.array(["a", "b"], dtype=object)},
            {"f1": 2, "f2": np.array(["c"], dtype=object)},
        ],
        dtype=object,
    )
    right = np.array(
        [
            {"f1": 1, "f2": np.array(["a", "b"], dtype=object)},
            {"f1": 2, "f2": np.array(["c"], dtype=object)},
        ],
        dtype=object,
    )
    assert array_equivalent(left, right, strict_nan=strict_nan)
    assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)

    # same data, but with plain lists instead of arrays inside the dicts
    right2 = np.array(
        [{"f1": 1, "f2": ["a", "b"]}, {"f1": 2, "f2": ["c"]}], dtype=object
    )
    assert array_equivalent(left, right2, strict_nan=strict_nan)
    assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan)


@pytest.mark.parametrize(
Expand Down
84 changes: 84 additions & 0 deletions pandas/tests/util/test_assert_almost_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,87 @@ def test_assert_almost_equal_iterable_values_mismatch():

with pytest.raises(AssertionError, match=msg):
tm.assert_almost_equal([1, 2], [1, 3])


# Shared fixture for the "same-length lists" case below: a 1-D object array
# holding two equal-length object sub-arrays.
# NOTE(review): presumably np.empty + slice assignment is used so the result
# stays 1-D instead of being stacked into a 2-D array -- confirm.
subarr = np.empty(2, dtype=object)
subarr[:] = [np.array([None, "b"], dtype=object), np.array(["c", "d"], dtype=object)]

# Pairs ``(a, b)`` of nested object arrays used to parametrize
# ``test_assert_almost_equal_array_nested``.
NESTED_CASES = [
    # nested array
    (
        np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object),
        np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object),
    ),
    # >1 level of nesting
    (
        np.array(
            [
                np.array([np.array([50, 70]), np.array([90])], dtype=object),
                np.array([np.array([20, 30])], dtype=object),
            ],
            dtype=object,
        ),
        np.array(
            [
                np.array([np.array([50, 70]), np.array([90])], dtype=object),
                np.array([np.array([20, 30])], dtype=object),
            ],
            dtype=object,
        ),
    ),
    # lists
    (
        np.array([[50, 70, 90], [20, 30]], dtype=object),
        np.array([[50, 70, 90], [20, 30]], dtype=object),
    ),
    # mixed array/list
    (
        np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
        np.array([[1, 2, 3], [4, 5]], dtype=object),
    ),
    # mixed array/list, multiple levels of nesting
    (
        np.array(
            [
                np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object),
                np.array(
                    [np.array([6]), np.array([7, 8]), np.array([9])], dtype=object
                ),
            ],
            dtype=object,
        ),
        np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object),
    ),
    # same-length lists
    (
        np.array([subarr, None], dtype=object),
        np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object),
    ),
    # dicts (array values on both sides)
    (
        np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
        np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
    ),
    # dicts (array value on one side, list value on the other)
    (
        np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object),
        np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object),
    ),
    # array/list of dicts
    (
        np.array(
            [
                np.array(
                    [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object
                ),
                np.array([], dtype=object),
            ],
            dtype=object,
        ),
        np.array([[{"f1": 1, "f2": ["a", "b"]}], []], dtype=object),
    ),
]


@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning")
@pytest.mark.parametrize("a,b", NESTED_CASES)
def test_assert_almost_equal_array_nested(a, b):
    # Runs once per (a, b) pair in NESTED_CASES; DeprecationWarnings starting
    # with "elementwise comparison failed" are ignored for this test.
    # NOTE(review): ``_assert_almost_equal_both`` is defined elsewhere in this
    # module; presumably it checks the pair in both argument orders -- confirm.
    _assert_almost_equal_both(a, b)

0 comments on commit 71db310

Please sign in to comment.