From 0568de66ef7d807ab4c4ab20897c7d5341a01470 Mon Sep 17 00:00:00 2001 From: Mike Lui Date: Sun, 26 Mar 2023 01:11:57 -0400 Subject: [PATCH] [C++/Python] Fix bug preventing nested pydict to arrow conversion --- .../src/arrow/python/python_to_arrow.cc | 2 +- python/pyarrow/tests/test_pandas.py | 26 ++++++++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 9e7f07ef81296..48af2d94f07c9 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -762,7 +762,7 @@ class PyListConverter : public ListConverter { RETURN_NOT_OK(AppendSequence(value)); } else if (PySet_Check(value) || (Py_TYPE(value) == &PyDictValues_Type)) { RETURN_NOT_OK(AppendIterable(value)); - } else if (PyDict_Check(value) && this->options_.type->id() == Type::MAP) { + } else if (PyDict_Check(value) && this->type()->id() == Type::MAP) { // Branch to support Python Dict with `map` DataType. auto items = PyDict_Items(value); OwnedRef item_ref(items); diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 74569dba90be2..f22f1b1069773 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2153,7 +2153,7 @@ def test_to_list_of_maps_pandas(self): offsets = pa.array([0, 2, 5, 6], pa.int32()) keys = pa.array(['foo', 'bar', 'baz', 'qux', 'quux', 'quz']) items = pa.array([['a', 'b'], ['c', 'd'], [], None, [None, 'e'], ['f', 'g']], - pa.list_(pa.string())) + pa.list_(pa.string())) maps = pa.MapArray.from_arrays(offsets, keys, items) data = pa.ListArray.from_arrays([0, 1, 3], maps) @@ -4622,20 +4622,22 @@ def test_does_not_mutate_timedelta_nested(): def test_roundtrip_nested_map_table_with_pydicts(): - schema = pa.schema([pa.field("a", pa.list_(pa.map_(pa.int8(), pa.struct([pa.field("b", pa.binary())]))))]) + schema = pa.schema( + [pa.field("a", pa.list_(pa.map_(pa.int8(), pa.struct([pa.field("b", pa.binary())]))))]) table = pa.table([[ - [[(1, None)]], - None, - [ - [(2, {"b": b"abc"})], - [(3, {"b": None}), (4, {"b": b"def"})], - ] - ]], + [[(1, None)]], + None, + [ + [(2, {"b": b"abc"})], + [(3, {"b": None}), (4, {"b": b"def"})], + ] + ]], schema=schema, ) expected_default_df = pd.DataFrame( - {"a": [[[(1, None)]], None, [[(2, {"b": b"abc"})], [(3, {"b": None}), (4, {"b": b"def"})]]]} + {"a": [[[(1, None)]], None, [[(2, {"b": b"abc"})], + [(3, {"b": None}), (4, {"b": b"def"})]]]} ) expected_as_pydicts_df = pd.DataFrame( {"a": [[{1: None}], None, [{2: {"b": b"abc"}}, {3: {"b": None}, 4: {"b": b"def"}}]]} @@ -4650,5 +4652,5 @@ def test_roundtrip_nested_map_table_with_pydicts(): table_default_roundtrip = pa.Table.from_pandas(default_df, schema=schema) assert table.equals(table_default_roundtrip) - with pytest.raises(pa.ArrowTypeError): - table_as_pydicts_roundtrip = pa.Table.from_pandas(as_pydicts_df, schema=schema) \ No newline at end of file + table_as_pydicts_roundtrip = pa.Table.from_pandas(as_pydicts_df, schema=schema) + assert table.equals(table_as_pydicts_roundtrip)