Skip to content

Commit

Permalink
[C++/Python] Fix bug preventing nested pydict to arrow conversion
Browse files: browse the repository at this point in the history
  • Loading branch information
Mike Lui authored and mikelui committed Mar 31, 2023
1 parent 8501088 commit 0568de6
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 13 deletions.
2 changes: 1 addition & 1 deletion python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
RETURN_NOT_OK(AppendSequence(value));
} else if (PySet_Check(value) || (Py_TYPE(value) == &PyDictValues_Type)) {
RETURN_NOT_OK(AppendIterable(value));
} else if (PyDict_Check(value) && this->options_.type->id() == Type::MAP) {
} else if (PyDict_Check(value) && this->type()->id() == Type::MAP) {
// Branch to support Python Dict with `map` DataType.
auto items = PyDict_Items(value);
OwnedRef item_ref(items);
Expand Down
26 changes: 14 additions & 12 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2153,7 +2153,7 @@ def test_to_list_of_maps_pandas(self):
offsets = pa.array([0, 2, 5, 6], pa.int32())
keys = pa.array(['foo', 'bar', 'baz', 'qux', 'quux', 'quz'])
items = pa.array([['a', 'b'], ['c', 'd'], [], None, [None, 'e'], ['f', 'g']],
pa.list_(pa.string()))
pa.list_(pa.string()))
maps = pa.MapArray.from_arrays(offsets, keys, items)
data = pa.ListArray.from_arrays([0, 1, 3], maps)

Expand Down Expand Up @@ -4622,20 +4622,22 @@ def test_does_not_mutate_timedelta_nested():


def test_roundtrip_nested_map_table_with_pydicts():
schema = pa.schema([pa.field("a", pa.list_(pa.map_(pa.int8(), pa.struct([pa.field("b", pa.binary())]))))])
schema = pa.schema(
[pa.field("a", pa.list_(pa.map_(pa.int8(), pa.struct([pa.field("b", pa.binary())]))))])
table = pa.table([[
[[(1, None)]],
None,
[
[(2, {"b": b"abc"})],
[(3, {"b": None}), (4, {"b": b"def"})],
]
]],
[[(1, None)]],
None,
[
[(2, {"b": b"abc"})],
[(3, {"b": None}), (4, {"b": b"def"})],
]
]],
schema=schema,
)

expected_default_df = pd.DataFrame(
{"a": [[[(1, None)]], None, [[(2, {"b": b"abc"})], [(3, {"b": None}), (4, {"b": b"def"})]]]}
{"a": [[[(1, None)]], None, [[(2, {"b": b"abc"})],
[(3, {"b": None}), (4, {"b": b"def"})]]]}
)
expected_as_pydicts_df = pd.DataFrame(
{"a": [[{1: None}], None, [{2: {"b": b"abc"}}, {3: {"b": None}, 4: {"b": b"def"}}]]}
Expand All @@ -4650,5 +4652,5 @@ def test_roundtrip_nested_map_table_with_pydicts():
table_default_roundtrip = pa.Table.from_pandas(default_df, schema=schema)
assert table.equals(table_default_roundtrip)

with pytest.raises(pa.ArrowTypeError):
table_as_pydicts_roundtrip = pa.Table.from_pandas(as_pydicts_df, schema=schema)
table_as_pydicts_roundtrip = pa.Table.from_pandas(as_pydicts_df, schema=schema)
assert table.equals(table_as_pydicts_roundtrip)

0 comments on commit 0568de6

Please sign in to comment.