Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions python/pyarrow/array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ cdef class Array:

return PyObject_to_object(np_arr)

def to_pylist(self):
    """
    Convert to a list of native Python objects.

    Every element produced by iterating over this array is unboxed via
    its ``as_py()`` method, preserving iteration order.
    """
    values = []
    for item in self:
        values.append(item.as_py())
    return values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we may want to do this marshalling in C to avoid the boxing overhead

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mainly added this so that I can write simpler unit tests in third-party libraries that use Arrow without depending on NumPy. Performance is not yet critical.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed



cdef class NullArray(Array):
pass
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ cdef object box_arrow_scalar(DataType type,
const shared_ptr[CArray]& sp_array,
int index):
cdef ArrayValue val
if sp_array.get().IsNull(index):
if type.type.type == Type_NA:
return NA
elif sp_array.get().IsNull(index):
return NA
else:
val = _scalar_classes[type.type.type]()
Expand Down
15 changes: 15 additions & 0 deletions python/pyarrow/table.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ cdef class ChunkedArray:
for i in range(self.num_chunks):
yield self.chunk(i)

def to_pylist(self):
    """
    Convert to a list of native Python objects.

    Chunks are visited in index order and the values returned by each
    chunk's ``to_pylist()`` are concatenated into a single flat list.
    """
    return [
        value
        for index in range(self.num_chunks)
        for value in self.chunk(index).to_pylist()
    ]


cdef class Column:
"""
Expand Down Expand Up @@ -143,6 +152,12 @@ cdef class Column:

return pd.Series(PyObject_to_object(arr), name=self.name)

def to_pylist(self):
    """
    Convert to a list of native Python objects.

    The work is delegated to the ``to_pylist()`` method of ``self.data``.
    """
    column_data = self.data
    return column_data.to_pylist()

cdef _check_nullptr(self):
if self.column == NULL:
raise ReferenceError("Column object references a NULL pointer."
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_basics(self):
assert column.length() == 5
assert len(column) == 5
assert column.shape == (5,)
assert column.to_pylist() == [-10, -5, 0, 5, 10]

def test_pandas(self):
data = [
Expand Down
13 changes: 11 additions & 2 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,34 @@
class TestConvertList(unittest.TestCase):

def test_boolean(self):
    """Booleans mixed with None survive a from_pylist/to_pylist trip."""
    # Build the array once from the expected values; the earlier duplicate
    # construction from an identical literal was immediately overwritten.
    expected = [True, None, False, None]
    arr = pyarrow.from_pylist(expected)
    assert len(arr) == 4
    assert arr.null_count == 2
    assert arr.type == pyarrow.bool_()
    assert arr.to_pylist() == expected

def test_empty_list(self):
    """An empty input produces a zero-length array of null type."""
    empty = pyarrow.from_pylist([])
    assert len(empty) == 0
    assert empty.null_count == 0
    assert empty.type == pyarrow.null()
    assert empty.to_pylist() == []

def test_all_none(self):
    """A list holding only None becomes a null-typed array of nulls."""
    nulls = pyarrow.from_pylist([None, None])
    assert len(nulls) == 2
    assert nulls.null_count == 2
    assert nulls.type == pyarrow.null()
    assert nulls.to_pylist() == [None, None]

def test_integer(self):
    """Integers mixed with None survive a from_pylist/to_pylist trip."""
    # Build the array once from the expected values; the earlier duplicate
    # construction from an identical literal was immediately overwritten.
    expected = [1, None, 3, None]
    arr = pyarrow.from_pylist(expected)
    assert len(arr) == 4
    assert arr.null_count == 2
    assert arr.type == pyarrow.int64()
    assert arr.to_pylist() == expected

def test_garbage_collection(self):
import gc
Expand All @@ -62,13 +68,15 @@ def test_double(self):
assert len(arr) == 6
assert arr.null_count == 3
assert arr.type == pyarrow.double()
assert arr.to_pylist() == data

def test_string(self):
    """Text and bytes inputs both come back as text from to_pylist."""
    raw_values = ['foo', b'bar', None, 'arrow']
    strings = pyarrow.from_pylist(raw_values)
    assert len(strings) == 4
    assert strings.null_count == 1
    assert strings.type == pyarrow.string()
    # The bytes entry is expected back as a text string.
    assert strings.to_pylist() == ['foo', 'bar', None, 'arrow']

def test_mixed_nesting_levels(self):
pyarrow.from_pylist([1, 2, None])
Expand All @@ -90,3 +98,4 @@ def test_list_of_int(self):
assert len(arr) == 4
assert arr.null_count == 1
assert arr.type == pyarrow.list_(pyarrow.int64())
assert arr.to_pylist() == data