Skip to content

Commit c369709

Browse files
xhochy authored and wesm committed
ARROW-426: Python: Conversion from pyarrow.Array to a Python list
Author: Uwe L. Korn <uwelk@xhochy.com>
Closes #242 from xhochy/ARROW-426 and squashes the following commits:
10739ac [Uwe L. Korn] ARROW-426: Python: Conversion from pyarrow.Array to a Python list
1 parent a2ead2f commit c369709

File tree

5 files changed

+36
-3
lines changed

5 files changed

+36
-3
lines changed

python/pyarrow/array.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ cdef class Array:
167167

168168
return PyObject_to_object(np_arr)
169169

170+
def to_pylist(self):
171+
"""
172+
Convert to a list of native Python objects.
173+
"""
174+
return [x.as_py() for x in self]
175+
170176

171177
cdef class NullArray(Array):
172178
pass

python/pyarrow/scalar.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,9 @@ cdef object box_arrow_scalar(DataType type,
194194
const shared_ptr[CArray]& sp_array,
195195
int index):
196196
cdef ArrayValue val
197-
if sp_array.get().IsNull(index):
197+
if type.type.type == Type_NA:
198+
return NA
199+
elif sp_array.get().IsNull(index):
198200
return NA
199201
else:
200202
val = _scalar_classes[type.type.type]()

python/pyarrow/table.pyx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,15 @@ cdef class ChunkedArray:
108108
for i in range(self.num_chunks):
109109
yield self.chunk(i)
110110

111+
def to_pylist(self):
112+
"""
113+
Convert to a list of native Python objects.
114+
"""
115+
result = []
116+
for i in range(self.num_chunks):
117+
result += self.chunk(i).to_pylist()
118+
return result
119+
111120

112121
cdef class Column:
113122
"""
@@ -143,6 +152,12 @@ cdef class Column:
143152

144153
return pd.Series(PyObject_to_object(arr), name=self.name)
145154

155+
def to_pylist(self):
156+
"""
157+
Convert to a list of native Python objects.
158+
"""
159+
return self.data.to_pylist()
160+
146161
cdef _check_nullptr(self):
147162
if self.column == NULL:
148163
raise ReferenceError("Column object references a NULL pointer."

python/pyarrow/tests/test_column.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def test_basics(self):
3535
assert column.length() == 5
3636
assert len(column) == 5
3737
assert column.shape == (5,)
38+
assert column.to_pylist() == [-10, -5, 0, 5, 10]
3839

3940
def test_pandas(self):
4041
data = [

python/pyarrow/tests/test_convert_builtin.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,34 @@
2222
class TestConvertList(unittest.TestCase):
2323

2424
def test_boolean(self):
25-
arr = pyarrow.from_pylist([True, None, False, None])
25+
expected = [True, None, False, None]
26+
arr = pyarrow.from_pylist(expected)
2627
assert len(arr) == 4
2728
assert arr.null_count == 2
2829
assert arr.type == pyarrow.bool_()
30+
assert arr.to_pylist() == expected
2931

3032
def test_empty_list(self):
3133
arr = pyarrow.from_pylist([])
3234
assert len(arr) == 0
3335
assert arr.null_count == 0
3436
assert arr.type == pyarrow.null()
37+
assert arr.to_pylist() == []
3538

3639
def test_all_none(self):
3740
arr = pyarrow.from_pylist([None, None])
3841
assert len(arr) == 2
3942
assert arr.null_count == 2
4043
assert arr.type == pyarrow.null()
44+
assert arr.to_pylist() == [None, None]
4145

4246
def test_integer(self):
43-
arr = pyarrow.from_pylist([1, None, 3, None])
47+
expected = [1, None, 3, None]
48+
arr = pyarrow.from_pylist(expected)
4449
assert len(arr) == 4
4550
assert arr.null_count == 2
4651
assert arr.type == pyarrow.int64()
52+
assert arr.to_pylist() == expected
4753

4854
def test_garbage_collection(self):
4955
import gc
@@ -62,13 +68,15 @@ def test_double(self):
6268
assert len(arr) == 6
6369
assert arr.null_count == 3
6470
assert arr.type == pyarrow.double()
71+
assert arr.to_pylist() == data
6572

6673
def test_string(self):
6774
data = ['foo', b'bar', None, 'arrow']
6875
arr = pyarrow.from_pylist(data)
6976
assert len(arr) == 4
7077
assert arr.null_count == 1
7178
assert arr.type == pyarrow.string()
79+
assert arr.to_pylist() == ['foo', 'bar', None, 'arrow']
7280

7381
def test_mixed_nesting_levels(self):
7482
pyarrow.from_pylist([1, 2, None])
@@ -90,3 +98,4 @@ def test_list_of_int(self):
9098
assert len(arr) == 4
9199
assert arr.null_count == 1
92100
assert arr.type == pyarrow.list_(pyarrow.int64())
101+
assert arr.to_pylist() == data

0 commit comments

Comments
 (0)