Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,6 @@ cdef class Array:
def __iter__(self):
    """
    Iterate over the elements of this array in index order.

    Yields ``self.getitem(i)`` for every ``i`` in ``range(len(self))``.
    """
    # The generator finishes by running off the end of the loop.  An
    # explicit ``raise StopIteration`` is redundant, and wherever PEP 479
    # semantics are in effect it surfaces as a RuntimeError instead of
    # ending the iteration, so none is raised here.
    for i in range(len(self)):
        yield self.getitem(i)

def __repr__(self):
from pyarrow.formatting import array_format
Expand All @@ -526,8 +525,6 @@ cdef class Array:
raise NotImplemented

def __getitem__(self, key):
cdef Py_ssize_t n = len(self)

if PySlice_Check(key):
return _normalize_slice(self, key)

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ cdef class ChunkedArray:
CChunkedArray* chunked_array

cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
cdef getitem(self, int64_t i)


cdef class Column:
Expand Down
26 changes: 16 additions & 10 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,29 @@ cdef class ChunkedArray:
"""
return self.chunked_array.null_count()

def __getitem__(self, key):
cdef:
int64_t item
int i
def __iter__(self):
    """
    Iterate over every element of the chunked array.

    Chunks are visited in the order ``self.iterchunks()`` produces them,
    and each chunk's elements are yielded in that chunk's own iteration
    order.
    """
    for piece in self.iterchunks():
        for element in piece:
            yield element

def __getitem__(self, key):
    """
    Return a slice or a single element of this chunked array.

    Parameters
    ----------
    key : slice or integer
        A slice is handed to ``_normalize_slice``; an integer is handed
        to ``self.getitem``.

    Raises
    ------
    TypeError
        If ``key`` is neither a slice nor an integer.
    """
    if isinstance(key, slice):
        return _normalize_slice(self, key)
    elif isinstance(key, six.integer_types):
        # Element lookup is delegated to ``getitem``; walking the chunks
        # here as well would duplicate that logic.
        return self.getitem(key)
    else:
        raise TypeError("key must either be a slice or integer")

cdef getitem(self, int64_t i):
    # Return the element at position ``i`` of the chunked array by
    # locating the chunk that contains it.
    cdef:
        int chunk_idx
        int64_t chunk_length

    # ``_normalize_index`` is defined outside this view; presumably it
    # resolves negative indices and rejects out-of-range ones -- confirm.
    remaining = _normalize_index(i, self.chunked_array.length())

    for chunk_idx in range(self.num_chunks):
        chunk_length = self.chunked_array.chunk(chunk_idx).get().length()
        if remaining < chunk_length:
            # ``remaining`` is now an offset inside this chunk.
            return self.chunk(chunk_idx)[remaining]
        # Not in this chunk: skip past it and keep looking.
        remaining -= chunk_length
    # If no chunk contains the index, the function ends without an
    # explicit return value.

def slice(self, offset=0, length=None):
"""
Compute zero-copy slice of this ChunkedArray
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import collections
import datetime
import pytest
import struct
Expand Down Expand Up @@ -162,6 +163,15 @@ def test_array_slice():
assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop]


def test_array_iter():
    """Iterating an Array yields its elements in order; it is Iterable."""
    # ``collections.Iterable`` was a deprecated alias that Python 3.10
    # removed; the ABC lives in ``collections.abc``.  Python 2 has no
    # ``collections.abc``, hence the fallback.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    arr = pa.array(range(10))

    # Materialize before comparing: zip() stops at its shorter input, so
    # zipping range(10) directly against the array would still pass if
    # iteration ended early.
    values = list(arr)
    assert len(values) == 10

    for i, j in zip(range(10), values):
        assert i == j

    assert isinstance(arr, Iterable)


def test_struct_array_slice():
# ARROW-2311: slicing nested arrays needs special care
ty = pa.struct([pa.field('a', pa.int8()),
Expand Down
17 changes: 16 additions & 1 deletion python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from collections import OrderedDict
try:
    from collections.abc import Iterable
except ImportError:  # Python 2 has no collections.abc
    from collections import Iterable
import numpy as np
from pandas.util.testing import assert_frame_equal
import pandas as pd
Expand Down Expand Up @@ -49,6 +49,21 @@ def test_chunked_array_getitem():
assert data_slice.to_pylist() == []


def test_chunked_array_iter():
    """Iterating a ChunkedArray walks all chunks in order; it is Iterable."""
    data = [
        pa.array([0]),
        pa.array([1, 2, 3]),
        pa.array([4, 5, 6]),
        pa.array([7, 8, 9]),
    ]
    arr = pa.chunked_array(data)

    # Materialize before comparing: zip() stops at its shorter input, so
    # zipping range(10) directly against the chunked array would still
    # pass if iteration stopped after the first chunk.
    values = list(arr)
    assert len(values) == 10

    for i, j in zip(range(10), values):
        assert i == j

    assert isinstance(arr, Iterable)


def test_column_basics():
data = [
pa.array([-10, -5, 0, 5, 10])
Expand Down