apache · kszucs · Jun 6, 2018
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
@@ -502,7 +502,6 @@ cdef class Array:
     def __iter__(self):
         for i in range(len(self)):
             yield self.getitem(i)
-        raise StopIteration
 
     def __repr__(self):
         from pyarrow.formatting import array_format
@@ -526,8 +525,6 @@ cdef class Array:
         raise NotImplemented
 
     def __getitem__(self, key):
-        cdef Py_ssize_t n = len(self)
-
         if PySlice_Check(key):
             return _normalize_slice(self, key)
 

diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
@@ -297,6 +297,7 @@ cdef class ChunkedArray:
         CChunkedArray* chunked_array
 
     cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array)
+    cdef getitem(self, int64_t i)  # implemented in table.pxi
 
 
 cdef class Column:

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
@@ -72,23 +72,29 @@ cdef class ChunkedArray:
         """
         return self.chunked_array.null_count()
 
-    def __getitem__(self, key):
-        cdef:
-            int64_t item
-            int i
+    def __iter__(self):
+        for part in self.iterchunks():  # one chunk at a time
+            for value in part:  # flatten: yield each element of the chunk
+                yield value
 
+    def __getitem__(self, key):  # key: a slice or an integer
         if isinstance(key, slice):
             return _normalize_slice(self, key)
         elif isinstance(key, six.integer_types):
-            index = _normalize_index(key, self.chunked_array.length())
-            for i in range(self.num_chunks):
-                if index < self.chunked_array.chunk(i).get().length():
-                    return self.chunk(i)[index]
-                else:
-                    index -= self.chunked_array.chunk(i).get().length()
+            return self.getitem(key)  # chunk lookup lives in cdef getitem()
         else:
             raise TypeError("key must either be a slice or integer")
 
+    cdef getitem(self, int64_t i):
+        # Map the index from _normalize_index onto the chunk that holds it.
+        cdef int j
+        index = _normalize_index(i, self.chunked_array.length())
+        for j in range(self.num_chunks):
+            chunk_len = self.chunked_array.chunk(j).get().length()
+            if index < chunk_len:
+                return self.chunk(j)[index]
+            index -= chunk_len  # not here: make index relative to next chunk
+
     def slice(self, offset=0, length=None):
         """
         Compute zero-copy slice of this ChunkedArray

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import collections
 import datetime
 import pytest
 import struct
@@ -162,6 +163,15 @@ def test_array_slice():
             assert arr[start:stop].to_pylist() == arr.to_pylist()[start:stop]
 
 
+def test_array_iter():
+    """Iterating an Array must yield every element, in order."""
+    arr = pa.array(range(10))
+    assert isinstance(arr, collections.Iterable)
+    assert len(list(arr)) == 10  # zip() below would hide a short iterator
+    for i, j in zip(range(10), arr):
+        assert i == j
+
+
 def test_struct_array_slice():
     # ARROW-2311: slicing nested arrays needs special care
     ty = pa.struct([pa.field('a', pa.int8()),

diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from collections import OrderedDict
+from collections import OrderedDict, Iterable
 import numpy as np
 from pandas.util.testing import assert_frame_equal
 import pandas as pd
@@ -49,6 +49,21 @@ def test_chunked_array_getitem():
     assert data_slice.to_pylist() == []
 
 
+def test_chunked_array_iter():
+    """Iterating a ChunkedArray must cross chunk boundaries in order."""
+    data = [
+        pa.array([0]),
+        pa.array([1, 2, 3]),
+        pa.array([4, 5, 6]),
+        pa.array([7, 8, 9])
+    ]
+    arr = pa.chunked_array(data)
+    assert isinstance(arr, Iterable)
+    assert len(list(arr)) == 10  # zip() below would hide a short iterator
+    for i, j in zip(range(10), arr):
+        assert i == j
+
+
 def test_column_basics():
     data = [
         pa.array([-10, -5, 0, 5, 10])