4 changes: 2 additions & 2 deletions ci/travis_lint.sh
@@ -43,9 +43,9 @@ if [ "$ARROW_CI_PYTHON_AFFECTED" != "0" ]; then

PYTHON_DIR=$TRAVIS_BUILD_DIR/python

flake8 --count $PYTHON_DIR/pyarrow
flake8 --count $PYTHON_DIR

# Check Cython files with some checks turned off
flake8 --count --config=$PYTHON_DIR/.flake8.cython \
$PYTHON_DIR/pyarrow
$PYTHON_DIR
fi
1 change: 0 additions & 1 deletion python/benchmarks/__init__.py
@@ -14,4 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

1 change: 0 additions & 1 deletion python/benchmarks/array_ops.py
@@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.

import numpy as np
import pyarrow as pa


20 changes: 9 additions & 11 deletions python/benchmarks/common.py
@@ -19,7 +19,6 @@
import decimal
from functools import partial
import itertools
import os
Member: flake8 fails on line 41 as it's Python3 only. We should either only execute it for Python 3 or make it Python2 compatible.

Member (Author): fixed
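Assuming the flagged line is one of the keyword-only-argument signatures changed below, a minimal sketch of the incompatibility (Python 2 rejects the bare "*" in a def):

    # Python 3 only: the bare "*" makes seed a keyword-only argument
    def get_random_bytes(n, *, seed=42):
        ...

    # Python 2 compatible form, as used after this change
    def get_random_bytes(n, seed=42):
        ...
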
import sys
import unicodedata

@@ -39,7 +38,7 @@ def _multiplicate_sequence(base, target_size):
return [base] * q + [base[:r]]


def get_random_bytes(n, *, seed=42):
def get_random_bytes(n, seed=42):
"""
Generate a random bytes object of size *n*.
Note the result might be compressible.
@@ -58,7 +57,7 @@ def get_random_bytes(n, *, seed=42):
return result


def get_random_ascii(n, *, seed=42):
def get_random_ascii(n, seed=42):
"""
Get a random ASCII-only unicode string of size *n*.
"""
@@ -69,7 +68,7 @@ def get_random_ascii(n, *, seed=42):
return result


def _random_unicode_letters(n, *, seed=42):
def _random_unicode_letters(n, seed=42):
"""
Generate a string of random unicode letters (slow).
"""
@@ -93,7 +92,7 @@ def _get_more_candidates():
_1024_random_unicode_letters = _random_unicode_letters(1024)


def get_random_unicode(n, *, seed=42):
def get_random_unicode(n, seed=42):
"""
Get a random non-ASCII unicode string of size *n*.
"""
@@ -179,7 +178,8 @@ def generate_object_list(self, n, none_prob=DEFAULT_NONE_PROB):
self.sprinkle_nones(data, none_prob)
return data

def _generate_varying_sequences(self, random_factory, n, min_size, max_size, none_prob):
def _generate_varying_sequences(self, random_factory, n, min_size,
max_size, none_prob):
"""
Generate a list of *n* sequences of varying size between *min_size*
and *max_size*, with *none_prob* probability of an entry being None.
@@ -207,7 +207,6 @@ def generate_fixed_binary_list(self, n, size, none_prob=DEFAULT_NONE_PROB):
return self._generate_varying_sequences(get_random_bytes, n,
size, size, none_prob)


def generate_varying_binary_list(self, n, min_size, max_size,
none_prob=DEFAULT_NONE_PROB):
"""
@@ -217,7 +216,6 @@ def generate_varying_binary_list(self, n, min_size, max_size,
return self._generate_varying_sequences(get_random_bytes, n,
min_size, max_size, none_prob)


def generate_ascii_string_list(self, n, min_size, max_size,
none_prob=DEFAULT_NONE_PROB):
"""
@@ -227,7 +225,6 @@ def generate_ascii_string_list(self, n, min_size, max_size,
return self._generate_varying_sequences(get_random_ascii, n,
min_size, max_size, none_prob)


def generate_unicode_string_list(self, n, min_size, max_size,
none_prob=DEFAULT_NONE_PROB):
"""
@@ -237,7 +234,6 @@ def generate_unicode_string_list(self, n, min_size, max_size,
return self._generate_varying_sequences(get_random_unicode, n,
min_size, max_size, none_prob)


def generate_int_list_list(self, n, min_size, max_size,
none_prob=DEFAULT_NONE_PROB):
"""
@@ -263,7 +259,9 @@ def generate_tuple_list(self, n, none_prob=DEFAULT_NONE_PROB):
def generate_dict_list(self, n, none_prob=DEFAULT_NONE_PROB):
"""
Generate a list of dicts with random values.
Each dict has the form `{'u': int value, 'v': float value, 'w': bool value}`
Each dict has the form

`{'u': int value, 'v': float value, 'w': bool value}`
"""
ints = self.generate_int_list(n, none_prob=none_prob)
floats = self.generate_float_list(n, none_prob=none_prob)
2 changes: 0 additions & 2 deletions python/benchmarks/microbenchmarks.py
@@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.

import pyarrow as pa
import pyarrow.benchmark as pb

from . import common
@@ -44,4 +43,3 @@ def setup(self, type_name):

def time_PandasObjectIsNull(self, *args):
pb.benchmark_PandasObjectIsNull(self.lst)

9 changes: 4 additions & 5 deletions python/benchmarks/plasma.py
@@ -18,11 +18,8 @@
import numpy as np
import timeit

import pyarrow as pa
import pyarrow.plasma as plasma

from . import common


class SimplePlasmaThroughput(object):
"""Benchmark plasma store throughput with a single client."""
@@ -32,7 +29,8 @@ class SimplePlasmaThroughput(object):
timer = timeit.default_timer

def setup(self, size):
self.plasma_store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
self.plasma_store_ctx = plasma.start_plasma_store(
plasma_store_memory=10**9)
plasma_store_name, p = self.plasma_store_ctx.__enter__()
self.plasma_client = plasma.connect(plasma_store_name, "", 64)

@@ -51,7 +49,8 @@ class SimplePlasmaLatency(object):
timer = timeit.default_timer

def setup(self):
self.plasma_store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
self.plasma_store_ctx = plasma.start_plasma_store(
plasma_store_memory=10**9)
plasma_store_name, p = self.plasma_store_ctx.__enter__()
self.plasma_client = plasma.connect(plasma_store_name, "", 64)

15 changes: 9 additions & 6 deletions python/benchmarks/streaming.py
@@ -26,12 +26,15 @@
def generate_chunks(total_size, nchunks, ncols, dtype=np.dtype('int64')):
rowsize = total_size // nchunks // ncols
assert rowsize % dtype.itemsize == 0

def make_column(col, chunk):
return np.frombuffer(common.get_random_bytes(
rowsize, seed=col + 997 * chunk)).view(dtype)

return [pd.DataFrame({
'c' + str(col): np.frombuffer(
common.get_random_bytes(rowsize, seed=col + 997 * chunk)).view(dtype)
for col in range(ncols)
})
for chunk in range(nchunks)]
'c' + str(col): make_column(col, chunk)
for col in range(ncols)})
for chunk in range(nchunks)]


class StreamReader(object):
@@ -64,4 +67,4 @@ def setup(self, chunk_size):
def time_read_to_dataframe(self, *args):
reader = pa.RecordBatchStreamReader(self.source)
table = reader.read_all()
df = table.to_pandas()
df = table.to_pandas() # noqa
6 changes: 3 additions & 3 deletions python/doc/source/conf.py
@@ -30,6 +30,7 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import glob
import os
import sys

@@ -77,7 +78,6 @@
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

import glob
autosummary_generate = glob.glob("*.rst")

# The encoding of source files.
@@ -187,8 +187,8 @@
# html_logo = None

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# the docs. This file should be a Windows icon file (.ico) being 16x16 or
# 32x32 pixels large.
#
# html_favicon = None

4 changes: 2 additions & 2 deletions python/examples/plasma/sorting/multimerge.pyx
@@ -23,9 +23,9 @@ from libc.stdint cimport uintptr_t
from libcpp.vector cimport vector
from libcpp.pair cimport pair

cimport numpy as np
import numpy as np

cimport numpy as np

cdef extern from "<queue>" namespace "std" nogil:
cdef cppclass priority_queue[T]:
@@ -44,7 +44,7 @@ def multimerge2d(*arrays):
This assumes C style ordering for both input and output arrays. For
each input array we have array[i,0] <= array[i+1,0] and for the output
array the same will hold.

Ideally this code would be simpler and also support both C style
and Fortran style ordering.
"""
1 change: 0 additions & 1 deletion python/examples/plasma/sorting/sort_df.py
@@ -17,7 +17,6 @@

from multiprocessing import Pool
import numpy as np
import os
import pandas as pd
import pyarrow as pa
import pyarrow.plasma as plasma
1 change: 1 addition & 0 deletions python/scripts/test_leak.py
@@ -57,4 +57,5 @@ def leak2():

gc.collect()


leak2()
26 changes: 16 additions & 10 deletions python/setup.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -102,7 +101,8 @@ def run(self):
('with-static-boost', None, 'link boost statically'),
('with-plasma', None, 'build the Plasma extension'),
('with-orc', None, 'build the ORC extension'),
('generate-coverage', None, 'enable Cython code coverage'),
('generate-coverage', None,
'enable Cython code coverage'),
('bundle-boost', None,
'bundle the (shared) Boost libraries'),
('bundle-arrow-cpp', None,
@@ -116,7 +116,8 @@ def initialize_options(self):
self.cmake_generator = 'Visual Studio 14 2015 Win64'
self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
self.build_type = os.environ.get('PYARROW_BUILD_TYPE', 'debug').lower()
self.boost_namespace = os.environ.get('PYARROW_BOOST_NAMESPACE', 'boost')
self.boost_namespace = os.environ.get('PYARROW_BOOST_NAMESPACE',
'boost')

self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '')

@@ -252,7 +253,8 @@ def _run_cmake(self):
print("-- Finished cmake for pyarrow")
# Do the build
print("-- Running cmake --build for pyarrow")
self.spawn(['cmake', '--build', '.', '--config', self.build_type])
self.spawn(['cmake', '--build', '.', '--config',
self.build_type])
print("-- Finished cmake --build for pyarrow")

if self.inplace:
@@ -297,14 +299,16 @@ def _run_cmake(self):
shutil.move(pjoin(build_prefix, 'include'),
pjoin(build_lib, 'pyarrow'))

# Move the built C-extension to the place expected by the Python build
# Move the built C-extension to the place expected by the Python
# build
self._found_names = []
for name in self.CYTHON_MODULE_NAMES:
built_path = self.get_ext_built(name)
if not os.path.exists(built_path):
print(built_path)
if self._failure_permitted(name):
print('Cython module {0} failure permitted'.format(name))
print('Cython module {0} failure permitted'
.format(name))
continue
raise RuntimeError('pyarrow C-extension failed to build:',
os.path.abspath(built_path))
@@ -337,11 +341,11 @@ def _run_cmake(self):

if os.path.exists(self.get_ext_built_api_header(name)):
shutil.move(self.get_ext_built_api_header(name),
pjoin(os.path.dirname(ext_path), name + '_api.h'))
pjoin(os.path.dirname(ext_path),
name + '_api.h'))

# Move the plasma store
if self.with_plasma:
build_py = self.get_finalized_command('build_py')
source = os.path.join(self.build_type, "plasma_store")
target = os.path.join(build_lib,
self._get_build_dir(),
@@ -486,7 +490,8 @@ def has_ext_modules(foo):
def parse_version(root):
from setuptools_scm import version_from_scm
import setuptools_scm.git
describe = setuptools_scm.git.DEFAULT_DESCRIBE + " --match 'apache-arrow-[0-9]*'"
describe = (setuptools_scm.git.DEFAULT_DESCRIBE +
" --match 'apache-arrow-[0-9]*'")
# Strip catchall from the commandline
describe = describe.replace("--match *.*", "")
version = setuptools_scm.git.parse(root, describe)
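As a side note, a minimal sketch of the string manipulation parse_version performs (the exact value of setuptools_scm.git.DEFAULT_DESCRIBE is an assumption here; only the "--match *.*" fragment is confirmed by the replace() above):

    # assumed default; the real DEFAULT_DESCRIBE may differ apart from "--match *.*"
    default_describe = "git describe --dirty --tags --long --match *.*"
    describe = default_describe + " --match 'apache-arrow-[0-9]*'"
    describe = describe.replace("--match *.*", "")
    # -> "git describe --dirty --tags --long  --match 'apache-arrow-[0-9]*'"
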
@@ -520,7 +525,8 @@ def parse_version(root):
'plasma_store = pyarrow:_plasma_store_entry_point'
]
},
use_scm_version={"root": "..", "relative_to": __file__, "parse": parse_version},
use_scm_version={"root": "..", "relative_to": __file__,
"parse": parse_version},
setup_requires=['setuptools_scm', 'cython >= 0.27'] + setup_requires,
install_requires=install_requires,
tests_require=['pytest', 'pandas'],
2 changes: 0 additions & 2 deletions python/testing/parquet_interop.py
@@ -16,10 +16,8 @@
# under the License.

import os
import pytest

import fastparquet
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pandas.util.testing as tm