Skip to content

Commit 25e0106

Browse files
committed
ARROW-323: [Python] Opt-in to pyarrow.parquet extension rather than attempting and failing silently
Added a couple of ways to do this: either via the `--with-parquet` command-line option (preferred) or by passing an option through to CMake.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes apache#194 from wesm/ARROW-323 and squashes the following commits:

- 07c05cc [Wes McKinney] Update readme to illustrate proper use of with build_ext
- 3bd9a8d [Wes McKinney] Add --with-parquet option to setup.py
- 374e254 [Wes McKinney] Add to README about building the parquet extension
- cab55cb [Wes McKinney] Opt in to building the pyarrow.parquet extension, do not silently fail
1 parent 17c9ae7 commit 25e0106

File tree

3 files changed

+50
-16
lines changed

3 files changed

+50
-16
lines changed

python/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
5050
option(PYARROW_BUILD_TESTS
5151
"Build the PyArrow C++ googletest unit tests"
5252
OFF)
53+
option(PYARROW_BUILD_PARQUET
54+
"Build the PyArrow Parquet integration"
55+
OFF)
5356
endif()
5457

5558
find_program(CCACHE_FOUND ccache)
@@ -445,7 +448,10 @@ set(LINK_LIBS
445448
arrow_ipc
446449
)
447450

448-
if(PARQUET_FOUND AND PARQUET_ARROW_FOUND)
451+
if (PYARROW_BUILD_PARQUET)
452+
if(NOT (PARQUET_FOUND AND PARQUET_ARROW_FOUND))
453+
message(FATAL_ERROR "Unable to locate Parquet libraries")
454+
endif()
449455
ADD_THIRDPARTY_LIB(parquet_arrow
450456
SHARED_LIB ${PARQUET_ARROW_SHARED_LIB})
451457
set(LINK_LIBS

python/README.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ python setup.py build_ext --inplace
4848
py.test pyarrow
4949
```
5050

51-
To change the build type, use the `--build-type` option:
51+
To change the build type, use the `--build-type` option or set
52+
`$PYARROW_BUILD_TYPE`:
5253

5354
```bash
5455
python setup.py build_ext --build-type=release --inplace
@@ -57,9 +58,26 @@ python setup.py build_ext --build-type=release --inplace
5758
To pass through other build options to CMake, set the environment variable
5859
`$PYARROW_CMAKE_OPTIONS`.
5960

61+
#### Build the pyarrow Parquet file extension
62+
63+
To build the integration with [parquet-cpp][1], pass `--with-parquet` to
64+
the `build_ext` command in setup.py:
65+
66+
```
67+
python setup.py build_ext --with-parquet install
68+
```
69+
70+
Alternatively, add `-DPYARROW_BUILD_PARQUET=on` to the general CMake options.
71+
72+
```
73+
export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
74+
```
75+
6076
#### Build the documentation
6177

6278
```bash
6379
pip install -r doc/requirements.txt
6480
python setup.py build_sphinx
6581
```
82+
83+
[1]: https://github.com/apache/parquet-cpp

python/setup.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,15 @@ def run(self):
9797

9898
description = "Build the C-extensions for arrow"
9999
user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
100-
('build-type=', None, 'build type (debug or release)')]
101-
+ _build_ext.user_options)
100+
('build-type=', None, 'build type (debug or release)'),
101+
('with-parquet', None, 'build the Parquet extension')] +
102+
_build_ext.user_options)
102103

103104
def initialize_options(self):
104105
_build_ext.initialize_options(self)
105106
self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
106107
self.build_type = os.environ.get('PYARROW_BUILD_TYPE', 'debug').lower()
108+
self.with_parquet = False
107109

108110
CYTHON_MODULE_NAMES = [
109111
'array',
@@ -116,8 +118,6 @@ def initialize_options(self):
116118
'schema',
117119
'table']
118120

119-
CYTHON_ALLOWED_FAILURES = ['parquet']
120-
121121
def _run_cmake(self):
122122
# The directory containing this setup.py
123123
source = osp.dirname(osp.abspath(__file__))
@@ -141,17 +141,24 @@ def _run_cmake(self):
141141
if (cachedir != build_temp):
142142
return
143143

144-
pyexe_option = '-DPYTHON_EXECUTABLE=%s' % sys.executable
145144
static_lib_option = ''
146145
build_tests_option = ''
147146

148-
build_type_option = '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type)
147+
cmake_options = [
148+
'-DPYTHON_EXECUTABLE=%s' % sys.executable,
149+
static_lib_option,
150+
build_tests_option,
151+
]
152+
153+
if self.with_parquet:
154+
cmake_options.append('-DPYARROW_BUILD_PARQUET=on')
149155

150156
if sys.platform != 'win32':
151-
cmake_command = ['cmake', self.extra_cmake_args, pyexe_option,
152-
build_tests_option,
153-
build_type_option,
154-
static_lib_option, source]
157+
cmake_options.append('-DCMAKE_BUILD_TYPE={0}'
158+
.format(self.build_type))
159+
160+
cmake_command = (['cmake', self.extra_cmake_args] +
161+
cmake_options + [source])
155162

156163
self.spawn(cmake_command)
157164
args = ['make', 'VERBOSE=1']
@@ -166,10 +173,8 @@ def _run_cmake(self):
166173
# Generate the build files
167174
extra_cmake_args = shlex.split(self.extra_cmake_args)
168175
cmake_command = (['cmake'] + extra_cmake_args +
176+
cmake_options +
169177
[source,
170-
pyexe_option,
171-
static_lib_option,
172-
build_tests_option,
173178
'-G', cmake_generator])
174179
if "-G" in self.extra_cmake_args:
175180
cmake_command = cmake_command[:-2]
@@ -202,7 +207,7 @@ def _run_cmake(self):
202207
built_path = self.get_ext_built(name)
203208
if not os.path.exists(built_path):
204209
print(built_path)
205-
if name in self.CYTHON_ALLOWED_FAILURES:
210+
if self._failure_permitted(name):
206211
print('Cython module {0} failure permitted'.format(name))
207212
continue
208213
raise RuntimeError('libpyarrow C-extension failed to build:',
@@ -219,6 +224,11 @@ def _run_cmake(self):
219224

220225
os.chdir(saved_cwd)
221226

227+
def _failure_permitted(self, name):
228+
if name == 'parquet' and not self.with_parquet:
229+
return True
230+
return False
231+
222232
def _get_inplace_dir(self):
223233
pass
224234

0 commit comments

Comments
 (0)