Commit 637584b

ARROW-284: Disable arrow_parquet module in Travis CI to triage builds
Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #132 from wesm/ARROW-284 and squashes the following commits:

e3410cf [Wes McKinney] Install miniconda in $HOME to avoid long prefix issues in conda-build 2.0
9fd94f5 [Wes McKinney] Do not run death test when valgrind is enabled. Gracefully skip pyarrow.parquet when ARROW_PARQUET=off
ccf56f8 [Wes McKinney] Disable arrow_parquet module in Travis CI
1 parent: 2d8ec78
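
The common thread in this commit is making Parquet support optional end to end: CMake builds the parquet Cython module only when the arrow_parquet library is found, and the Python test suite skips, rather than fails, when the extension is absent. Below is a minimal, self-contained sketch of the import-guard pattern that the test_parquet.py diff introduces; the test body here is a hypothetical placeholder, not code from the commit.

import pytest

# pyarrow.parquet is importable only when the C++ tree was built
# with -DARROW_PARQUET=ON
try:
    import pyarrow.parquet as pq
    HAVE_PARQUET = True
except ImportError:
    HAVE_PARQUET = False

# Reusable marker: decorated tests are reported as skipped, not failed,
# when Parquet support was not built
parquet = pytest.mark.skipif(not HAVE_PARQUET,
                             reason='Parquet support not built')


@parquet
def test_uses_parquet(tmpdir):
    # Hypothetical placeholder; the real tests are in the diff below
    assert pq.read_table is not None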

File tree

11 files changed (+101, -60 lines)


ci/travis_before_script_cpp.sh

Lines changed: 2 additions & 2 deletions
@@ -25,8 +25,8 @@ echo $GTEST_HOME
 
 CMAKE_COMMON_FLAGS="\
 -DARROW_BUILD_BENCHMARKS=ON \
--DARROW_PARQUET=ON \
--DARROW_HDFS=on \
+-DARROW_PARQUET=OFF \
+-DARROW_HDFS=ON \
 -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
 
 if [ $TRAVIS_OS_NAME == "linux" ]; then

ci/travis_install_conda.sh

Lines changed: 3 additions & 1 deletion
@@ -9,7 +9,9 @@ else
 fi
 
 wget -O miniconda.sh $MINICONDA_URL
-export MINICONDA=$TRAVIS_BUILD_DIR/miniconda
+
+export MINICONDA=$HOME/miniconda
+
 bash miniconda.sh -b -p $MINICONDA
 export PATH="$MINICONDA/bin:$PATH"
 conda update -y -q conda

ci/travis_script_python.sh

Lines changed: 4 additions & 2 deletions
@@ -5,7 +5,7 @@ set -e
 PYTHON_DIR=$TRAVIS_BUILD_DIR/python
 
 # Re-use conda installation from C++
-export MINICONDA=$TRAVIS_BUILD_DIR/miniconda
+export MINICONDA=$HOME/miniconda
 export PATH="$MINICONDA/bin:$PATH"
 export PARQUET_HOME=$MINICONDA
 
@@ -31,7 +31,9 @@ python_version_tests() {
   # Expensive dependencies install from Continuum package repo
   conda install -y pip numpy pandas cython
 
-  conda install -y parquet-cpp arrow-cpp -c apache/channel/dev
+  # conda install -y parquet-cpp
+
+  conda install -y arrow-cpp -c apache/channel/dev
 
   # Other stuff pip install
   pip install -r requirements.txt

cpp/cmake_modules/FindParquet.cmake

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@ else ()
 endif ()
 
 mark_as_advanced(
+  PARQUET_FOUND
   PARQUET_INCLUDE_DIR
   PARQUET_LIBS
   PARQUET_LIBRARIES

cpp/src/arrow/util/memory-pool-test.cc

Lines changed: 6 additions & 0 deletions
@@ -46,6 +46,10 @@ TEST(DefaultMemoryPool, OOM) {
   ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
 }
 
+// Death tests and valgrind are known to not play well 100% of the time. See
+// googletest documentation
+#ifndef ARROW_VALGRIND
+
 TEST(DefaultMemoryPoolDeathTest, FreeLargeMemory) {
   MemoryPool* pool = default_memory_pool();
 
@@ -60,4 +64,6 @@ TEST(DefaultMemoryPoolDeathTest, FreeLargeMemory) {
   pool->Free(data, 100);
 }
 
+#endif  // ARROW_VALGRIND
+
 }  // namespace arrow

python/CMakeLists.txt

Lines changed: 25 additions & 16 deletions
@@ -340,8 +340,10 @@ if (PYARROW_BUILD_TESTS)
 endif()
 
 ## Parquet
-find_package(Parquet REQUIRED)
-include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
+find_package(Parquet)
+if(PARQUET_FOUND)
+  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
+endif()
 
 ## Arrow
 find_package(Arrow REQUIRED)
@@ -350,8 +352,6 @@ ADD_THIRDPARTY_LIB(arrow
   SHARED_LIB ${ARROW_SHARED_LIB})
 ADD_THIRDPARTY_LIB(arrow_io
   SHARED_LIB ${ARROW_IO_SHARED_LIB})
-ADD_THIRDPARTY_LIB(arrow_parquet
-  SHARED_LIB ${ARROW_PARQUET_SHARED_LIB})
 
 ############################################################
 # Linker setup
@@ -418,6 +418,16 @@ endif()
 add_subdirectory(src/pyarrow)
 add_subdirectory(src/pyarrow/util)
 
+set(CYTHON_EXTENSIONS
+  array
+  config
+  error
+  io
+  scalar
+  schema
+  table
+)
+
 set(PYARROW_SRCS
   src/pyarrow/common.cc
   src/pyarrow/config.cc
@@ -431,9 +441,19 @@ set(PYARROW_SRCS
 set(LINK_LIBS
   arrow
   arrow_io
-  arrow_parquet
 )
 
+if(PARQUET_FOUND AND ARROW_PARQUET_FOUND)
+  ADD_THIRDPARTY_LIB(arrow_parquet
+    SHARED_LIB ${ARROW_PARQUET_SHARED_LIB})
+  set(LINK_LIBS
+    ${LINK_LIBS}
+    arrow_parquet)
+  set(CYTHON_EXTENSIONS
+    ${CYTHON_EXTENSIONS}
+    parquet)
+endif()
+
 SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 
 add_library(pyarrow SHARED
@@ -448,17 +468,6 @@ endif()
 # Setup and build Cython modules
 ############################################################
 
-set(CYTHON_EXTENSIONS
-  array
-  config
-  error
-  io
-  parquet
-  scalar
-  schema
-  table
-)
-
 foreach(module ${CYTHON_EXTENSIONS})
   string(REPLACE "." ";" directories ${module})
   list(GET directories -1 module_name)

python/cmake_modules/FindArrow.cmake

Lines changed: 16 additions & 10 deletions
@@ -52,7 +52,7 @@ find_library(ARROW_IO_LIB_PATH NAMES arrow_io
   ${ARROW_SEARCH_LIB_PATH}
   NO_DEFAULT_PATH)
 
-if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH AND ARROW_PARQUET_LIB_PATH)
+if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH)
   set(ARROW_FOUND TRUE)
   set(ARROW_LIB_NAME libarrow)
   set(ARROW_IO_LIB_NAME libarrow_io)
@@ -64,18 +64,9 @@ if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH AND ARROW_PARQUET_LIB_PATH)
 
   set(ARROW_IO_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_IO_LIB_NAME}.a)
   set(ARROW_IO_SHARED_LIB ${ARROW_LIBS}/${ARROW_IO_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
-
-  set(ARROW_PARQUET_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PARQUET_LIB_NAME}.a)
-  set(ARROW_PARQUET_SHARED_LIB ${ARROW_LIBS}/${ARROW_PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
-else ()
-  set(ARROW_FOUND FALSE)
-endif ()
-
-if (ARROW_FOUND)
   if (NOT Arrow_FIND_QUIETLY)
     message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
     message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}")
-    message(STATUS "Found the Arrow Parquet library: ${ARROW_PARQUET_LIB_PATH}")
   endif ()
 else ()
   if (NOT Arrow_FIND_QUIETLY)
@@ -88,8 +79,23 @@ else ()
     message(STATUS "${ARROW_ERR_MSG}")
   endif (Arrow_FIND_REQUIRED)
   endif ()
+  set(ARROW_FOUND FALSE)
 endif ()
 
+if(ARROW_PARQUET_LIB_PATH)
+  set(ARROW_PARQUET_FOUND TRUE)
+  set(ARROW_PARQUET_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PARQUET_LIB_NAME}.a)
+  set(ARROW_PARQUET_SHARED_LIB ${ARROW_LIBS}/${ARROW_PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+  if (NOT Arrow_FIND_QUIETLY)
+    message(STATUS "Found the Arrow Parquet library: ${ARROW_PARQUET_LIB_PATH}")
+  endif ()
+else()
+  if (NOT Arrow_FIND_QUIETLY)
+    message(STATUS "Could not find Arrow Parquet library")
+  endif()
+  set(ARROW_PARQUET_FOUND FALSE)
+endif()
+
 mark_as_advanced(
   ARROW_INCLUDE_DIR
   ARROW_LIBS

python/pyarrow/tests/test_io.py

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ def hdfs_test_client():
 
 HDFS_TMP_PATH = '/tmp/pyarrow-test-{0}'.format(random.randint(0, 1000))
 
+
 @pytest.fixture(scope='session')
 def hdfs(request):
     fixture = hdfs_test_client()

python/pyarrow/tests/test_parquet.py

Lines changed: 26 additions & 12 deletions
@@ -15,33 +15,45 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from pyarrow.compat import unittest
-import pyarrow as arrow
-import pyarrow.parquet
+import pytest
 
-A = arrow
+import pyarrow as A
 
 import numpy as np
-import os.path
 import pandas as pd
 
 import pandas.util.testing as pdt
 
+try:
+    import pyarrow.parquet as pq
+    HAVE_PARQUET = True
+except ImportError:
+    HAVE_PARQUET = False
 
+# XXX: Make Parquet tests opt-in rather than skip-if-not-build
+parquet = pytest.mark.skipif(not HAVE_PARQUET,
+                             reason='Parquet support not built')
+
+
+@parquet
 def test_single_pylist_column_roundtrip(tmpdir):
     for dtype in [int, float]:
-        filename = tmpdir.join('single_{}_column.parquet'.format(dtype.__name__))
+        filename = tmpdir.join('single_{}_column.parquet'
+                               .format(dtype.__name__))
         data = [A.from_pylist(list(map(dtype, range(5))))]
         table = A.Table.from_arrays(('a', 'b'), data, 'table_name')
         A.parquet.write_table(table, filename.strpath)
-        table_read = pyarrow.parquet.read_table(filename.strpath)
-        for col_written, col_read in zip(table.itercolumns(), table_read.itercolumns()):
+        table_read = pq.read_table(filename.strpath)
+        for col_written, col_read in zip(table.itercolumns(),
+                                         table_read.itercolumns()):
            assert col_written.name == col_read.name
            assert col_read.data.num_chunks == 1
            data_written = col_written.data.chunk(0)
            data_read = col_read.data.chunk(0)
            assert data_written.equals(data_read)
 
+
+@parquet
 def test_pandas_parquet_2_0_rountrip(tmpdir):
     size = 10000
     np.random.seed(0)
@@ -58,17 +70,20 @@ def test_pandas_parquet_2_0_rountrip(tmpdir):
         'float64': np.arange(size, dtype=np.float64),
         'bool': np.random.randn(size) > 0,
         # Pandas only support ns resolution, Arrow at the moment only ms
-        'datetime': np.arange("2016-01-01T00:00:00.001", size, dtype='datetime64[ms]'),
+        'datetime': np.arange("2016-01-01T00:00:00.001", size,
+                              dtype='datetime64[ms]'),
         'str': [str(x) for x in range(size)],
         'str_with_nulls': [None] + [str(x) for x in range(size - 2)] + [None]
     })
     filename = tmpdir.join('pandas_rountrip.parquet')
     arrow_table = A.from_pandas_dataframe(df, timestamps_to_ms=True)
     A.parquet.write_table(arrow_table, filename.strpath, version="2.0")
-    table_read = pyarrow.parquet.read_table(filename.strpath)
+    table_read = pq.read_table(filename.strpath)
     df_read = table_read.to_pandas()
     pdt.assert_frame_equal(df, df_read)
 
+
+@parquet
 def test_pandas_parquet_1_0_rountrip(tmpdir):
     size = 10000
     np.random.seed(0)
@@ -88,11 +103,10 @@ def test_pandas_parquet_1_0_rountrip(tmpdir):
     filename = tmpdir.join('pandas_rountrip.parquet')
     arrow_table = A.from_pandas_dataframe(df)
     A.parquet.write_table(arrow_table, filename.strpath, version="1.0")
-    table_read = pyarrow.parquet.read_table(filename.strpath)
+    table_read = pq.read_table(filename.strpath)
     df_read = table_read.to_pandas()
 
     # We pass uint32_t as int64_t if we write Parquet version 1.0
     df['uint32'] = df['uint32'].values.astype(np.int64)
 
     pdt.assert_frame_equal(df, df_read)
-
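The comment in the last hunk reflects a format limitation: Parquet version 1.0 has no unsigned 32-bit integer type, so uint32 data is written as int64 and reads back widened, which is why the test casts the expected column before comparing. A minimal sketch of that round trip, assuming a pyarrow build with Parquet support and using the same API calls as the tests above ('example.parquet' is an illustrative path):

import numpy as np
import pandas as pd

import pyarrow as A
import pyarrow.parquet as pq

df = pd.DataFrame({'uint32': np.arange(100, dtype=np.uint32)})
table = A.from_pandas_dataframe(df)

# Parquet 1.0 cannot represent uint32, so the column reads back as int64
A.parquet.write_table(table, 'example.parquet', version='1.0')
df_read = pq.read_table('example.parquet').to_pandas()
assert df_read['uint32'].dtype == np.int64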
python/pyarrow/tests/test_table.py

Lines changed: 1 addition & 6 deletions
@@ -16,11 +16,7 @@
 # under the License.
 
 from pyarrow.compat import unittest
-import pyarrow as arrow
-
-A = arrow
-
-import pandas as pd
+import pyarrow as A
 
 
 class TestRowBatch(unittest.TestCase):
@@ -76,4 +72,3 @@ def test_pandas(self):
         assert set(df.columns) == set(('a', 'b'))
         assert df.shape == (5, 2)
         assert df.ix[0, 'b'] == -10
-