# specific language governing permissions and limitations
# under the License.

import pytest

import pyarrow as A

import numpy as np
import pandas as pd

import pandas.util.testing as pdt
# Parquet support is an optional build-time component of pyarrow, so probe
# for it once at import time and expose a pytest marker that skips any
# Parquet test when the extension is not available.
try:
    import pyarrow.parquet as pq
    HAVE_PARQUET = True
except ImportError:
    HAVE_PARQUET = False

# XXX: Make Parquet tests opt-in rather than skip-if-not-build
parquet = pytest.mark.skipif(not HAVE_PARQUET,
                             reason='Parquet support not built')
@parquet
def test_single_pylist_column_roundtrip(tmpdir):
    """Write a two-column table built from Python lists to a Parquet file
    and verify that reading it back yields identical column names, a single
    chunk per column, and equal chunk data.

    Exercises both int and float element types.
    """
    for dtype in [int, float]:
        filename = tmpdir.join('single_{}_column.parquet'
                               .format(dtype.__name__))
        data = [A.from_pylist(list(map(dtype, range(5))))]
        table = A.Table.from_arrays(('a', 'b'), data, 'table_name')
        A.parquet.write_table(table, filename.strpath)
        table_read = pq.read_table(filename.strpath)
        for col_written, col_read in zip(table.itercolumns(),
                                         table_read.itercolumns()):
            assert col_written.name == col_read.name
            # Round-tripped columns should come back as one contiguous chunk.
            assert col_read.data.num_chunks == 1
            data_written = col_written.data.chunk(0)
            data_read = col_read.data.chunk(0)
            assert data_written.equals(data_read)
# NOTE(review): the lines below are a raw diff view of this test; the
# `@@ -58,17 +70,20 @@` hunk header hides the middle of the function body
# (presumably the pd.DataFrame({...}) construction with the integer and
# unsigned columns — TODO recover from the full file before editing).
# Visible intent: build a mixed-dtype DataFrame, write it with Parquet
# format version "2.0" via A.parquet.write_table, read it back with
# pq.read_table, and assert the round-tripped frame equals the original.
55+
56+ @parquet
4557def test_pandas_parquet_2_0_rountrip (tmpdir ):
4658 size = 10000
4759 np .random .seed (0 )
@@ -58,17 +70,20 @@ def test_pandas_parquet_2_0_rountrip(tmpdir):
5870 'float64' : np .arange (size , dtype = np .float64 ),
5971 'bool' : np .random .randn (size ) > 0 ,
6072 # Pandas only support ns resolution, Arrow at the moment only ms
61- 'datetime' : np .arange ("2016-01-01T00:00:00.001" , size , dtype = 'datetime64[ms]' ),
73+ 'datetime' : np .arange ("2016-01-01T00:00:00.001" , size ,
74+ dtype = 'datetime64[ms]' ),
6275 'str' : [str (x ) for x in range (size )],
6376 'str_with_nulls' : [None ] + [str (x ) for x in range (size - 2 )] + [None ]
6477 })
6578 filename = tmpdir .join ('pandas_rountrip.parquet' )
6679 arrow_table = A .from_pandas_dataframe (df , timestamps_to_ms = True )
6780 A .parquet .write_table (arrow_table , filename .strpath , version = "2.0" )
68- table_read = pyarrow . parquet .read_table (filename .strpath )
81+ table_read = pq .read_table (filename .strpath )
6982 df_read = table_read .to_pandas ()
7083 pdt .assert_frame_equal (df , df_read )
7184
# NOTE(review): raw diff view again — the `@@ -88,11 +103,10 @@` hunk header
# hides the DataFrame construction for this test (TODO recover from the full
# file before editing). Visible intent: same round-trip as the 2.0 test but
# written with Parquet format version "1.0"; uint32 is widened to int64
# before comparison because version 1.0 stores uint32 as int64.
85+
86+ @parquet
7287def test_pandas_parquet_1_0_rountrip (tmpdir ):
7388 size = 10000
7489 np .random .seed (0 )
@@ -88,11 +103,10 @@ def test_pandas_parquet_1_0_rountrip(tmpdir):
88103 filename = tmpdir .join ('pandas_rountrip.parquet' )
89104 arrow_table = A .from_pandas_dataframe (df )
90105 A .parquet .write_table (arrow_table , filename .strpath , version = "1.0" )
91- table_read = pyarrow . parquet .read_table (filename .strpath )
106+ table_read = pq .read_table (filename .strpath )
92107 df_read = table_read .to_pandas ()
93108
94109 # We pass uint32_t as int64_t if we write Parquet version 1.0
95110 df ['uint32' ] = df ['uint32' ].values .astype (np .int64 )
96111
97112 pdt .assert_frame_equal (df , df_read )
98-
0 commit comments