Skip to content

Commit d4755e4

Browse files
committed
ARROW-2677: [Python] Expose Parquet ZSTD compression
Author: Korn, Uwe <Uwe.Korn@blue-yonder.com>

Closes #2120 from xhochy/ARROW-2677 and squashes the following commits:

585a4bf <Korn, Uwe> flake8
c540796 <Korn, Uwe> Mention possible compression options in docstring
68f83bb <Korn, Uwe> ARROW-2677: Expose Parquet ZSTD compression
1 parent 2b00b8a commit d4755e4

File tree

4 files changed

+7
-2
lines changed

4 files changed

+7
-2
lines changed

python/pyarrow/_parquet.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
102102
ParquetCompression_LZO" parquet::Compression::LZO"
103103
ParquetCompression_BROTLI" parquet::Compression::BROTLI"
104104
ParquetCompression_LZ4" parquet::Compression::LZ4"
105+
ParquetCompression_ZSTD" parquet::Compression::ZSTD"
105106

106107
enum ParquetVersion" parquet::ParquetVersion::type":
107108
ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"

python/pyarrow/_parquet.pyx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,8 @@ cdef class ParquetReader:
809809
return array
810810

811811
cdef int check_compression_name(name) except -1:
812-
if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4']:
812+
if name.upper() not in ['NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4',
813+
'ZSTD']:
813814
raise ArrowException("Unsupported compression: " + name)
814815
return 0
815816

@@ -826,6 +827,8 @@ cdef ParquetCompression compression_from_name(str name):
826827
return ParquetCompression_BROTLI
827828
elif name == "LZ4":
828829
return ParquetCompression_LZ4
830+
elif name == "ZSTD":
831+
return ParquetCompression_ZSTD
829832
else:
830833
return ParquetCompression_UNCOMPRESSED
831834

python/pyarrow/parquet.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ def _sanitize_table(table, new_schema, flavor):
251251
Valid values: {None, 'ms', 'us'}
252252
compression : str or dict
253253
Specify the compression codec, either on a general basis or per-column.
254+
Valid values: {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'}
254255
flavor : {'spark'}, default None
255256
Sanitize schema or set other compatibility options for compatibility"""
256257

python/pyarrow/tests/test_parquet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ def test_pandas_parquet_configuration_options(tmpdir):
501501
df_read = table_read.to_pandas()
502502
tm.assert_frame_equal(df, df_read)
503503

504-
for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4']:
504+
for compression in ['NONE', 'SNAPPY', 'GZIP', 'LZ4', 'ZSTD']:
505505
_write_table(arrow_table, filename.strpath,
506506
version="2.0",
507507
compression=compression)

0 commit comments

Comments
 (0)