Skip to content

Commit 2422d9c

Browse files
committed
ARROW-2660: [Python] Experimental zero-copy pickling
Zero-copy pickling of buffers and buffer-based objects will be possible using PEP 574 (if/when accepted). The PyPI backport "pickle5" helps us test that possibility.

Author: Antoine Pitrou <antoine@python.org>

Closes #2161 from pitrou/ARROW-2660-zero-copy-pickling and squashes the following commits:

50f0491 <Antoine Pitrou> Fix test on Python 2.7 (hopefully)
132939c <Antoine Pitrou> Add pickle5 to CI environments
892302a <Antoine Pitrou> ARROW-2660: Zero-copy pickling
1 parent 161d1f0 commit 2422d9c

File tree

5 files changed

+51
-13
lines changed

5 files changed

+51
-13
lines changed

ci/cpp-python-msvc-build.bat

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ popd
133133

134134
pushd python
135135

136+
pip install pickle5
137+
136138
set PYARROW_CXXFLAGS=/WX
137139
set PYARROW_CMAKE_GENERATOR=%GENERATOR%
138140
set PYARROW_BUNDLE_ARROW_CPP=ON
@@ -167,6 +169,6 @@ pip install %WHEEL_PATH% || exit /B
167169
python -c "import pyarrow" || exit /B
168170
python -c "import pyarrow.parquet" || exit /B
169171

170-
pip install pandas pytest pytest-faulthandler
172+
pip install pandas pickle5 pytest pytest-faulthandler
171173

172174
py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B

ci/travis_script_python.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ pushd $ARROW_PYTHON_DIR
102102

103103
# Other stuff pip install
104104
pip install -q -r requirements.txt
105+
if [ "$PYTHON_VERSION" == "3.6" ]; then
106+
pip install -q pickle5
107+
fi
105108
if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
106109
export PYARROW_GENERATE_COVERAGE=1
107110
pip install -q coverage

python/pyarrow/compat.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ def frombytes(o):
107107
def unichar(s):
108108
return unichr(s)
109109
else:
110-
import pickle as builtin_pickle
110+
try:
111+
import pickle5 as builtin_pickle
112+
except ImportError:
113+
import pickle as builtin_pickle
111114

112115
unicode_type = str
113116
def lzip(*x):
@@ -142,10 +145,7 @@ def unichar(s):
142145
try:
143146
import cloudpickle as pickle
144147
except ImportError:
145-
try:
146-
import cPickle as pickle
147-
except ImportError:
148-
import pickle
148+
pickle = builtin_pickle
149149

150150
def encode_file_path(path):
151151
import os

python/pyarrow/io.pxi

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# arrow::ipc
2020

2121
from libc.stdlib cimport malloc, free
22-
from pyarrow.compat import frombytes, tobytes, encode_file_path
22+
from pyarrow.compat import builtin_pickle, frombytes, tobytes, encode_file_path
2323
from io import BufferedIOBase, UnsupportedOperation
2424

2525
import re
@@ -823,8 +823,11 @@ cdef class Buffer:
823823
else:
824824
return NotImplemented
825825

826-
def __reduce__(self):
827-
return py_buffer, (self.to_pybytes(),)
826+
def __reduce_ex__(self, protocol):
    """
    Pickle support: return the ``(callable, args)`` pair used to
    rebuild this buffer through ``py_buffer``.

    For pickle protocols below 5 the contents are passed as the bytes
    object returned by ``to_pybytes()``.  From protocol 5 onwards
    (PEP 574) the buffer is wrapped in a ``PickleBuffer`` instead, so
    that the pickler may hand the data over without serializing it
    into the pickle stream.
    """
    if protocol < 5:
        # Older protocols have no notion of PickleBuffer.
        return py_buffer, (self.to_pybytes(),)
    return py_buffer, (builtin_pickle.PickleBuffer(self),)
828831

829832
def to_pybytes(self):
830833
return cp.PyBytes_FromStringAndSize(

python/pyarrow/tests/test_array.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,18 @@
1717

1818
import collections
1919
import datetime
20+
import pickle
2021
import pytest
2122
import struct
2223
import sys
2324

2425
import numpy as np
2526
import pandas as pd
2627
import pandas.util.testing as tm
27-
import pickle
28+
try:
29+
import pickle5
30+
except ImportError:
31+
pickle5 = None
2832

2933
import pyarrow as pa
3034
from pyarrow.pandas_compat import get_logical_type
@@ -633,7 +637,7 @@ def test_cast_date64_to_int():
633637
assert result.equals(expected)
634638

635639

636-
@pytest.mark.parametrize(
640+
pickle_test_parametrize = pytest.mark.parametrize(
637641
('data', 'typ'),
638642
[
639643
([True, False, True, True], pa.bool_()),
@@ -647,12 +651,38 @@ def test_cast_date64_to_int():
647651
pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())]))
648652
]
649653
)
654+
655+
656+
@pickle_test_parametrize
650657
def test_array_pickle(data, typ):
651658
# Allocate here so that we don't have any Arrow data allocated.
652659
# This is needed to ensure that allocator tests can be reliable.
653660
array = pa.array(data, type=typ)
654-
result = pickle.loads(pickle.dumps(array))
655-
assert array.equals(result)
661+
for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
662+
result = pickle.loads(pickle.dumps(array, proto))
663+
assert array.equals(result)
664+
665+
666+
@pickle_test_parametrize
def test_array_pickle5(data, typ):
    """Test zero-copy pickling with protocol 5 (PEP 574).

    The array is pickled with a ``buffer_callback`` collecting the
    out-of-band buffers, unpickled from those same buffers, and the
    result must both compare equal to the original and expose buffers
    at the same addresses as the original array.

    Skipped when neither the ``pickle5`` backport nor a ``pickle``
    module supporting protocol 5 is available.
    """
    # Prefer the pickle5 backport when it could be imported, otherwise
    # fall back to the standard library module.
    picklemod = pickle5 or pickle
    if picklemod.HIGHEST_PROTOCOL < 5:
        pytest.skip("need pickle5 package or Python 3.8+")

    array = pa.array(data, type=typ)
    addresses = [buf.address if buf is not None else 0
                 for buf in array.buffers()]

    # The protocol range must come from the module actually used for
    # pickling: with the pickle5 backport the standard library's
    # HIGHEST_PROTOCOL can still be below 5, which would leave this loop
    # empty and make the test pass without checking anything.
    for proto in range(5, picklemod.HIGHEST_PROTOCOL + 1):
        buffers = []
        pickled = picklemod.dumps(array, proto, buffer_callback=buffers.append)
        result = picklemod.loads(pickled, buffers=buffers)
        assert array.equals(result)

        # Identical addresses show the data was shared, not copied.
        result_addresses = [buf.address if buf is not None else 0
                            for buf in result.buffers()]
        assert result_addresses == addresses
656686

657687

658688
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)