Skip to content

Commit

Permalink
bpo-36785: PEP 574 implementation (GH-7076)
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou authored May 26, 2019
1 parent 22ccb0b commit 91f4380
Show file tree
Hide file tree
Showing 19 changed files with 1,886 additions and 240 deletions.
271 changes: 214 additions & 57 deletions Doc/library/pickle.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
#include "weakrefobject.h"
#include "structseq.h"
#include "namespaceobject.h"
#include "picklebufobject.h"

#include "codecs.h"
#include "pyerrors.h"
Expand Down
31 changes: 31 additions & 0 deletions Include/picklebufobject.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* PickleBuffer object. This is built-in for ease of use from third-party
* C extensions.
*/

#ifndef Py_PICKLEBUFOBJECT_H
#define Py_PICKLEBUFOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_LIMITED_API

PyAPI_DATA(PyTypeObject) PyPickleBuffer_Type;

#define PyPickleBuffer_Check(op) (Py_TYPE(op) == &PyPickleBuffer_Type)

/* Create a PickleBuffer redirecting to the given buffer-enabled object */
PyAPI_FUNC(PyObject *) PyPickleBuffer_FromObject(PyObject *);
/* Get the PickleBuffer's underlying view to the original object
* (NULL if released)
*/
PyAPI_FUNC(const Py_buffer *) PyPickleBuffer_GetBuffer(PyObject *);
/* Release the PickleBuffer. Returns 0 on success, -1 on error. */
PyAPI_FUNC(int) PyPickleBuffer_Release(PyObject *);

#endif /* !Py_LIMITED_API */

#ifdef __cplusplus
}
#endif
#endif /* !Py_PICKLEBUFOBJECT_H */
152 changes: 139 additions & 13 deletions Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@
import codecs
import _compat_pickle

from _pickle import PickleBuffer

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"]
"Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)
Expand All @@ -51,10 +53,11 @@
"2.0", # Protocol 2
"3.0", # Protocol 3
"4.0", # Protocol 4
"5.0", # Protocol 5
] # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 4
HIGHEST_PROTOCOL = 5

# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
Expand Down Expand Up @@ -167,6 +170,7 @@ def __init__(self, value):
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
BINUNICODE8 = b'\x8d' # push very long string
BINBYTES8 = b'\x8e' # push very long bytes string
Expand All @@ -178,6 +182,12 @@ def __init__(self, value):
MEMOIZE = b'\x94' # store top of the stack in memo
FRAME = b'\x95' # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8 = b'\x96' # push bytearray
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
READONLY_BUFFER = b'\x98' # make top of stack readonly

__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


Expand Down Expand Up @@ -251,6 +261,23 @@ def __init__(self, file_read, file_readline, file_tell=None):
self.file_readline = file_readline
self.current_frame = None

def readinto(self, buf):
if self.current_frame:
n = self.current_frame.readinto(buf)
if n == 0 and len(buf) != 0:
self.current_frame = None
n = len(buf)
buf[:] = self.file_read(n)
return n
if n < len(buf):
raise UnpicklingError(
"pickle exhausted before end of frame")
return n
else:
n = len(buf)
buf[:] = self.file_read(n)
return n

def read(self, n):
if self.current_frame:
data = self.current_frame.read(n)
Expand Down Expand Up @@ -371,7 +398,8 @@ def decode_long(data):

class _Pickler:

def __init__(self, file, protocol=None, *, fix_imports=True):
def __init__(self, file, protocol=None, *, fix_imports=True,
buffer_callback=None):
"""This takes a binary file for writing a pickle data stream.
The optional *protocol* argument tells the pickler to use the
Expand All @@ -393,13 +421,27 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
will try to map the new Python 3 names to the old module names
used in Python 2, so that the pickle data stream is readable
with Python 2.
If *buffer_callback* is None (the default), buffer views are
serialized into *file* as part of the pickle stream.
If *buffer_callback* is not None, then it can be called any number
of times with a buffer view. If the callback returns a false value
(such as None), the given buffer is out-of-band; otherwise the
buffer is serialized in-band, i.e. inside the pickle stream.
It is an error if *buffer_callback* is not None and *protocol*
is None or smaller than 5.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
if protocol < 0:
protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
if buffer_callback is not None and protocol < 5:
raise ValueError("buffer_callback needs protocol >= 5")
self._buffer_callback = buffer_callback
try:
self._file_write = file.write
except AttributeError:
Expand Down Expand Up @@ -756,6 +798,46 @@ def save_bytes(self, obj):
self.memoize(obj)
dispatch[bytes] = save_bytes

def save_bytearray(self, obj):
if self.proto < 5:
if not obj: # bytearray is empty
self.save_reduce(bytearray, (), obj=obj)
else:
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
return
n = len(obj)
if n >= self.framer._FRAME_SIZE_TARGET:
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
else:
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
dispatch[bytearray] = save_bytearray

def save_picklebuffer(self, obj):
if self.proto < 5:
raise PicklingError("PickleBuffer can only pickled with "
"protocol >= 5")
with obj.raw() as m:
if not m.contiguous:
raise PicklingError("PickleBuffer can not be pickled when "
"pointing to a non-contiguous buffer")
in_band = True
if self._buffer_callback is not None:
in_band = bool(self._buffer_callback(obj))
if in_band:
# Write data in-band
# XXX The C implementation avoids a copy here
if m.readonly:
self.save_bytes(m.tobytes())
else:
self.save_bytearray(m.tobytes())
else:
# Write data out-of-band
self.write(NEXT_BUFFER)
if m.readonly:
self.write(READONLY_BUFFER)

dispatch[PickleBuffer] = save_picklebuffer

def save_str(self, obj):
if self.bin:
encoded = obj.encode('utf-8', 'surrogatepass')
Expand Down Expand Up @@ -1042,7 +1124,7 @@ def save_type(self, obj):
class _Unpickler:

def __init__(self, file, *, fix_imports=True,
encoding="ASCII", errors="strict"):
encoding="ASCII", errors="strict", buffers=None):
"""This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so
Expand All @@ -1061,7 +1143,17 @@ def __init__(self, file, *, fix_imports=True,
reading, a BytesIO object, or any other custom object that
meets this interface.
Optional keyword arguments are *fix_imports*, *encoding* and
If *buffers* is not None, it should be an iterable of buffer-enabled
objects that is consumed each time the pickle stream references
an out-of-band buffer view. Such buffers have been given in order
to the *buffer_callback* of a Pickler object.
If *buffers* is None (the default), then the buffers are taken
from the pickle stream, assuming they are serialized there.
It is an error for *buffers* to be None if the pickle stream
was produced with a non-None *buffer_callback*.
Other optional arguments are *fix_imports*, *encoding* and
*errors*, which are used to control compatibility support for
pickle stream generated by Python 2. If *fix_imports* is True,
pickle will try to map the old Python 2 names to the new names
Expand All @@ -1070,6 +1162,7 @@ def __init__(self, file, *, fix_imports=True,
default to 'ASCII' and 'strict', respectively. *encoding* can be
'bytes' to read theses 8-bit string instances as bytes objects.
"""
self._buffers = iter(buffers) if buffers is not None else None
self._file_readline = file.readline
self._file_read = file.read
self.memo = {}
Expand All @@ -1090,6 +1183,7 @@ def load(self):
"%s.__init__()" % (self.__class__.__name__,))
self._unframer = _Unframer(self._file_read, self._file_readline)
self.read = self._unframer.read
self.readinto = self._unframer.readinto
self.readline = self._unframer.readline
self.metastack = []
self.stack = []
Expand Down Expand Up @@ -1276,6 +1370,34 @@ def load_binbytes8(self):
self.append(self.read(len))
dispatch[BINBYTES8[0]] = load_binbytes8

def load_bytearray8(self):
len, = unpack('<Q', self.read(8))
if len > maxsize:
raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
"of %d bytes" % maxsize)
b = bytearray(len)
self.readinto(b)
self.append(b)
dispatch[BYTEARRAY8[0]] = load_bytearray8

def load_next_buffer(self):
if self._buffers is None:
raise UnpicklingError("pickle stream refers to out-of-band data "
"but no *buffers* argument was given")
try:
buf = next(self._buffers)
except StopIteration:
raise UnpicklingError("not enough out-of-band buffers")
self.append(buf)
dispatch[NEXT_BUFFER[0]] = load_next_buffer

def load_readonly_buffer(self):
buf = self.stack[-1]
with memoryview(buf) as m:
if not m.readonly:
self.stack[-1] = m.toreadonly()
dispatch[READONLY_BUFFER[0]] = load_readonly_buffer

def load_short_binstring(self):
len = self.read(1)[0]
data = self.read(len)
Expand Down Expand Up @@ -1600,25 +1722,29 @@ def load_stop(self):

# Shorthands

def _dump(obj, file, protocol=None, *, fix_imports=True):
_Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
_Pickler(file, protocol, fix_imports=fix_imports,
buffer_callback=buffer_callback).dump(obj)

def _dumps(obj, protocol=None, *, fix_imports=True):
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
f = io.BytesIO()
_Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
_Pickler(f, protocol, fix_imports=fix_imports,
buffer_callback=buffer_callback).dump(obj)
res = f.getvalue()
assert isinstance(res, bytes_types)
return res

def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
return _Unpickler(file, fix_imports=fix_imports,
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
buffers=None):
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
encoding=encoding, errors=errors).load()

def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
buffers=None):
if isinstance(s, str):
raise TypeError("Can't load pickle from unicode string")
file = io.BytesIO(s)
return _Unpickler(file, fix_imports=fix_imports,
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
encoding=encoding, errors=errors).load()

# Use the faster _pickle if possible
Expand Down
Loading

0 comments on commit 91f4380

Please sign in to comment.