Skip to content

Commit fd7032a

Browse files
committed
Add support for handling ndarrays with dtype='O' using pickle (as well as cautionary note) (#46)
1 parent 963db59 commit fd7032a

File tree

7 files changed

+49
-17
lines changed

7 files changed

+49
-17
lines changed

AUTHORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Special thanks are due to the following parties for their contributions:
1111

1212
- [Alex Ford](https://github.com/asford) - bug fix.
1313
- [Colin Jermain](https://github.com/cjermain) - Python 3 support.
14+
- [Etienne Wodey](https://github.com/airwoodix) - support for ndarrays with dtype='O'.
1415
- [John Tyree](https://github.com/johntyree) - support for numpy scalar booleans.
1516
- [Mehdi Sadeghi](https://github.com/mehdisadeghi) - bug reports.
1617
- [Sujoy Roy](https://github.com/tvkpz) - bug reports.

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ vi:ft=markdown
55
Change Log
66
==========
77

8+
Release 0.4.8 (April 28, 2022)
9+
------------------------------
10+
* Add support for ndarrays with dtype=object (#46).
11+
812
Release 0.4.7.1 (September 30, 2020)
913
------------------------------------
1014
* Fix Python 2.7 regression (#45).

LICENSE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ vi:ft=markdown
55
License
66
=======
77

8-
Copyright (c) 2013-2020, Lev E. Givon.
8+
Copyright (c) 2013-2022, Lev E. Givon.
99
All rights reserved.
1010

1111
Redistribution and use in source and binary forms, with or without

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,14 @@ data types during msgpack serialization and deserialization. Inclusion of type
6565
information in the serialized data necessarily incurs some storage overhead; if
6666
preservation of type information is not needed, one may be able to avoid some
6767
of this overhead by writing a custom encoder/decoder pair that produces more
68-
efficient serializations for those specific use cases.
68+
efficient serializations for those specific use cases.
69+
70+
Numpy arrays with a dtype of 'O' are serialized/deserialized using pickle as
71+
a fallback solution to enable msgpack-numpy to handle
72+
such arrays. As the additional overhead of pickle serialization negates one
73+
of the reasons to use msgpack, it may be advisable to either write a custom
74+
encoder/decoder to handle the specific use case efficiently or else not bother
75+
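using msgpack-numpy. Because unpickling can execute arbitrary code, arrays with a dtype of 'O' should only be deserialized from trusted sources.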
6976

7077
Note that numpy arrays deserialized by msgpack-numpy are read-only and must be copied
7178
if they are to be modified.

msgpack_numpy.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
Support for serialization of numpy data types with msgpack.
55
"""
66

7-
# Copyright (c) 2013-2020, Lev E. Givon
7+
# Copyright (c) 2013-2022, Lev E. Givon
88
# All rights reserved.
99
# Distributed under the terms of the BSD license:
1010
# http://www.opensource.org/licenses/bsd-license
1111

1212
import sys
1313
import functools
14+
import pickle
1415
import warnings
1516

1617
import msgpack
@@ -19,10 +20,14 @@
1920
import numpy as np
2021

2122
if sys.version_info >= (3, 0):
22-
if sys.platform == 'darwin':
23-
ndarray_to_bytes = lambda obj: obj.tobytes()
24-
else:
25-
ndarray_to_bytes = lambda obj: obj.data if obj.flags['C_CONTIGUOUS'] else obj.tobytes()
23+
def ndarray_to_bytes(obj):
24+
if obj.dtype == 'O':
25+
return obj.dumps()
26+
else:
27+
if sys.platform == 'darwin':
28+
return obj.tobytes()
29+
else:
30+
return obj.data if obj.flags['C_CONTIGUOUS'] else obj.tobytes()
2631

2732
num_to_bytes = lambda obj: obj.data
2833

@@ -32,10 +37,14 @@ def tostr(x):
3237
else:
3338
return str(x)
3439
else:
35-
if sys.platform == 'darwin':
36-
ndarray_to_bytes = lambda obj: obj.tobytes()
37-
else:
38-
ndarray_to_bytes = lambda obj: memoryview(obj.data) if obj.flags['C_CONTIGUOUS'] else obj.tobytes()
40+
def ndarray_to_bytes(obj):
41+
if obj.dtype == 'O':
42+
return obj.dumps()
43+
else:
44+
if sys.platform == 'darwin':
45+
return obj.tobytes()
46+
else:
47+
return memoryview(obj.data) if obj.flags['C_CONTIGUOUS'] else obj.tobytes()
3948

4049
num_to_bytes = lambda obj: memoryview(obj.data)
4150

@@ -50,12 +59,13 @@ def encode(obj, chain=None):
5059
if isinstance(obj, np.ndarray):
5160
# If the dtype is structured, store the interface description;
5261
# otherwise, store the corresponding array protocol type string:
53-
if obj.dtype.kind == 'V':
54-
kind = b'V'
62+
if obj.dtype.kind in ('V', 'O'):
63+
kind = bytes(obj.dtype.kind, 'ascii')
5564
descr = obj.dtype.descr
5665
else:
5766
kind = b''
5867
descr = obj.dtype.str
68+
5969
return {b'nd': True,
6070
b'type': descr,
6171
b'kind': kind,
@@ -81,14 +91,18 @@ def decode(obj, chain=None):
8191
if obj[b'nd'] is True:
8292

8393
# Check if b'kind' is in obj to enable decoding of data
84-
# serialized with older versions (#20):
94+
# serialized with older versions (#20) or data
95+
# that had dtype == 'O' (#46):
8596
if b'kind' in obj and obj[b'kind'] == b'V':
8697
descr = [tuple(tostr(t) if type(t) is bytes else t for t in d) \
8798
for d in obj[b'type']]
99+
elif b'kind' in obj and obj[b'kind'] == b'O':
100+
return pickle.loads(obj[b'data'])
88101
else:
89102
descr = obj[b'type']
90-
return np.frombuffer(obj[b'data'],
91-
dtype=_unpack_dtype(descr)).reshape(obj[b'shape'])
103+
return np.ndarray(buffer=obj[b'data'],
104+
dtype=_unpack_dtype(descr),
105+
shape=obj[b'shape'])
92106
else:
93107
descr = obj[b'type']
94108
return np.frombuffer(obj[b'data'],

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from setuptools import setup
77

88
NAME = 'msgpack-numpy'
9-
VERSION = '0.4.7.1'
9+
VERSION = '0.4.8'
1010
AUTHOR = 'Lev E. Givon'
1111
AUTHOR_EMAIL = 'lev@columbia.edu'
1212
URL = 'https://github.com/lebedov/msgpack-numpy'

tests.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ def test_numpy_array_float(self):
188188
assert_array_equal(x, x_rec)
189189
assert_equal(x.dtype, x_rec.dtype)
190190

191+
def test_numpy_array_object(self):
192+
x = np.random.rand(5).astype(object)
193+
x_rec = self.encode_decode(x)
194+
assert_array_equal(x, x_rec)
195+
assert_equal(x.dtype, x_rec.dtype)
196+
191197
def test_numpy_array_complex(self):
192198
x = (np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128)
193199
x_rec = self.encode_decode(x)

0 commit comments

Comments
 (0)