Skip to content

Commit 606287f

Browse files
authored
[7.x] Support serializing numpy and pandas types
1 parent 98bee5c commit 606287f

File tree

4 files changed

+142
-3
lines changed

4 files changed

+142
-3
lines changed

dev-requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ nosexcover
66
sphinx<1.7
77
sphinx_rtd_theme
88
jinja2
9+
numpy
10+
pandas
911

1012
# PyYAML 5.3 dropped support for Python 3.4 while
1113
# not amending that requirement to the package. :(

elasticsearch/serializer.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,49 @@
22
import simplejson as json
33
except ImportError:
44
import json
5+
56
import uuid
67
from datetime import date, datetime
78
from decimal import Decimal
89

910
from .exceptions import SerializationError, ImproperlyConfigured
1011
from .compat import string_types
1112

13+
# Type groups used for isinstance() dispatch when serializing values.
# Each tuple starts with the standard-library types and is extended below
# only when the optional numpy / pandas packages can be imported.
INTEGER_TYPES = ()
FLOAT_TYPES = (Decimal,)
TIME_TYPES = (date, datetime)

try:
    import numpy as np

    INTEGER_TYPES += (
        np.int_,
        np.intc,
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint8,
        np.uint16,
        np.uint32,
        np.uint64,
    )
    # numpy.float_ is deliberately not listed: it is only an alias of
    # numpy.float64, and it was removed in NumPy 2.0, where accessing it
    # raises AttributeError. That is not an ImportError, so it would escape
    # the handler below and make this whole module un-importable.
    FLOAT_TYPES += (
        np.float16,
        np.float32,
        np.float64,
    )
except ImportError:
    # numpy is optional; a falsy ``np`` tells callers to skip numpy handling.
    np = None

try:
    import pandas as pd

    TIME_TYPES += (pd.Timestamp,)
except ImportError:
    # pandas is optional; a falsy ``pd`` tells callers to skip pandas handling.
    pd = None
47+
1248

1349
class TextSerializer(object):
1450
mimetype = "text/plain"
@@ -27,12 +63,29 @@ class JSONSerializer(object):
2763
mimetype = "application/json"
2864

2965
def default(self, data):
    """Return a JSON-serializable representation of ``data``.

    Supports date/time values, UUIDs, the float and integer type groups
    and, when the optional packages were importable, numpy booleans,
    datetime64 values and arrays plus pandas Series, Categorical and
    missing-value markers.

    Returns a ``str``, ``float``, ``int``, ``bool``, ``list`` or ``None``.
    Raises ``TypeError`` when ``data`` is of an unsupported type.
    """
    # pandas.NaT subclasses datetime, so it would match TIME_TYPES below and
    # be emitted as the string "NaT". It is a missing value: emit null.
    if pd and data is getattr(pd, "NaT", None):
        return None

    if isinstance(data, TIME_TYPES):
        return data.isoformat()
    if isinstance(data, uuid.UUID):
        return str(data)
    if isinstance(data, FLOAT_TYPES):
        return float(data)
    # isinstance() against an empty tuple is always False; the truthiness
    # check just skips the call when numpy is unavailable.
    if INTEGER_TYPES and isinstance(data, INTEGER_TYPES):
        return int(data)

    # Special cases for numpy and pandas types
    if np:
        if isinstance(data, np.bool_):
            return bool(data)
        elif isinstance(data, np.datetime64):
            value = data.item()
            if value is None:
                # numpy.datetime64("NaT") converts to None: emit null.
                return None
            if not hasattr(value, "isoformat"):
                # Units finer than microseconds (e.g. the "ns" unit) make
                # item() return a plain int; retry at microsecond precision
                # so a datetime object comes back instead.
                value = data.astype("datetime64[us]").item()
            if hasattr(value, "isoformat"):
                return value.isoformat()
            # Still not a date/datetime (value outside the range Python
            # can represent): fall through to the TypeError below.
        elif isinstance(data, np.ndarray):
            return data.tolist()
    if pd:
        if isinstance(data, (pd.Series, pd.Categorical)):
            return data.tolist()
        elif hasattr(pd, "NA") and pd.isna(data):
            return None

    raise TypeError("Unable to serialize %r (type: %s)" % (data, type(data)))
3790

3891
def loads(self, s):

test_elasticsearch/test_serializer.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
from datetime import datetime
66
from decimal import Decimal
77

8+
import numpy as np
9+
import pandas as pd
10+
811
from elasticsearch.serializer import (
912
JSONSerializer,
1013
Deserializer,
@@ -36,6 +39,86 @@ def test_uuid_serialization(self):
3639
),
3740
)
3841

42+
def test_serializes_numpy_bool(self):
    # numpy.bool_ is not a Python bool, so it needs the serializer's help
    # to come out as a JSON boolean.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual('{"d":true}', JSONSerializer().dumps({"d": np.bool_(True)}))
44+
45+
def test_serializes_numpy_integers(self):
    # Every numpy integer width must be emitted as a plain JSON integer.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    ser = JSONSerializer()

    # Signed types are checked with a negative value.
    for np_type in (
        np.int_,
        np.int8,
        np.int16,
        np.int32,
        np.int64,
    ):
        self.assertEqual(ser.dumps({"d": np_type(-1)}), '{"d":-1}')

    # Unsigned types cannot hold -1, so use a positive value.
    for np_type in (
        np.uint8,
        np.uint16,
        np.uint32,
        np.uint64,
    ):
        self.assertEqual(ser.dumps({"d": np_type(1)}), '{"d":1}')
63+
64+
def test_serializes_numpy_floats(self):
    # numpy floats must be emitted as plain JSON numbers. float32 cannot
    # represent 1.2 exactly, hence a regex that tolerates trailing digits.
    ser = JSONSerializer()

    # assertRegexpMatches was renamed to assertRegex in Python 3.2 and the
    # old name was removed in 3.12. The old name stays as a fallback in
    # case this suite still runs on an interpreter that lacks assertRegex.
    assert_regex = getattr(self, "assertRegex", None) or self.assertRegexpMatches

    # numpy.float_ is not exercised: it is only an alias of numpy.float64
    # and no longer exists in NumPy 2.0.
    for np_type in (
        np.float32,
        np.float64,
    ):
        assert_regex(ser.dumps({"d": np_type(1.2)}), r'^\{"d":1\.2[\d]*}$')
74+
75+
def test_serializes_numpy_datetime(self):
    # A second-precision numpy.datetime64 must come out as an ISO 8601 string.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        '{"d":"2010-10-01T02:30:00"}',
        JSONSerializer().dumps({"d": np.datetime64("2010-10-01T02:30:00")}),
    )
80+
81+
def test_serializes_numpy_ndarray(self):
    # Arrays must be emitted as (nested) JSON lists.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        '{"d":[0,0,0,0,0]}',
        JSONSerializer().dumps({"d": np.zeros((5,), dtype=np.uint8)}),
    )
    # This isn't useful for Elasticsearch, just want to make sure it works.
    self.assertEqual(
        '{"d":[[0,0],[0,0]]}',
        JSONSerializer().dumps({"d": np.zeros((2, 2), dtype=np.uint8)}),
    )
91+
92+
def test_serializes_pandas_timestamp(self):
    # pandas.Timestamp must come out as an ISO 8601 string.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        '{"d":"2010-10-01T02:30:00"}',
        JSONSerializer().dumps({"d": pd.Timestamp("2010-10-01T02:30:00")}),
    )
97+
98+
def test_serializes_pandas_series(self):
    # A pandas.Series must be emitted as a JSON list of its values.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        '{"d":["a","b","c","d"]}',
        JSONSerializer().dumps({"d": pd.Series(["a", "b", "c", "d"])}),
    )
103+
104+
def test_serializes_pandas_na(self):
    # pandas.NA must be emitted as JSON null.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    if not hasattr(pd, "NA"):  # pandas.NA added in v1
        raise SkipTest("pandas.NA required")
    self.assertEqual(
        '{"d":null}', JSONSerializer().dumps({"d": pd.NA}),
    )
110+
111+
def test_serializes_pandas_category(self):
    # A pandas.Categorical must be emitted as a JSON list of its values,
    # in element order rather than category order.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    cat = pd.Categorical(["a", "c", "b", "a"], categories=["a", "b", "c"])
    self.assertEqual(
        '{"d":["a","c","b","a"]}', JSONSerializer().dumps({"d": cat}),
    )

    # Same check with integer categories.
    cat = pd.Categorical([1, 2, 3], categories=[1, 2, 3])
    self.assertEqual(
        '{"d":[1,2,3]}', JSONSerializer().dumps({"d": cat}),
    )
121+
39122
def test_raises_serialization_error_on_dump_error(self):
40123
self.assertRaises(SerializationError, JSONSerializer().dumps, object())
41124

test_elasticsearch/test_server/test_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"TestIndicesGetAlias10Basic",
4040
# Disallowing expensive queries is 7.7+
4141
"TestSearch320DisallowQueries",
42+
"TestIndicesPutIndexTemplate10Basic",
4243
}
4344
}
4445

0 commit comments

Comments
 (0)