Skip to content

Commit

Permalink
Add default param to encoder
Browse files Browse the repository at this point in the history
- Allow for custom types to be encoded
  • Loading branch information
vtermanis committed Nov 24, 2017
1 parent abbc147 commit 0e1955d
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 52 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
UNRELEASED
- Add default parameter to encoder, allowing for unsupported types to be
encoded.
- Treat object_pairs_hook the same in pure Python mode as with extension

0.10.0
- Support No-Op type (decoder only)
- Allow for object keys to be interned, saving memory if repeated (PY3 only)
Expand Down
3 changes: 3 additions & 0 deletions pylint.rc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ extension-pkg-whitelist=_ubjson
# AST will be different than the one from reality.
optimize-ast=no

score=no


[MESSAGES CONTROL]

Expand Down Expand Up @@ -67,6 +69,7 @@ confidence=
# C0111 - missing-docstring
disable=R0903,W0511,I0011,C0111,no-else-return


[REPORTS]

# Set the output format. Available formats are text, parseable, colorized, msvs
Expand Down
14 changes: 7 additions & 7 deletions src/_ubjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
/******************************************************************************/

// default_func, container_count, sort_keys, no_float32
static _ubjson_encoder_prefs_t _ubjson_encoder_prefs_defaults = { 0, 0, 1 };
static _ubjson_encoder_prefs_t _ubjson_encoder_prefs_defaults = { NULL, 0, 0, 1 };

// no_bytes, object_pairs_hook
static _ubjson_decoder_prefs_t _ubjson_decoder_prefs_defaults = { NULL, 0, 0 };
Expand All @@ -34,8 +34,8 @@ PyDoc_STRVAR(_ubjson_dump__doc__, "See pure Python version (encoder.dump) for do
#define FUNC_DEF_DUMP {"dump", (PyCFunction)_ubjson_dump, METH_VARARGS | METH_KEYWORDS, _ubjson_dump__doc__}
static PyObject*
_ubjson_dump(PyObject *self, PyObject *args, PyObject *kwargs) {
static const char *format = "OO|iii:dump";
static char *keywords[] = {"obj", "fp", "container_count", "sort_keys", "no_float32", NULL};
static const char *format = "OO|iiiO:dump";
static char *keywords[] = {"obj", "fp", "container_count", "sort_keys", "no_float32", "default", NULL};

_ubjson_encoder_buffer_t *buffer = NULL;
_ubjson_encoder_prefs_t prefs = _ubjson_encoder_prefs_defaults;
Expand All @@ -45,7 +45,7 @@ _ubjson_dump(PyObject *self, PyObject *args, PyObject *kwargs) {
UNUSED(self);

if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &obj, &fp, &prefs.container_count,
&prefs.sort_keys, &prefs.no_float32)) {
&prefs.sort_keys, &prefs.no_float32, &prefs.default_func)) {
goto bail;
}
BAIL_ON_NULL(fp_write = PyObject_GetAttrString(fp, "write"));
Expand All @@ -68,16 +68,16 @@ PyDoc_STRVAR(_ubjson_dumpb__doc__, "See pure Python version (encoder.dumpb) for
#define FUNC_DEF_DUMPB {"dumpb", (PyCFunction)_ubjson_dumpb, METH_VARARGS | METH_KEYWORDS, _ubjson_dumpb__doc__}
static PyObject*
_ubjson_dumpb(PyObject *self, PyObject *args, PyObject *kwargs) {
static const char *format = "O|iii:dumpb";
static char *keywords[] = {"obj", "container_count", "sort_keys", "no_float32", NULL};
static const char *format = "O|iiiO:dumpb";
static char *keywords[] = {"obj", "container_count", "sort_keys", "no_float32", "default", NULL};

_ubjson_encoder_buffer_t *buffer = NULL;
_ubjson_encoder_prefs_t prefs = _ubjson_encoder_prefs_defaults;
PyObject *obj;
UNUSED(self);

if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &obj, &prefs.container_count, &prefs.sort_keys,
&prefs.no_float32)) {
&prefs.no_float32, &prefs.default_func)) {
goto bail;
}

Expand Down
2 changes: 1 addition & 1 deletion src/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ static PyObject* _decode_object(_ubjson_decoder_buffer_t *buffer);
/******************************************************************************/

/* Returns new decoder buffer or NULL on failure (an exception will be set). Input must either support buffer interface
* or be callable.
* or be callable. Currently only increases reference count for input parameter.
*/
_ubjson_decoder_buffer_t* _ubjson_decoder_buffer_create(_ubjson_decoder_prefs_t* prefs, PyObject *input) {
_ubjson_decoder_buffer_t *buffer;
Expand Down
23 changes: 17 additions & 6 deletions src/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ static int _encode_PyMapping(PyObject *obj, _ubjson_encoder_buffer_t *buffer);

/******************************************************************************/

// fp_write, if not NULL, must be a callable which accepts a single bytes argument. On failure will set exception.
/* fp_write, if not NULL, must be a callable which accepts a single bytes argument. On failure will set exception.
* Currently only increases reference count for fp_write parameter.
*/
_ubjson_encoder_buffer_t* _ubjson_encoder_buffer_create(_ubjson_encoder_prefs_t* prefs, PyObject *fp_write) {
_ubjson_encoder_buffer_t *buffer;

Expand All @@ -87,24 +89,26 @@ _ubjson_encoder_buffer_t* _ubjson_encoder_buffer_create(_ubjson_encoder_prefs_t*
return NULL;
}

buffer->len = (NULL != buffer->fp_write) ? BUFFER_FP_SIZE : BUFFER_INITIAL_SIZE;
buffer->len = (NULL != fp_write) ? BUFFER_FP_SIZE : BUFFER_INITIAL_SIZE;
BAIL_ON_NULL(buffer->obj = PyBytes_FromStringAndSize(NULL, buffer->len));
buffer->raw = PyBytes_AS_STRING(buffer->obj);
buffer->pos = 0;

BAIL_ON_NULL(buffer->markers = PySet_New(NULL));

buffer->prefs = *prefs;

buffer->fp_write = fp_write;
Py_XINCREF(fp_write);

// treat Py_None as no default_func being supplied
if (Py_None == buffer->prefs.default_func) {
buffer->prefs.default_func = NULL;
}

return buffer;

bail:
Py_XDECREF(buffer->obj);
Py_XDECREF(buffer->markers);
free(buffer);
_ubjson_encoder_buffer_free(buffer);
return NULL;
}

Expand Down Expand Up @@ -607,6 +611,8 @@ static int _encode_PyMapping(PyObject *obj, _ubjson_encoder_buffer_t *buffer) {
/******************************************************************************/

int _ubjson_encode_value(PyObject *obj, _ubjson_encoder_buffer_t *buffer) {
PyObject *newobj = NULL; // result of default call (when encoding unsupported types)

if (Py_None == obj) {
WRITE_CHAR_OR_BAIL(TYPE_NULL);
} else if (Py_True == obj) {
Expand Down Expand Up @@ -642,13 +648,18 @@ int _ubjson_encode_value(PyObject *obj, _ubjson_encoder_buffer_t *buffer) {
} else if (NULL == obj) {
PyErr_SetString(PyExc_RuntimeError, "Internal error - _ubjson_encode_value got NULL obj");
goto bail;
} else if (NULL != buffer->prefs.default_func) {
BAIL_ON_NULL(newobj = PyObject_CallFunctionObjArgs(buffer->prefs.default_func, obj, NULL));
RECURSE_AND_BAIL_ON_NONZERO(_ubjson_encode_value(newobj, buffer), " while encoding with default function");
Py_DECREF(newobj);
} else {
PyErr_Format(EncoderException, "Cannot encode item of type %s", obj->ob_type->tp_name);
goto bail;
}
return 0;

bail:
Py_XDECREF(newobj);
return 1;
}

Expand Down
1 change: 1 addition & 0 deletions src/encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ extern "C" {
/******************************************************************************/

typedef struct {
PyObject *default_func;
int container_count;
int sort_keys;
int no_float32;
Expand Down
23 changes: 23 additions & 0 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,29 @@ def test_recursion(self):
with self.assert_raises_regex(RuntimeError, 'recursion'):
self.ubjloadb(raw)

def test_encode_default(self):
    """Verify the encoder's ``default`` hook is used for otherwise unsupported types.

    Covers a custom type at the top level and nested inside a mapping and a
    sequence, plus propagation of an exception raised by the hook itself.
    """
    def set_to_sorted_list(obj):
        # Only sets are converted; anything else fails with a recognisable marker
        if not isinstance(obj, set):
            raise EncoderException('__test__marker__')
        return sorted(obj)

    encode_with_hook = partial(self.ubjdumpb, default=set_to_sorted_list)

    # Top-level custom type and the value the hook turns it into
    raw_set = {1, 2, 3}
    converted = set_to_sorted_list(raw_set)

    # The same custom type nested inside a mapping and a sequence
    nested_raw = OrderedDict(sorted({'a': 1, 'b': raw_set, 'c': [2, raw_set]}.items()))
    nested_converted = OrderedDict(sorted({'a': 1, 'b': converted, 'c': [2, converted]}.items()))

    # Without a hook, encoding a set is expected to fail
    with self.assert_raises_regex(EncoderException, 'Cannot encode item'):
        self.ubjdumpb(raw_set)

    # An exception raised by the hook must reach the caller (the test case is not a set)
    with self.assert_raises_regex(EncoderException, '__test__marker__'):
        encode_with_hook(self)

    self.assertEqual(encode_with_hook(raw_set), self.ubjdumpb(converted))
    self.assertEqual(encode_with_hook(nested_raw), self.ubjdumpb(nested_converted))


class TestEncodeDecodeFp(TestEncodeDecodePlain):
"""Performs tests via file-like objects (BytesIO) instead of bytes instances"""
Expand Down
2 changes: 1 addition & 1 deletion ubjson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@
from .encoder import EncoderException # noqa
from .decoder import DecoderException # noqa

__version__ = '0.10.0'
__version__ = '0.11.0'

__all__ = ('EXTENSION_ENABLED', 'dump', 'dumpb', 'EncoderException', 'load', 'loadb', 'DecoderException')
6 changes: 2 additions & 4 deletions ubjson/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from struct import Struct, pack, error as StructError
from decimal import Decimal, DecimalException

from .compat import raise_from, Mapping, intern_unicode
from .compat import raise_from, intern_unicode
from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8,
TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)
Expand Down Expand Up @@ -298,7 +298,7 @@ def load(fp, no_bytes=False, object_pairs_hook=None, intern_object_keys=False):
no_bytes (bool): If set, typed UBJSON arrays (uint8) will not be
converted to a bytes instance and instead treated like
any other array (i.e. result in a list).
object_pairs_hook (function): Called with the result of any object
object_pairs_hook (callable): Called with the result of any object
literal decoded with an ordered list of
pairs (instead of dict).
intern_object_keys (bool): If set, object keys are interned which can
Expand Down Expand Up @@ -345,8 +345,6 @@ def load(fp, no_bytes=False, object_pairs_hook=None, intern_object_keys=False):
"""
if object_pairs_hook is None:
object_pairs_hook = dict
elif not issubclass(object_pairs_hook, Mapping):
raise TypeError('object_pairs_hook is not a mapping type')

if not callable(fp.read):
raise TypeError('fp.read not callable')
Expand Down
58 changes: 25 additions & 33 deletions ubjson/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def __encode_bytes(fp_write, item):
# no ARRAY_END since length was specified


def __encode_value(fp_write, item, no_float32):
def __encode_value(fp_write, item, seen_containers, container_count, sort_keys, # noqa pylint: disable=too-many-branches
no_float32, default):
if isinstance(item, UNICODE_TYPE):
__encode_string(fp_write, item)

Expand Down Expand Up @@ -162,13 +163,21 @@ def __encode_value(fp_write, item, no_float32):
elif isinstance(item, BYTES_TYPES):
__encode_bytes(fp_write, item)

else:
return False
# order important since mappings could also be sequences
elif isinstance(item, Mapping):
__encode_object(fp_write, item, seen_containers, container_count, sort_keys, no_float32, default)

elif isinstance(item, Sequence):
__encode_array(fp_write, item, seen_containers, container_count, sort_keys, no_float32, default)

elif default is not None:
__encode_value(fp_write, default(item), seen_containers, container_count, sort_keys, no_float32, default)

return True
else:
raise EncoderException('Cannot encode item of type %s' % type(item))


def __encode_array(fp_write, item, seen_containers, container_count, sort_keys, no_float32):
def __encode_array(fp_write, item, seen_containers, container_count, sort_keys, no_float32, default):
# circular reference check
container_id = id(item)
if container_id in seen_containers:
Expand All @@ -181,22 +190,15 @@ def __encode_array(fp_write, item, seen_containers, container_count, sort_keys,
__encode_int(fp_write, len(item))

for value in item:
if not __encode_value(fp_write, value, no_float32):
# order important since mappings could also be sequences
if isinstance(value, Mapping):
__encode_object(fp_write, value, seen_containers, container_count, sort_keys, no_float32)
elif isinstance(value, Sequence):
__encode_array(fp_write, value, seen_containers, container_count, sort_keys, no_float32)
else:
raise EncoderException('Cannot encode item of type %s' % type(value))
__encode_value(fp_write, value, seen_containers, container_count, sort_keys, no_float32, default)

if not container_count:
fp_write(ARRAY_END)

del seen_containers[container_id]


def __encode_object(fp_write, item, seen_containers, container_count, sort_keys, no_float32):
def __encode_object(fp_write, item, seen_containers, container_count, sort_keys, no_float32, default):
# circular reference check
container_id = id(item)
if container_id in seen_containers:
Expand All @@ -220,22 +222,15 @@ def __encode_object(fp_write, item, seen_containers, container_count, sort_keys,
__encode_int(fp_write, length)
fp_write(encoded_key)

if not __encode_value(fp_write, value, no_float32):
# order important since mappings could also be sequences
if isinstance(value, Mapping):
__encode_object(fp_write, value, seen_containers, container_count, sort_keys, no_float32)
elif isinstance(value, Sequence):
__encode_array(fp_write, value, seen_containers, container_count, sort_keys, no_float32)
else:
raise EncoderException('Cannot encode item of type %s' % type(value))
__encode_value(fp_write, value, seen_containers, container_count, sort_keys, no_float32, default)

if not container_count:
fp_write(OBJECT_END)

del seen_containers[container_id]


def dump(obj, fp, container_count=False, sort_keys=False, no_float32=True):
def dump(obj, fp, container_count=False, sort_keys=False, no_float32=True, default=None):
"""Writes the given object as UBJSON to the provided file-like object
Args:
Expand All @@ -251,6 +246,10 @@ def dump(obj, fp, container_count=False, sort_keys=False, no_float32=True):
no_float32 (bool): Never use float32 to store float numbers (other than
for zero). Disabling this might save space at the
loss of precision.
default (callable): Called for objects which cannot be serialised.
Should return a UBJSON-encodable version of the
object or raise an EncoderException.
Raises:
EncoderException: If an encoding failure occurred.
Expand Down Expand Up @@ -303,19 +302,12 @@ def dump(obj, fp, container_count=False, sort_keys=False, no_float32=True):
raise TypeError('fp.write not callable')
fp_write = fp.write

if not __encode_value(fp_write, obj, no_float32):
# order important since mappings could also be sequences
if isinstance(obj, Mapping):
__encode_object(fp_write, obj, {}, container_count, sort_keys, no_float32)
elif isinstance(obj, Sequence):
__encode_array(fp_write, obj, {}, container_count, sort_keys, no_float32)
else:
raise EncoderException('Cannot encode item of type %s' % type(obj))
__encode_value(fp_write, obj, {}, container_count, sort_keys, no_float32, default)


def dumpb(obj, container_count=False, sort_keys=False, no_float32=True):
def dumpb(obj, container_count=False, sort_keys=False, no_float32=True, default=None):
"""Returns the given object as UBJSON in a bytes instance. See dump() for
available arguments."""
with BytesIO() as fp:
dump(obj, fp, container_count=container_count, sort_keys=sort_keys, no_float32=no_float32)
dump(obj, fp, container_count=container_count, sort_keys=sort_keys, no_float32=no_float32, default=default)
return fp.getvalue()

0 comments on commit 0e1955d

Please sign in to comment.