From 19cff3ebd9caf8b82ba1eef7fabb249cc8ee5786 Mon Sep 17 00:00:00 2001 From: Vilnis Termanis Date: Tue, 13 Jun 2017 11:44:30 +0100 Subject: [PATCH] Limited No-Op support (decoder only) #4 --- CHANGELOG | 3 +++ README.md | 4 ++-- UBJSON-Specification.md | 4 ++-- src/decoder.c | 24 ++++++++++++++++++++++-- src/markers.h | 1 + test/test.py | 36 ++++++++++++++++++++++++++++++++++-- ubjson/__init__.py | 2 +- ubjson/decoder.py | 16 ++++++++++++---- ubjson/markers.py | 1 + 9 files changed, 78 insertions(+), 13 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2ed4eb1..60e89ab 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +UNRELEASED +- Support No-Op type (decoder only) + 0.9.0 - C extension re-implemented (without Cython) with major speedup (7-10x) - object_pairs_hook now works like built-in json module diff --git a/README.md b/README.md index d2fd279..0621b7f 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,8 @@ This library has been checked using [flake8](https://pypi.python.org/pypi/flake8 # Limitations -- The **No-Op** type is not supported. (This should arguably be a protocol-level rather than serialisation-level option.) -- Strongly-typed containers are only supported by the decoder (apart from for **bytes**/**bytearray**). +- The **No-Op** type is only supported by the decoder. (This should arguably be a protocol-level rather than serialisation-level option.) Specifically, it is **only** allowed to occur at the start or between elements of a container and **only** inside un-typed containers. (In a typed container it is impossible to tell the difference between an encoded element and a No-Op.) +- Strongly-typed containers are only supported by the decoder (apart from for **bytes**/**bytearray**) and not for No-Op. - Encoder/decoder extensions are not supported at this time. 
diff --git a/UBJSON-Specification.md b/UBJSON-Specification.md index 83c3b49..b3c169f 100644 --- a/UBJSON-Specification.md +++ b/UBJSON-Specification.md @@ -362,9 +362,9 @@ Optimized with type & count ### Special case: Marker-only types (null, no-op & boolean) If using both _count_ and _type_ optimisations, the marker itself represent the value thus saving repetition (since these types to not have a payload). Additional requirements are: -Strongly typed array of type no-op and with a count of 512: +Strongly typed array of type true (boolean) and with a count of 512: ``` -[[][$][N][#][I][512] +[[][$][T][#][I][512] ``` Strongly typed object of type null and with a count of 3: diff --git a/src/decoder.c b/src/decoder.c index 772599a..42e973a 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -624,13 +624,17 @@ static PyObject* _decode_array(_ubjson_decoder_buffer_t *buffer) { BAIL_ON_NULL(list = PyList_New(params.count)); while (params.count > 0) { + if (TYPE_NOOP == marker) { + READ_CHAR_OR_BAIL(marker, "array value type marker (sized, after no-op)"); + continue; + } BAIL_ON_NULL(value = _ubjson_decode_value(buffer, &marker)); PyList_SET_ITEM(list, list_pos++, value); // reference stolen by list so no longer want to decrement on failure value = NULL; params.count--; if (params.count > 0 && TYPE_NONE == params.type) { - READ_CHAR_OR_BAIL(marker, "array value type marker (counted)"); + READ_CHAR_OR_BAIL(marker, "array value type marker (sized)"); } } } @@ -638,6 +642,10 @@ static PyObject* _decode_array(_ubjson_decoder_buffer_t *buffer) { BAIL_ON_NULL(list = PyList_New(0)); while (ARRAY_END != marker) { + if (TYPE_NOOP == marker) { + READ_CHAR_OR_BAIL(marker, "array value type marker (after no-op)"); + continue; + } BAIL_ON_NULL(value = _ubjson_decode_value(buffer, &marker)); BAIL_ON_NONZERO(PyList_Append(list, value)); Py_CLEAR(value); @@ -721,6 +729,10 @@ static PyObject* _decode_object_with_hook(_ubjson_decoder_buffer_t *buffer) { fixed_type = (TYPE_NONE == 
params.type) ? NULL : &params.type; while (params.count > 0) { + if (TYPE_NOOP == marker) { + READ_CHAR_OR_BAIL(marker, "object key length (sized, after no-op)"); + continue; + } DECODE_OBJECT_KEY_OR_RAISE_ENCODER_EXCEPTION("sized"); BAIL_ON_NULL(value = _ubjson_decode_value(buffer, fixed_type)); BAIL_ON_NULL(item = PyTuple_Pack(2, key, value)); @@ -732,7 +744,7 @@ static PyObject* _decode_object_with_hook(_ubjson_decoder_buffer_t *buffer) { params.count--; if (params.count > 0) { - READ_CHAR_OR_BAIL(marker, "object key length"); + READ_CHAR_OR_BAIL(marker, "object key length (sized)"); } } } @@ -741,6 +753,10 @@ static PyObject* _decode_object_with_hook(_ubjson_decoder_buffer_t *buffer) { fixed_type = (TYPE_NONE == params.type) ? NULL : &params.type; while (OBJECT_END != marker) { + if (TYPE_NOOP == marker) { + READ_CHAR_OR_BAIL(marker, "object key length (after no-op)"); + continue; + } DECODE_OBJECT_KEY_OR_RAISE_ENCODER_EXCEPTION("unsized"); BAIL_ON_NULL(value = _ubjson_decode_value(buffer, fixed_type)); BAIL_ON_NULL(item = PyTuple_Pack(2, key, value)); @@ -801,6 +817,10 @@ static PyObject* _decode_object(_ubjson_decoder_buffer_t *buffer) { fixed_type = (TYPE_NONE == params.type) ? 
NULL : &params.type; while (params.count > 0 && (params.counting || (OBJECT_END != marker))) { + if (TYPE_NOOP == marker) { + READ_CHAR_OR_BAIL(marker, "object key length"); + continue; + } DECODE_OBJECT_KEY_OR_RAISE_ENCODER_EXCEPTION("sized/unsized"); BAIL_ON_NULL(value = _ubjson_decode_value(buffer, fixed_type)); BAIL_ON_NONZERO(PyDict_SetItem(object, key, value)); diff --git a/src/markers.h b/src/markers.h index c09aaa8..e469830 100644 --- a/src/markers.h +++ b/src/markers.h @@ -22,6 +22,7 @@ extern "C" { #define TYPE_NONE '\0' // Used internally only, not part of ubjson specification #define TYPE_NULL 'Z' +#define TYPE_NOOP 'N' #define TYPE_BOOL_TRUE 'T' #define TYPE_BOOL_FALSE 'F' #define TYPE_INT8 'i' diff --git a/test/test.py b/test/test.py index 29a703c..24cc324 100644 --- a/test/test.py +++ b/test/test.py @@ -24,8 +24,8 @@ from ubjson import (dump as ubjdump, dumpb as ubjdumpb, load as ubjload, loadb as ubjloadb, EncoderException, DecoderException) -from ubjson.markers import (TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, - TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, +from ubjson.markers import (TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, + TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT) PY2 = version_info[0] < 3 @@ -277,6 +277,19 @@ def test_array_fixed(self): TYPE_BOOL_FALSE + TYPE_BOOL_TRUE + ARRAY_END), [[], [True], [False, True]]) + def test_array_noop(self): + # only supported without type + self.assertEqual(self.ubjloadb(ARRAY_START + + TYPE_NOOP + + TYPE_UINT8 + b'\x01' + + TYPE_NOOP + + TYPE_UINT8 + b'\x02' + + TYPE_NOOP + + ARRAY_END), [1, 2]) + self.assertEqual(self.ubjloadb(ARRAY_START + CONTAINER_COUNT + TYPE_UINT8 + b'\x01' + + TYPE_NOOP + + TYPE_UINT8 + b'\x01'), [1]) + def test_object(self): # 
custom hook with self.assertRaises(TypeError): @@ -347,6 +360,11 @@ def test_object_fixed(self): TYPE_UINT8 + b'\x02' + b'cc' + b'\x03')), {'aa': 1, 'bb': 2, 'cc': 3}) + # count only + self.assertEqual(loadb(OBJECT_START + CONTAINER_COUNT + TYPE_UINT8 + b'\x02' + + TYPE_UINT8 + b'\x02' + b'aa' + TYPE_NULL + TYPE_UINT8 + b'\x02' + b'bb' + TYPE_NULL), + {'aa': None, 'bb': None}) + # fixed type + count self.assertEqual(loadb(OBJECT_START + CONTAINER_TYPE + TYPE_NULL + CONTAINER_COUNT + TYPE_UINT8 + b'\x02' + TYPE_UINT8 + b'\x02' + b'aa' + TYPE_UINT8 + b'\x02' + b'bb'), @@ -357,6 +375,20 @@ def test_object_fixed(self): TYPE_UINT8 + b'\x02' + b'aa' + b'\x04' + TYPE_UINT8 + b'\x02' + b'bb' + b'\x05'), {'aa': 4, 'bb': 5}) + def test_object_noop(self): + # only supported without type + for hook in (None, OrderedDict): + loadb = partial(self.ubjloadb, object_pairs_hook=hook) + self.assertEqual(ubjloadb(OBJECT_START + + TYPE_NOOP + + TYPE_UINT8 + b'\x01' + 'a'.encode('utf-8') + TYPE_NULL + + TYPE_NOOP + + TYPE_UINT8 + b'\x01' + 'b'.encode('utf-8') + TYPE_BOOL_TRUE + + OBJECT_END), {'a': None, 'b': True}) + self.assertEqual(ubjloadb(OBJECT_START + CONTAINER_COUNT + TYPE_UINT8 + b'\x01' + + TYPE_NOOP + + TYPE_UINT8 + b'\x01' + 'a'.encode('utf-8') + TYPE_NULL), {'a': None}) + def test_circular(self): sequence = [1, 2, 3] sequence.append(sequence) diff --git a/ubjson/__init__.py b/ubjson/__init__.py index 23230b2..bff7d6f 100644 --- a/ubjson/__init__.py +++ b/ubjson/__init__.py @@ -37,6 +37,6 @@ from .encoder import EncoderException # noqa from .decoder import DecoderException # noqa -__version__ = '0.9.0' +__version__ = '0.9.1' __all__ = ('EXTENSION_ENABLED', 'dump', 'dumpb', 'EncoderException', 'load', 'loadb', 'DecoderException') diff --git a/ubjson/decoder.py b/ubjson/decoder.py index d2f7cc1..90b53c8 100644 --- a/ubjson/decoder.py +++ b/ubjson/decoder.py @@ -13,16 +13,16 @@ # limitations under the License. -"""UBJSON draft v12 decoder. 
It does NOT support No-Op ('N') values""" +"""UBJSON draft v12 decoder""" from io import BytesIO from struct import Struct, pack, error as StructError from decimal import Decimal, DecimalException from .compat import raise_from, Mapping -from .markers import (TYPE_NONE, TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, - TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, - OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT) +from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, + TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, + TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT) __TYPES = frozenset((TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, ARRAY_START, @@ -211,6 +211,10 @@ def __decode_object(fp_read, no_bytes, object_pairs_hook): # noqa (complexity) return object_pairs_hook(pairs) while count > 0 and (counting or marker != OBJECT_END): + if marker == TYPE_NOOP: + marker = fp_read(1) + continue + # decode key for object key = __decode_object_key(fp_read, marker) marker = fp_read(1) if type_ == TYPE_NONE else type_ @@ -256,6 +260,10 @@ def __decode_array(fp_read, no_bytes, object_pairs_hook): # noqa (complexity) container = [] while count > 0 and (counting or marker != ARRAY_END): + if marker == TYPE_NOOP: + marker = fp_read(1) + continue + # decode value try: value = __METHOD_MAP[marker](fp_read, marker) diff --git a/ubjson/markers.py b/ubjson/markers.py index f5f1d55..595dbdf 100644 --- a/ubjson/markers.py +++ b/ubjson/markers.py @@ -18,6 +18,7 @@ # Value types TYPE_NONE = b'\x00' # Used internally only, not part of ubjson specification TYPE_NULL = b'Z' +TYPE_NOOP = b'N' TYPE_BOOL_TRUE = b'T' 
TYPE_BOOL_FALSE = b'F' TYPE_INT8 = b'i'