Skip to content

Commit 622990e

Browse files
authored
Introduce various updates for the Compact serialization [API-1564] (#610)
* Introduce various updates for the Compact serialization This PR introduces couple of updates to the implementation - Schema now holds a regular dict, instead of OrderedDict for faster lookups and simpler implementation - Prevent duplicate field names by adding a check to the schema writer - Disallowing compact serializers to override builtin serializers * address review comments
1 parent 244084b commit 622990e

File tree

3 files changed

+96
-22
lines changed

3 files changed

+96
-22
lines changed

hazelcast/serialization/compact.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import abc
2-
import collections
32
import datetime
43
import decimal
54
import typing
@@ -1560,10 +1559,10 @@ def _raise_on_none_value_in_fix_sized_item_array(field: "FieldDescriptor") -> ty
15601559
class SchemaWriter(CompactWriter):
15611560
def __init__(self, type_name: str):
15621561
self._type_name = type_name
1563-
self._fields: typing.List[FieldDescriptor] = []
1562+
self._fields: typing.Dict[str, FieldDescriptor] = {}
15641563

15651564
def build(self) -> "Schema":
1566-
return Schema(self._type_name, self._fields)
1565+
return Schema(self._type_name, list(self._fields.values()))
15671566

15681567
def write_boolean(self, field_name: str, value: bool) -> None:
15691568
self._add_field(field_name, FieldKind.BOOLEAN)
@@ -1740,16 +1739,20 @@ def write_array_of_compact(
17401739
self._add_field(field_name, FieldKind.ARRAY_OF_COMPACT)
17411740

17421741
def _add_field(self, name: str, kind: "FieldKind"):
1743-
self._fields.append(FieldDescriptor(name, kind))
1742+
if name in self._fields:
1743+
raise HazelcastSerializationError(f"Field with the name '{name}' already exists")
1744+
1745+
self._fields[name] = FieldDescriptor(name, kind)
17441746

17451747

17461748
class Schema:
17471749
def __init__(self, type_name: str, fields_list: typing.List["FieldDescriptor"]):
17481750
self.type_name = type_name
1749-
self.fields: typing.Dict[str, "FieldDescriptor"] = Schema._dict_to_key_ordered_dict(
1750-
{f.name: f for f in fields_list}
1751-
)
1752-
self.fields_list = list(self.fields.values())
1751+
self.fields: typing.Dict[str, "FieldDescriptor"] = {f.name: f for f in fields_list}
1752+
# Sort the fields by the field name so that the field offsets/indexes
1753+
# can be set correctly.
1754+
fields_list.sort(key=lambda f: f.name)
1755+
self.fields_list = fields_list
17531756
self.schema_id: int = 0
17541757
self.var_sized_field_count: int = 0
17551758
self.fix_sized_fields_length: int = 0
@@ -1769,6 +1772,11 @@ def _init(self):
17691772
else:
17701773
fix_sized_fields.append(field)
17711774

1775+
# Fix sized fields should be in descending order of size in bytes.
1776+
# For ties, the alphabetical order(ascending) of the field name will
1777+
# be used. Since, `fields_list` is sorted at this point, and the `sort`
1778+
# method is stable, only sorting by the size in bytes is enough for
1779+
# this invariant to hold.
17721780
fix_sized_fields.sort(
17731781
key=lambda f: FIELD_OPERATIONS[f.kind].size_in_bytes(),
17741782
reverse=True,
@@ -1792,6 +1800,8 @@ def _init(self):
17921800

17931801
self.fix_sized_fields_length = position
17941802

1803+
# Variable sized fields should be in ascending alphabetical ordering
1804+
# of the field names
17951805
index = 0
17961806
for field in var_sized_fields:
17971807
field.index = index
@@ -1800,15 +1810,6 @@ def _init(self):
18001810
self.var_sized_field_count = index
18011811
self.schema_id = RabinFingerprint.of(self)
18021812

1803-
@staticmethod
1804-
def _dict_to_key_ordered_dict(
1805-
d: typing.Dict[str, "FieldDescriptor"]
1806-
) -> typing.Dict[str, "FieldDescriptor"]:
1807-
od = collections.OrderedDict()
1808-
for key in sorted(d):
1809-
od[key] = d[key]
1810-
return od
1811-
18121813
def __eq__(self, other: typing.Any) -> bool:
18131814
return (
18141815
isinstance(other, Schema)

hazelcast/serialization/service.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import typing
88

99
from hazelcast.config import IntType, Config
10-
from hazelcast.errors import HazelcastInstanceNotActiveError
10+
from hazelcast.errors import HazelcastInstanceNotActiveError, IllegalArgumentError
1111
from hazelcast.serialization.api import IdentifiedDataSerializable, Portable
1212
from hazelcast.serialization.compact import (
1313
SchemaNotFoundError,
@@ -111,6 +111,11 @@ def __init__(
111111
for _type, custom_serializer in config.custom_serializers.items():
112112
self._registry.safe_register_serializer(custom_serializer(), _type)
113113

114+
# Called here so that we can make sure that we are not overriding
115+
# any of the default serializers registered above with the Compact
116+
# serialization.
117+
self._registry.validate()
118+
114119
def to_data(self, obj, partitioning_strategy=None):
115120
"""Serialize the input object into byte array representation
116121
@@ -241,12 +246,12 @@ def _register_constant_serializers(self):
241246
self._registry.register_constant_serializer(BooleanSerializer(), bool)
242247
self._registry.register_constant_serializer(CharSerializer())
243248
self._registry.register_constant_serializer(ShortSerializer())
244-
self._registry.register_constant_serializer(IntegerSerializer())
249+
self._registry.register_constant_serializer(IntegerSerializer(), int)
245250
self._registry.register_constant_serializer(LongSerializer())
246251
self._registry.register_constant_serializer(FloatSerializer())
247252
self._registry.register_constant_serializer(DoubleSerializer(), float)
248253
self._registry.register_constant_serializer(UuidSerializer(), uuid.UUID)
249-
self._registry.register_constant_serializer(StringSerializer())
254+
self._registry.register_constant_serializer(StringSerializer(), str)
250255
# Arrays of primitives and String
251256
self._registry.register_constant_serializer(ByteArraySerializer(), bytearray)
252257
self._registry.register_constant_serializer(BooleanArraySerializer())
@@ -496,6 +501,22 @@ def register_from_super_type(self, obj_type, super_type) -> typing.Optional[Stre
496501
self.safe_register_serializer(serializer, obj_type)
497502
return serializer
498503

504+
def validate(self):
505+
"""
506+
Makes sure that the classes registered as Compact serializable are not
507+
overriding the default serializers.
508+
509+
Must be called in the constructor of the serialization service after it
510+
completes registering default serializers.
511+
"""
512+
for compact_type in self._compact_types:
513+
if compact_type in self._constant_type_dict:
514+
raise IllegalArgumentError(
515+
f"Compact serializer for the class {compact_type}' can not be "
516+
f"registered as it overrides the default serializer for that "
517+
f"class provided by Hazelcast."
518+
)
519+
499520
def destroy(self):
500521
for serializer in list(self._type_dict.values()):
501522
serializer.destroy()

tests/unit/serialization/compact_test.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from parameterized import parameterized
77

88
from hazelcast.config import Config
9-
from hazelcast.errors import HazelcastSerializationError
9+
from hazelcast.errors import HazelcastSerializationError, IllegalArgumentError
1010
from hazelcast.serialization import SerializationServiceV1
1111
from hazelcast.serialization.api import CompactSerializer, CompactReader, CompactWriter, FieldKind
1212
from hazelcast.serialization.compact import (
@@ -198,6 +198,12 @@ def test_schema_writer(self):
198198
for name, kind in fields:
199199
self.assertEqual(kind, schema.fields.get(name).kind)
200200

201+
def test_schema_writer_with_duplicate_field_names(self):
202+
writer = SchemaWriter("foo")
203+
writer.write_int32("bar", 42)
204+
with self.assertRaisesRegex(HazelcastSerializationError, "already exists"):
205+
writer.write_string("bar", "42")
206+
201207

202208
class Child:
203209
def __init__(self, name: str):
@@ -214,7 +220,7 @@ def read(self, reader: CompactReader):
214220
name = reader.read_string("name")
215221
return Child(name)
216222

217-
def write(self, writer: CompactWriter, obj: Parent):
223+
def write(self, writer: CompactWriter, obj: Child):
218224
writer.write_string("name", obj.name)
219225

220226
def get_type_name(self):
@@ -259,3 +265,49 @@ def test_missing_serializer(self):
259265
):
260266
obj = Parent(Child("test"))
261267
self._serialize(service, obj)
268+
269+
270+
class CompactSerializationTest(unittest.TestCase):
271+
def test_overriding_default_serializers(self):
272+
config = Config()
273+
config.compact_serializers = [StringCompactSerializer()]
274+
275+
with self.assertRaisesRegex(IllegalArgumentError, "can not be registered as it overrides"):
276+
SerializationServiceV1(config)
277+
278+
def test_serializer_with_duplicate_field_names(self):
279+
config = Config()
280+
config.compact_serializers = [SerializerWithDuplicateFieldsNames()]
281+
282+
service = SerializationServiceV1(config)
283+
with self.assertRaisesRegex(HazelcastSerializationError, "already exists"):
284+
service.to_data(Child("foo"))
285+
286+
287+
class StringCompactSerializer(CompactSerializer[str]):
288+
def read(self, reader: CompactReader) -> str:
289+
pass
290+
291+
def write(self, writer: CompactWriter, obj: str) -> None:
292+
pass
293+
294+
def get_class(self) -> typing.Type[str]:
295+
return str
296+
297+
def get_type_name(self) -> str:
298+
return "str"
299+
300+
301+
class SerializerWithDuplicateFieldsNames(CompactSerializer[Child]):
302+
def read(self, reader: CompactReader) -> Child:
303+
pass
304+
305+
def write(self, writer: CompactWriter, obj: Child) -> None:
306+
writer.write_string("name", obj.name)
307+
writer.write_string("name", obj.name)
308+
309+
def get_class(self) -> typing.Type[Child]:
310+
return Child
311+
312+
def get_type_name(self) -> str:
313+
return "child"

0 commit comments

Comments
 (0)