Skip to content

Commit

Permalink
Rework SerializableByKey handling to improve performance (#6469)
Browse files Browse the repository at this point in the history
  • Loading branch information
maffoo authored Feb 20, 2024
1 parent a4ec796 commit 33eea01
Show file tree
Hide file tree
Showing 14 changed files with 7,585 additions and 3,891 deletions.
229 changes: 50 additions & 179 deletions cirq-core/cirq/protocols/json_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
Optional,
overload,
Sequence,
Set,
Tuple,
Type,
Union,
Expand Down Expand Up @@ -221,10 +220,22 @@ class CirqEncoder(json.JSONEncoder):
See https://github.com/quantumlib/Cirq/issues/2014
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self._memo: dict[Any, dict] = {}

def default(self, o):
# Object with custom method?
if hasattr(o, '_json_dict_'):
return _json_dict_with_cirq_type(o)
json_dict = _json_dict_with_cirq_type(o)
if isinstance(o, SerializableByKey):
if ref := self._memo.get(o):
return ref
key = len(self._memo)
ref = {"cirq_type": "REF", "key": key}
self._memo[o] = ref
return {"cirq_type": "VAL", "key": key, "val": json_dict}
return json_dict

# Sympy object? (Must come before general number checks.)
# TODO: More support for sympy
Expand Down Expand Up @@ -306,27 +317,46 @@ def default(self, o):
return super().default(o) # pragma: no cover


def _cirq_object_hook(d, resolvers: Sequence[JsonResolver], context_map: Dict[str, Any]):
if 'cirq_type' not in d:
return d
class ObjectHook:
"""Callable to be used as object_hook during deserialization."""

LEGACY_CONTEXT_TYPES = {'_ContextualSerialization', '_SerializedKey', '_SerializedContext'}

def __init__(self, resolvers: Sequence[JsonResolver]) -> None:
self.resolvers = resolvers
self.memo: Dict[int, SerializableByKey] = {}
self.context_map: Dict[int, SerializableByKey] = {}

if d['cirq_type'] == '_SerializedKey':
return _SerializedKey.read_from_context(context_map, **d)
def __call__(self, d):
cirq_type = d.get('cirq_type')
if cirq_type is None:
return d

if d['cirq_type'] == '_SerializedContext':
_SerializedContext.update_context(context_map, **d)
return None
if cirq_type == 'VAL':
obj = d['val']
self.memo[d['key']] = obj
return obj

if d['cirq_type'] == '_ContextualSerialization':
return _ContextualSerialization.deserialize_with_context(**d)
if cirq_type == 'REF':
return self.memo[d['key']]

cls = factory_from_json(d['cirq_type'], resolvers=resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)
# Deserialize from legacy "contextual serialization" format
if cirq_type in self.LEGACY_CONTEXT_TYPES:
if cirq_type == '_SerializedKey':
return self.context_map[d['key']]
if cirq_type == '_SerializedContext':
self.context_map[d['key']] = d['obj']
return None
if cirq_type == '_ContextualSerialization':
return d['object_dag'][-1]

del d['cirq_type']
return cls(**d)
cls = factory_from_json(cirq_type, resolvers=self.resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)

del d['cirq_type']
return cls(**d)


class SerializableByKey(SupportsJSON):
Expand All @@ -338,137 +368,6 @@ class SerializableByKey(SupportsJSON):
"""


class _SerializedKey(SupportsJSON):
    """Private holder for the key of a SerializableByKey object.

    This type is used only by contextual serialization. Its deserialization
    is context-dependent and does not round-trip: calling
    `cls._from_json_dict_(obj._json_dict_())` does not return the original
    `obj` (it raises `TypeError`).
    """

    def __init__(self, key: str):
        # Identifier that `read_from_context` looks up in a context map.
        self.key = key

    @classmethod
    def read_from_context(cls, context_map, key, **kwargs):
        """Returns the entry stored under `key` in `context_map`.

        Any additional keyword arguments are accepted and ignored.
        """
        return context_map[key]

    @classmethod
    def _from_json_dict_(cls, **kwargs):
        """Always raises `TypeError`; this type is never built from its dict."""
        message = f'Internal error: {cls} should never deserialize with _from_json_dict_.'
        raise TypeError(message)

    def _json_dict_(self):
        """Returns `obj_to_dict_helper`'s output for the `key` attribute."""
        serialized_attributes = ['key']
        return obj_to_dict_helper(self, serialized_attributes)


class _SerializedContext(SupportsJSON):
    """Private record of one SerializableByKey key-to-object mapping.

    This type is used only by contextual serialization. Its deserialization
    is context-dependent and does not round-trip: calling
    `cls._from_json_dict_(obj._json_dict_())` does not return the original
    `obj` (it raises `TypeError`).
    """

    def __init__(self, obj: SerializableByKey, uid: int):
        # The constructor argument is called `uid`, but it is stored (and
        # therefore serialized) under the attribute name `key`.
        self.obj = obj
        self.key = uid

    @classmethod
    def update_context(cls, context_map, key, obj, **kwargs):
        """Records `obj` under `key` in `context_map`, mutating it in place.

        Any additional keyword arguments are accepted and ignored.
        """
        context_map[key] = obj

    @classmethod
    def _from_json_dict_(cls, **kwargs):
        """Always raises `TypeError`; this type is never built from its dict."""
        message = f'Internal error: {cls} should never deserialize with _from_json_dict_.'
        raise TypeError(message)

    def _json_dict_(self):
        """Returns `obj_to_dict_helper`'s output for `key` and `obj`."""
        serialized_attributes = ['key', 'obj']
        return obj_to_dict_helper(self, serialized_attributes)


class _ContextualSerialization(SupportsJSON):
    """Private wrapper that serializes an object together with its context.

    This type is used only by contextual serialization. Its deserialization
    is context-dependent and does not round-trip: calling
    `cls._from_json_dict_(obj._json_dict_())` does not return the original
    `obj` (it raises `TypeError`).
    """

    def __init__(self, obj: Any):
        # Context entries and the wrapped object live together in a single
        # list, `object_dag`, to ensure consistent serialization ordering.
        distinct_sbks = []
        dag = []
        for candidate in get_serializable_by_keys(obj):
            if candidate in distinct_sbks:
                # Already has a context entry; each object is recorded once.
                continue
            distinct_sbks.append(candidate)
            # The key is the number of distinct objects seen so far, so keys
            # start at 1.
            dag.append(_SerializedContext(candidate, len(distinct_sbks)))
        # The wrapped object itself always comes last.
        dag.append(obj)
        self.object_dag = dag

    @classmethod
    def deserialize_with_context(cls, object_dag, **kwargs):
        """Returns the last element of `object_dag`, i.e. the wrapped object.

        Any additional keyword arguments are accepted and ignored.
        """
        wrapped = object_dag[-1]
        return wrapped

    @classmethod
    def _from_json_dict_(cls, **kwargs):
        """Always raises `TypeError`; this type is never built from its dict."""
        message = f'Internal error: {cls} should never deserialize with _from_json_dict_.'
        raise TypeError(message)

    def _json_dict_(self):
        """Returns `obj_to_dict_helper`'s output for `object_dag`."""
        serialized_attributes = ['object_dag']
        return obj_to_dict_helper(self, serialized_attributes)


def has_serializable_by_keys(obj: Any) -> bool:
    """Returns true if obj contains one or more SerializableByKey objects.

    The search looks at `obj` itself, then at the values of its
    `_json_dict_()` result (when that is a dict), and otherwise recurses into
    dict keys/values or the elements of any non-string iterable.
    """
    if isinstance(obj, SerializableByKey):
        return True

    # Objects without a `_json_dict_` method fall back to a stub returning None.
    to_json_dict = getattr(obj, '_json_dict_', lambda: None)
    json_dict = to_json_dict()
    if isinstance(json_dict, Dict):
        for value in json_dict.values():
            if has_serializable_by_keys(value):
                return True
        return False

    # Handle primitive container types.
    if isinstance(obj, Dict):
        for key, value in obj.items():
            if has_serializable_by_keys(key) or has_serializable_by_keys(value):
                return True
        return False

    if not hasattr(obj, '__iter__') or isinstance(obj, str):
        return False

    # Return False on TypeError because some numpy values
    # (like np.array(1)) have iterable methods
    # yet return a TypeError when there is an attempt to iterate over them
    try:
        for elem in obj:
            if has_serializable_by_keys(elem):
                return True
    except TypeError:
        return False
    return False


def get_serializable_by_keys(obj: Any) -> List[SerializableByKey]:
    """Returns all SerializableByKeys contained by obj.

    Objects are ordered such that nested objects appear before the object they
    are nested inside. This is required to ensure SerializableByKeys are only
    fully defined once in serialization.

    Args:
        obj: The object to search. It may itself be a SerializableByKey, may
            expose a `_json_dict_` method, or may be a dict or other
            non-string iterable.

    Returns:
        A list of the SerializableByKey objects found; duplicates are not
        removed. The list is empty if none are found, including when `obj`
        advertises `__iter__` but cannot actually be iterated.
    """
    result = []
    if isinstance(obj, SerializableByKey):
        result.append(obj)
    # Objects without a `_json_dict_` method fall back to a stub returning None.
    json_dict = getattr(obj, '_json_dict_', lambda: None)()
    if isinstance(json_dict, Dict):
        for v in json_dict.values():
            # Prepend so that nested objects precede the object holding them.
            result = get_serializable_by_keys(v) + result
    if result:
        return result

    # Handle primitive container types.
    if isinstance(obj, Dict):
        # Each (key, value) pair is a tuple and is searched via the iterable
        # branch below.
        return [sbk for pair in obj.items() for sbk in get_serializable_by_keys(pair)]
    if hasattr(obj, '__iter__') and not isinstance(obj, str):
        # Some numpy values (like np.array(1)) have iterable methods yet raise
        # a TypeError when there is an attempt to iterate over them. Treat
        # those as containing nothing, matching has_serializable_by_keys.
        try:
            elements = iter(obj)
        except TypeError:
            return []
        return [sbk for v in elements for sbk in get_serializable_by_keys(v)]
    return []


def json_namespace(type_obj: Type) -> str:
"""Returns a namespace for JSON serialization of `type_obj`.
Expand Down Expand Up @@ -610,37 +509,12 @@ def to_json(
party classes, prefer adding the `_json_dict_` magic method
to your classes rather than overriding this default.
"""
if has_serializable_by_keys(obj):
obj = _ContextualSerialization(obj)

class ContextualEncoder(cls): # type: ignore
"""An encoder with a context map for concise serialization."""

# These lists populate gradually during serialization. An object
# with components defined in 'context' will represent those
# components using their keys instead of inline definition.
seen: Set[str] = set()

def default(self, o):
if not isinstance(o, SerializableByKey):
return super().default(o)
for candidate in obj.object_dag[:-1]:
if candidate.obj == o:
if not candidate.key in ContextualEncoder.seen:
ContextualEncoder.seen.add(candidate.key)
return _json_dict_with_cirq_type(candidate.obj)
else:
return _json_dict_with_cirq_type(_SerializedKey(candidate.key))
raise ValueError("Object mutated during serialization.") # pragma: no cover

cls = ContextualEncoder

if file_or_fn is None:
return json.dumps(obj, indent=indent, separators=separators, cls=cls)

if isinstance(file_or_fn, (str, pathlib.Path)):
with open(file_or_fn, 'w') as actually_a_file:
json.dump(obj, actually_a_file, indent=indent, cls=cls)
json.dump(obj, actually_a_file, indent=indent, separators=separators, cls=cls)
return None

json.dump(obj, file_or_fn, indent=indent, separators=separators, cls=cls)
Expand Down Expand Up @@ -682,10 +556,7 @@ def read_json(
if resolvers is None:
resolvers = DEFAULT_RESOLVERS

context_map: Dict[str, 'SerializableByKey'] = {}

def obj_hook(x):
return _cirq_object_hook(x, resolvers, context_map)
obj_hook = ObjectHook(resolvers)

if json_text is not None:
return json.loads(json_text, object_hook=obj_hook)
Expand Down
50 changes: 11 additions & 39 deletions cirq-core/cirq/protocols/json_serialization_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,11 @@ def __eq__(self, other):
and self.data_dict == other.data_dict
)

def __hash__(self):
return hash(
(self.name, tuple(self.data_list), self.data_tuple, frozenset(self.data_dict.items()))
)

def _json_dict_(self):
return {
"name": self.name,
Expand All @@ -386,12 +391,12 @@ def _from_json_dict_(cls, name, data_list, data_tuple, data_dict, **kwargs):
return cls(name, data_list, tuple(data_tuple), data_dict)


def test_context_serialization():
def test_serializable_by_key():
def custom_resolver(name):
if name == 'SBKImpl':
return SBKImpl

test_resolvers = [custom_resolver] + cirq.DEFAULT_RESOLVERS
test_resolvers = [custom_resolver, *cirq.DEFAULT_RESOLVERS]

sbki_empty = SBKImpl('sbki_empty')
assert_json_roundtrip_works(sbki_empty, resolvers=test_resolvers)
Expand All @@ -406,55 +411,22 @@ def custom_resolver(name):
assert_json_roundtrip_works(sbki_dict, resolvers=test_resolvers)

sbki_json = str(cirq.to_json(sbki_dict))
# There should be exactly one context item for each previous SBKImpl.
assert sbki_json.count('"cirq_type": "_SerializedContext"') == 4
# There should be exactly two key items for each of sbki_(empty|list|tuple),
# plus one for the top-level sbki_dict.
assert sbki_json.count('"cirq_type": "_SerializedKey"') == 7
# The final object should be a _SerializedKey for sbki_dict.
final_obj_idx = sbki_json.rfind('{')
final_obj = sbki_json[final_obj_idx : sbki_json.find('}', final_obj_idx) + 1]
assert (
final_obj
== """{
"cirq_type": "_SerializedKey",
"key": 4
}"""
)
# There are 4 SBKImpl instances, one each for empty, list, tuple, dict.
assert sbki_json.count('"cirq_type": "VAL"') == 4
# There are 3 SBKImpl refs, one each for empty, list, and tuple.
assert sbki_json.count('"cirq_type": "REF"') == 3

list_sbki = [sbki_dict]
assert_json_roundtrip_works(list_sbki, resolvers=test_resolvers)

dict_sbki = {'a': sbki_dict}
assert_json_roundtrip_works(dict_sbki, resolvers=test_resolvers)

assert sbki_list != json_serialization._SerializedKey(sbki_list)

# Serialization keys have unique suffixes.
sbki_other_list = SBKImpl('sbki_list', data_list=[sbki_list])
assert_json_roundtrip_works(sbki_other_list, resolvers=test_resolvers)


def test_internal_serializer_types():
    """Checks that the internal contextual-serialization helper types refuse
    to be rebuilt through `_from_json_dict_`.
    """
    sbki = SBKImpl('test_key')
    key = 1
    internal_objects = (
        json_serialization._SerializedKey(key),
        json_serialization._SerializedContext(sbki, 1),
        json_serialization._ContextualSerialization(sbki),
    )

    for internal in internal_objects:
        json_dict = internal._json_dict_()
        # Each helper type raises a TypeError whose message names the method.
        with pytest.raises(TypeError, match='_from_json_dict_'):
            _ = type(internal)._from_json_dict_(**json_dict)


# during test setup deprecated submodules are inspected and trigger the
# deprecation error in testing. It is cleaner to just turn it off than to assert
# deprecation for each submodule.
Expand Down
Loading

0 comments on commit 33eea01

Please sign in to comment.