|
12 | 12 | most things by construction instead of by filtering. That's the difference
|
13 | 13 | between "I'd like it to be faster" and "doesn't finish at all".
|
14 | 14 | """
|
15 |
| - |
| 15 | +import functools |
16 | 16 | import itertools
|
17 | 17 | import json
|
18 | 18 | import math
|
19 | 19 | import re
|
20 | 20 | from copy import deepcopy
|
21 | 21 | from json.encoder import _make_iterencode, encode_basestring_ascii # type: ignore
|
22 |
| -from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union |
| 22 | +from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Type, Union |
23 | 23 |
|
24 | 24 | import jsonschema
|
25 | 25 | from hypothesis.errors import InvalidArgument
|
@@ -108,9 +108,62 @@ class HypothesisRefResolutionError(jsonschema.exceptions.RefResolutionError):
|
108 | 108 | pass
|
109 | 109 |
|
110 | 110 |
|
111 |
| -def encode_canonical_json(value: JSONType) -> str: |
def _make_cache_key(
    value: JSONType,
) -> Tuple[Type, Union[Tuple, None, bool, float, str]]:
    """Make a hashable object from any JSON value.

    Mutable containers are recursively converted to tuples, and every level is
    tagged with the type of the value it was built from.

    Args:
        value: The JSON value to convert.  Dicts and lists are walked
            recursively; anything else is used as-is and must be hashable.

    Returns:
        A ``(type, payload)`` pair.  For a dict the payload is a tuple of
        ``(key, converted value)`` pairs in the dict's own iteration order,
        for a list it is a tuple of converted items, and for any other value
        it is the value itself.
    """
    if isinstance(value, dict):
        # Iteration order is kept, so two dicts with the same items inserted
        # in a different order produce different keys.
        return (dict, tuple((k, _make_cache_key(v)) for k, v in value.items()))
    if isinstance(value, list):
        return (list, tuple(map(_make_cache_key, value)))
    # Primitive types are hashable.
    # The type tag keeps apart values that compare equal across types, such as
    # `0 == False` or `1 == 1.0`, which would otherwise be indistinguishable.
    return (type(value), value)


class HashedJSON:
    """A proxy that holds a JSON value and makes it usable as a cache key.

    Loosely based on `functools._HashedSeq`: the hash is computed once at
    construction time.  Equality compares the structural keys produced by
    `_make_cache_key` rather than the hashes alone, because distinct values can
    share a hash (in CPython, for example, ``hash(-1) == hash(-2)``) and a
    hash-only comparison would let a cache return the entry of a different
    value.
    """

    __slots__ = ("value", "key", "hashedvalue")

    def __init__(self, value: JSONType):
        self.value = value
        # The structural key is kept so that `__eq__` can tell apart values
        # whose hashes collide.
        self.key = _make_cache_key(value)
        # `hash` is called multiple times on cache miss, therefore it is evaluated only once
        self.hashedvalue = hash(self.key)

    def __hash__(self) -> int:
        return self.hashedvalue

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, HashedJSON):
            # Let Python fall back to its default comparison for foreign types.
            return NotImplemented
        # The cheap integer comparison runs first; the structural comparison
        # only happens when the hashes already agree.
        return self.hashedvalue == other.hashedvalue and self.key == other.key
| 147 | + |
| 148 | + |
def cached_json(func: Callable[[HashedJSON], str]) -> Callable[[JSONType], str]:
    """Wrap `func` so that its results are memoised per JSON value.

    JSON values may be unhashable dicts or lists, so each argument is wrapped
    in a `HashedJSON` proxy before it reaches an LRU cache holding up to 1024
    entries.  This is used for `encode_canonical_json`: the same schemas are
    encoded multiple times during canonicalisation, and caching gives a
    visible performance improvement.
    """
    memoised = functools.lru_cache(maxsize=1024)(func)

    def wrapper(value: JSONType) -> str:
        # Callers pass plain JSON; only the cached function sees the proxy.
        hashable = HashedJSON(value)
        return memoised(hashable)

    # Copy the metadata of the cached function onto the wrapper.
    return functools.update_wrapper(wrapper, memoised)
| 161 | + |
| 162 | + |
@cached_json
def encode_canonical_json(value: HashedJSON) -> str:
    """Canonical form serialiser, for uniqueness testing.

    The JSON held by the `HashedJSON` proxy is dumped with sorted keys using
    `CanonicalisingJsonEncoder`.
    """
    payload = value.value
    return json.dumps(payload, cls=CanonicalisingJsonEncoder, sort_keys=True)
114 | 167 |
|
115 | 168 |
|
116 | 169 | def sort_key(value: JSONType) -> Tuple[int, float, Union[float, str]]:
|
|
0 commit comments