Skip to content

Commit 70ec36a

Browse files
authored
Merge pull request #72 from Stranger6667/dd/remove-caching
Remove internal caching due to hash collisions
2 parents 3014c4c + aac3ba0 commit 70ec36a

File tree

2 files changed

+5
-57
lines changed

2 files changed

+5
-57
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Changelog
22

3+
- Remove internal caching due to hash collisions (#71)
4+
35
#### 0.18.1 - 2020-11-21
46
- Canonicalise `anyOf` special cases when all subschemas have only the `type` keyword
57

src/hypothesis_jsonschema/_encode.py

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
"""Canonical encoding for the JSONSchema semantics, where 1 == 1.0."""
2-
import functools
32
import json
43
import math
54
from json.encoder import _make_iterencode, encode_basestring_ascii # type: ignore
6-
from typing import Any, Callable, Dict, Tuple, Type, Union
5+
from typing import Any, Dict, Tuple, Union
76

87
# Mypy does not (yet!) support recursive type definitions.
98
# (and writing a few steps by hand is a DoS attack on the AST walker in Pytest)
@@ -36,62 +35,9 @@ def floatstr(o: float) -> str:
3635
)(o, 0)
3736

3837

39-
def _make_cache_key(
40-
value: JSONType,
41-
) -> Tuple[Type, Union[None, bool, float, str, tuple, frozenset]]:
42-
"""Make a hashable object from any JSON value.
43-
44-
The idea is to recursively convert all mutable values to immutable ones, adding each value's type as a discriminant.
45-
"""
46-
if isinstance(value, dict):
47-
return (dict, frozenset((k, _make_cache_key(v)) for k, v in value.items()))
48-
if isinstance(value, list):
49-
return (list, tuple(map(_make_cache_key, value)))
50-
# Primitive types are hashable
51-
# `type` is needed to distinguish false-ish values - 0, "", False have the same hash (0)
52-
return (type(value), value)
53-
54-
55-
class HashedJSON:
56-
"""A proxy that holds a JSON value.
57-
58-
Adds a capability for the inner value to be cached, loosely based on `functools._HashedSeq`.
59-
"""
60-
61-
__slots__ = ("value", "hashedvalue")
62-
63-
def __init__(self, value: JSONType):
64-
self.value = value
65-
# `hash` is called multiple times on cache miss, therefore it is evaluated only once
66-
self.hashedvalue = hash(_make_cache_key(value))
67-
68-
def __hash__(self) -> int:
69-
return self.hashedvalue
70-
71-
def __eq__(self, other: "HashedJSON") -> bool: # type: ignore
72-
# TYPES: This class should be used only for caching purposes and there should be
73-
# no values of other types to compare
74-
return self.hashedvalue == other.hashedvalue
75-
76-
77-
def cached_json(func: Callable[[HashedJSON], str]) -> Callable[[JSONType], str]:
78-
"""Cache calls to `encode_canonical_json`.
79-
80-
The same schemas are encoded multiple times during canonicalisation, and caching gives a visible performance improvement.
81-
"""
82-
cached_func = functools.lru_cache(maxsize=1024)(func)
83-
84-
@functools.wraps(cached_func)
85-
def wrapped(value: JSONType) -> str:
86-
return cached_func(HashedJSON(value))
87-
88-
return wrapped
89-
90-
91-
@cached_json
92-
def encode_canonical_json(value: HashedJSON) -> str:
38+
def encode_canonical_json(value: JSONType) -> str:
9339
"""Canonical form serialiser, for uniqueness testing."""
94-
return json.dumps(value.value, sort_keys=True, cls=CanonicalisingJsonEncoder)
40+
return json.dumps(value, sort_keys=True, cls=CanonicalisingJsonEncoder)
9541

9642

9743
def sort_key(value: JSONType) -> Tuple[int, float, Union[float, str]]:

0 commit comments

Comments
 (0)