14
14
import enum
15
15
import logging
16
16
import threading
17
- from typing import Any , Dict , Generic , Iterable , Optional , Set , TypeVar
17
+ from typing import Any , Dict , Generic , Iterable , Optional , Set , Tuple , TypeVar , Union
18
18
19
19
import attr
20
+ from typing_extensions import Literal
20
21
21
22
from synapse .util .caches .lrucache import LruCache
23
+ from synapse .util .caches .treecache import TreeCache
22
24
23
25
logger = logging .getLogger (__name__ )
24
26
33
35
34
36
# This class can't be generic because it uses slots with attrs.
35
37
# See: https://github.com/python-attrs/attrs/issues/313
36
- @attr .s (slots = True , auto_attribs = True )
38
+ @attr .s (slots = True , frozen = True , auto_attribs = True )
37
39
class DictionaryEntry : # should be: Generic[DKT, DV].
38
40
"""Returned when getting an entry from the cache
39
41
42
+ If `full` is true then `known_absent` will be the empty set.
43
+
40
44
Attributes:
41
45
full: Whether the cache has the full or dict or just some keys.
42
46
If not full then not all requested keys will necessarily be present
@@ -53,20 +57,90 @@ def __len__(self) -> int:
53
57
return len (self .value )
54
58
55
59
60
+ class _FullCacheKey (enum .Enum ):
61
+ """The key we use to cache the full dict."""
62
+
63
+ KEY = object ()
64
+
65
+
56
66
class _Sentinel (enum .Enum ):
57
67
# defining a sentinel in this way allows mypy to correctly handle the
58
68
# type of a dictionary lookup.
59
69
sentinel = object ()
60
70
61
71
72
class _PerKeyValue(Generic[DV]):
    """Cached value for one individual dict key.

    Holds either the real value, or `_Sentinel.sentinel` to record that the
    key is known *not* to be present in the full dict.
    """

    __slots__ = ["value"]

    def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None:
        self.value = value

    def __len__(self) -> int:
        # The enclosing cache sizes its entries via `len()` (variable-length
        # values); a per-key entry always counts as exactly one unit.
        return 1
86
+
87
+
62
88
class DictionaryCache (Generic [KT , DKT , DV ]):
63
89
"""Caches key -> dictionary lookups, supporting caching partial dicts, i.e.
64
90
fetching a subset of dictionary keys for a particular key.
91
+
92
+ This cache has two levels of key. First there is the "cache key" (of type
93
+ `KT`), which maps to a dict. The keys to that dict are the "dict key" (of
94
+ type `DKT`). The overall structure is therefore `KT->DKT->DV`. For
95
+ example, it might look like:
96
+
97
+ {
98
+ 1: { 1: "a", 2: "b" },
99
+ 2: { 1: "c" },
100
+ }
101
+
102
+ It is possible to look up either individual dict keys, or the *complete*
103
+ dict for a given cache key.
104
+
105
+ Each dict item, and the complete dict is treated as a separate LRU
106
+ entry for the purpose of cache expiry. For example, given:
107
+ dict_cache.get(1, None) -> DictionaryEntry({1: "a", 2: "b"})
108
+ dict_cache.get(1, [1]) -> DictionaryEntry({1: "a"})
109
+ dict_cache.get(1, [2]) -> DictionaryEntry({2: "b"})
110
+
111
+ ... then the cache entry for the complete dict will expire first,
112
+ followed by the cache entry for the '1' dict key, and finally that
113
+ for the '2' dict key.
65
114
"""
66
115
67
116
def __init__ (self , name : str , max_entries : int = 1000 ):
68
- self .cache : LruCache [KT , DictionaryEntry ] = LruCache (
69
- max_size = max_entries , cache_name = name , size_callback = len
117
+ # We use a single LruCache to store two different types of entries:
118
+ # 1. Map from (key, dict_key) -> dict value (or sentinel, indicating
119
+ # the key doesn't exist in the dict); and
120
+ # 2. Map from (key, _FullCacheKey.KEY) -> full dict.
121
+ #
122
+ # The former is used when explicit keys of the dictionary are looked up,
123
+ # and the latter when the full dictionary is requested.
124
+ #
125
+ # If when explicit keys are requested and not in the cache, we then look
126
+ # to see if we have the full dict and use that if we do. If found in the
127
+ # full dict each key is added into the cache.
128
+ #
129
+ # This set up allows the `LruCache` to prune the full dict entries if
130
+ # they haven't been used in a while, even when there have been recent
131
+ # queries for subsets of the dict.
132
+ #
133
+ # Typing:
134
+ # * A key of `(KT, DKT)` has a value of `_PerKeyValue`
135
+ # * A key of `(KT, _FullCacheKey.KEY)` has a value of `Dict[DKT, DV]`
136
+ self .cache : LruCache [
137
+ Tuple [KT , Union [DKT , Literal [_FullCacheKey .KEY ]]],
138
+ Union [_PerKeyValue , Dict [DKT , DV ]],
139
+ ] = LruCache (
140
+ max_size = max_entries ,
141
+ cache_name = name ,
142
+ cache_type = TreeCache ,
143
+ size_callback = len ,
70
144
)
71
145
72
146
self .name = name
@@ -91,23 +165,83 @@ def get(
91
165
Args:
92
166
key
93
167
dict_keys: If given a set of keys then return only those keys
94
- that exist in the cache.
168
+ that exist in the cache. If None then returns the full dict
169
+ if it is in the cache.
95
170
96
171
Returns:
97
- DictionaryEntry
172
+ DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry`
173
+ will contain include the keys that are in the cache. If None then
174
+ will either return the full dict if in the cache, or the empty
175
+ dict (with `full` set to False) if it isn't.
98
176
"""
99
- entry = self .cache .get (key , _Sentinel .sentinel )
100
- if entry is not _Sentinel .sentinel :
101
- if dict_keys is None :
102
- return DictionaryEntry (
103
- entry .full , entry .known_absent , dict (entry .value )
104
- )
177
+ if dict_keys is None :
178
+ # The caller wants the full set of dictionary keys for this cache key
179
+ return self ._get_full_dict (key )
180
+
181
+ # We are being asked for a subset of keys.
182
+
183
+ # First go and check for each requested dict key in the cache, tracking
184
+ # which we couldn't find.
185
+ values = {}
186
+ known_absent = set ()
187
+ missing = []
188
+ for dict_key in dict_keys :
189
+ entry = self .cache .get ((key , dict_key ), _Sentinel .sentinel )
190
+ if entry is _Sentinel .sentinel :
191
+ missing .append (dict_key )
192
+ continue
193
+
194
+ assert isinstance (entry , _PerKeyValue )
195
+
196
+ if entry .value is _Sentinel .sentinel :
197
+ known_absent .add (dict_key )
105
198
else :
106
- return DictionaryEntry (
107
- entry .full ,
108
- entry .known_absent ,
109
- {k : entry .value [k ] for k in dict_keys if k in entry .value },
110
- )
199
+ values [dict_key ] = entry .value
200
+
201
+ # If we found everything we can return immediately.
202
+ if not missing :
203
+ return DictionaryEntry (False , known_absent , values )
204
+
205
+ # We are missing some keys, so check if we happen to have the full dict in
206
+ # the cache.
207
+ #
208
+ # We don't update the last access time for this cache fetch, as we
209
+ # aren't explicitly interested in the full dict and so we don't want
210
+ # requests for explicit dict keys to keep the full dict in the cache.
211
+ entry = self .cache .get (
212
+ (key , _FullCacheKey .KEY ),
213
+ _Sentinel .sentinel ,
214
+ update_last_access = False ,
215
+ )
216
+ if entry is _Sentinel .sentinel :
217
+ # Not in the cache, return the subset of keys we found.
218
+ return DictionaryEntry (False , known_absent , values )
219
+
220
+ # We have the full dict!
221
+ assert isinstance (entry , dict )
222
+
223
+ for dict_key in missing :
224
+ # We explicitly add each dict key to the cache, so that cache hit
225
+ # rates and LRU times for each key can be tracked separately.
226
+ value = entry .get (dict_key , _Sentinel .sentinel ) # type: ignore[arg-type]
227
+ self .cache [(key , dict_key )] = _PerKeyValue (value )
228
+
229
+ if value is not _Sentinel .sentinel :
230
+ values [dict_key ] = value
231
+
232
+ return DictionaryEntry (True , set (), values )
233
+
234
+ def _get_full_dict (
235
+ self ,
236
+ key : KT ,
237
+ ) -> DictionaryEntry :
238
+ """Fetch the full dict for the given key."""
239
+
240
+ # First we check if we have cached the full dict.
241
+ entry = self .cache .get ((key , _FullCacheKey .KEY ), _Sentinel .sentinel )
242
+ if entry is not _Sentinel .sentinel :
243
+ assert isinstance (entry , dict )
244
+ return DictionaryEntry (True , set (), entry )
111
245
112
246
return DictionaryEntry (False , set (), {})
113
247
@@ -117,7 +251,13 @@ def invalidate(self, key: KT) -> None:
117
251
# Increment the sequence number so that any SELECT statements that
118
252
# raced with the INSERT don't update the cache (SYN-369)
119
253
self .sequence += 1
120
- self .cache .pop (key , None )
254
+
255
+ # We want to drop all information about the dict for the given key, so
256
+ # we use `del_multi` to delete it all in one go.
257
+ #
258
+ # We ignore the type error here: `del_multi` accepts a truncated key
259
+ # (when the key type is a tuple).
260
+ self .cache .del_multi ((key ,)) # type: ignore[arg-type]
121
261
122
262
def invalidate_all (self ) -> None :
123
263
self .check_thread ()
@@ -131,7 +271,16 @@ def update(
131
271
value : Dict [DKT , DV ],
132
272
fetched_keys : Optional [Iterable [DKT ]] = None ,
133
273
) -> None :
134
- """Updates the entry in the cache
274
+ """Updates the entry in the cache.
275
+
276
+ Note: This does *not* invalidate any existing entries for the `key`.
277
+ In particular, if we add an entry for the cached "full dict" with
278
+ `fetched_keys=None`, existing entries for individual dict keys are
279
+ not invalidated. Likewise, adding entries for individual keys does
280
+ not invalidate any cached value for the full dict.
281
+
282
+ In other words: if the underlying data is *changed*, the cache must
283
+ be explicitly invalidated via `.invalidate()`.
135
284
136
285
Args:
137
286
sequence
@@ -149,20 +298,27 @@ def update(
149
298
# Only update the cache if the caches sequence number matches the
150
299
# number that the cache had before the SELECT was started (SYN-369)
151
300
if fetched_keys is None :
152
- self ._insert (key , value , set ())
301
+ self .cache [ (key , _FullCacheKey . KEY )] = value
153
302
else :
154
- self ._update_or_insert (key , value , fetched_keys )
303
+ self ._update_subset (key , value , fetched_keys )
155
304
156
- def _update_or_insert (
157
- self , key : KT , value : Dict [DKT , DV ], known_absent : Iterable [DKT ]
305
+ def _update_subset (
306
+ self , key : KT , value : Dict [DKT , DV ], fetched_keys : Iterable [DKT ]
158
307
) -> None :
159
- # We pop and reinsert as we need to tell the cache the size may have
160
- # changed
308
+ """Add the given dictionary values as explicit keys in the cache.
309
+
310
+ Args:
311
+ key: top-level cache key
312
+ value: The dictionary with all the values that we should cache
313
+ fetched_keys: The full set of dict keys that were looked up. Any keys
314
+ here not in `value` should be marked as "known absent".
315
+ """
316
+
317
+ for dict_key , dict_value in value .items ():
318
+ self .cache [(key , dict_key )] = _PerKeyValue (dict_value )
161
319
162
- entry : DictionaryEntry = self .cache .pop (key , DictionaryEntry (False , set (), {}))
163
- entry .value .update (value )
164
- entry .known_absent .update (known_absent )
165
- self .cache [key ] = entry
320
+ for dict_key in fetched_keys :
321
+ if dict_key in value :
322
+ continue
166
323
167
- def _insert (self , key : KT , value : Dict [DKT , DV ], known_absent : Set [DKT ]) -> None :
168
- self .cache [key ] = DictionaryEntry (True , known_absent , value )
324
+ self .cache [(key , dict_key )] = _PerKeyValue (_Sentinel .sentinel )
0 commit comments