Skip to content

Commit 4392b5e

Browse files
jbapplelemire
authored andcommitted
Add fuse filters, which can accommodate higher load (#11)
For large enough sets of keys, Dietzfelbinger & Walzer's fuse filters, described in "Dense Peelable Random Uniform Hypergraphs", https://arxiv.org/abs/1907.04749, can accommodate fill factors up to 87.9% full, rather than 1 / 1.23 = 81.3%. This patch adds an 8-bit version of fuse filters. It does not yet add 16-bit or buffered filters, which are future (rote) work.
1 parent 15669f9 commit 4392b5e

File tree

4 files changed

+380
-2
lines changed

4 files changed

+380
-2
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
all: unit bench
22

3-
unit : tests/unit.c include/xorfilter.h
3+
unit : tests/unit.c include/xorfilter.h include/fusefilter.h
44
cc -std=c99 -O3 -o unit tests/unit.c -Iinclude -Wall -Wextra -Wshadow -Wcast-qual
55

6-
bench : benchmarks/bench.c include/xorfilter.h
6+
bench : benchmarks/bench.c include/xorfilter.h include/fusefilter.h
77
cc -std=c99 -O3 -o bench benchmarks/bench.c -Iinclude -Wall -Wextra -Wshadow -Wcast-qual
88

99
test: unit

benchmarks/bench.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include "fusefilter.h"
12
#include "xorfilter.h"
23
#include <assert.h>
34
#include <time.h>
@@ -111,9 +112,38 @@ bool testbufferedxor16(size_t size) {
111112
return true;
112113
}
113114

115+
bool testfuse8(size_t size) {
116+
printf("testing fuse8 ");
117+
printf("size = %zu \n", size);
118+
119+
fuse8_t filter;
120+
121+
fuse8_allocate(size, &filter);
122+
// we need some set of values
123+
uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
124+
for (size_t i = 0; i < size; i++) {
125+
big_set[i] = i; // we use contiguous values
126+
}
127+
// we construct the filter
128+
fuse8_populate(big_set, size, &filter); // warm the cache
129+
for (size_t times = 0; times < 5; times++) {
130+
clock_t t;
131+
t = clock();
132+
fuse8_populate(big_set, size, &filter);
133+
t = clock() - t;
134+
double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds
135+
printf("It took %f seconds to build an index over %zu values. \n",
136+
time_taken, size);
137+
}
138+
fuse8_free(&filter);
139+
free(big_set);
140+
return true;
141+
}
142+
114143
int main() {
115144
for (size_t s = 10000000; s <= 10000000; s *= 10) {
116145

146+
testfuse8(s);
117147
testbufferedxor8(s);
118148
testxor8(s);
119149
testbufferedxor16(s);

include/fusefilter.h

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
#ifndef FUSEFILTER_H
2+
#define FUSEFILTER_H
3+
#include <stdbool.h>
4+
#include <stddef.h>
5+
#include <stdint.h>
6+
#include <stdio.h>
7+
#include <stdlib.h>
8+
#include <string.h>
9+
10+
/**
11+
* We assume that you have a large set of 64-bit integers
12+
* and you want a data structure to do membership tests using
13+
* no more than ~8 or ~16 bits per key. If your initial set
14+
* is made of strings or other types, you first need to hash them
15+
* to a 64-bit integer.
16+
*/
17+
18+
/**
19+
* We start with a few utilities.
20+
***/
21+
// Scrambles the bits of a 64-bit value using three xor-shift steps (by 33
// bits) separated by two odd-constant multiplications. The name and the
// constants suggest the MurmurHash3 64-bit finalizer.
static inline uint64_t fuse_murmur64(uint64_t h) {
  const uint64_t first_multiplier = UINT64_C(0xff51afd7ed558ccd);
  const uint64_t second_multiplier = UINT64_C(0xc4ceb9fe1a85ec53);

  uint64_t mixed = (h ^ (h >> 33)) * first_multiplier;
  mixed = (mixed ^ (mixed >> 33)) * second_multiplier;
  return mixed ^ (mixed >> 33);
}
29+
30+
// Hashes `key` under a given `seed`: the seed is added to the key (with
// ordinary unsigned wrap-around) and the sum is run through fuse_murmur64.
static inline uint64_t fuse_mix_split(uint64_t key, uint64_t seed) {
  const uint64_t seeded_key = key + seed;
  return fuse_murmur64(seeded_key);
}
33+
34+
// Rotates the 64-bit value `n` left by `c` bit positions. Only the low six
// bits of `c` are used, so both shift amounts stay in 0..63 and a rotation
// by 0 or 64 returns `n` unchanged.
static inline uint64_t fuse_rotl64(uint64_t n, unsigned int c) {
  const unsigned int left_shift = c & 63;
  const unsigned int right_shift = (-c) & 63;
  const uint64_t high_part = n << left_shift;
  const uint64_t wrapped_part = n >> right_shift;
  return high_part | wrapped_part;
}
37+
38+
// Maps a 32-bit `hash` into the range [0, n) without a division: the 64-bit
// product hash * n is formed and its upper 32 bits are returned. For n == 0
// the result is 0.
// See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static inline uint32_t fuse_reduce(uint32_t hash, uint32_t n) {
  const uint64_t wide_product = (uint64_t)hash * n;
  return (uint32_t)(wide_product >> 32);
}
42+
43+
// Folds the upper 32 bits of `hash` onto its lower 32 bits with an xor and
// returns the full 64-bit result; the callers in this header narrow it to
// 8 bits when they store or compare it.
static inline uint64_t fuse_fingerprint(uint64_t hash) {
  const uint64_t upper_half = hash >> 32;
  return hash ^ upper_half;
}
46+
47+
/**
48+
* We need a decent random number generator.
49+
**/
50+
51+
// Pseudo-random generator step (splitmix64, per the function name).
// Advances *seed by a fixed odd increment, then returns a scrambled copy of
// the new state. The caller's seed is modified, so repeated calls with the
// same pointer yield a sequence of values.
static inline uint64_t fuse_rng_splitmix64(uint64_t *seed) {
  *seed += UINT64_C(0x9E3779B97F4A7C15);

  uint64_t state = *seed;
  state ^= state >> 30;
  state *= UINT64_C(0xBF58476D1CE4E5B9);
  state ^= state >> 27;
  state *= UINT64_C(0x94D049BB133111EB);
  state ^= state >> 31;
  return state;
}
58+
59+
/* Number of array locations combined per key (h0, h1 and h2 below). */
#define FUSE_ARITY 3
/* Number of segments the first location of a key can fall into. */
#define FUSE_SEGMENT_COUNT 100
/* Number of segment-sized slots in the backing array: the first location may
 * use any of FUSE_SEGMENT_COUNT segments and the remaining locations use the
 * segments that follow it. */
#define FUSE_SLOTS (FUSE_SEGMENT_COUNT + FUSE_ARITY - 1)
62+
63+
/**
 * fuse8 is the recommended default. It stores one 8-bit fingerprint per
 * array entry; the original author quotes a false-positive probability of
 * no more than about 0.3%.
 */
struct fuse8_s {
  /* Seed mixed into every key hash; chosen by fuse8_populate. */
  uint64_t seed;
  /* Length of one segment = length of the backing array / FUSE_SLOTS. */
  uint64_t segmentLength;
  /* After fuse8_allocate, points to FUSE_SLOTS * segmentLength values. */
  uint8_t *fingerprints;
};

typedef struct fuse8_s fuse8_t;
73+
74+
// Report if the key is in the set, with false positive rate.
75+
static inline bool fuse8_contain(uint64_t key, const fuse8_t *filter) {
76+
uint64_t hash = fuse_mix_split(key, filter->seed);
77+
uint8_t f = fuse_fingerprint(hash);
78+
uint32_t r0 = (uint32_t)hash;
79+
uint32_t r1 = (uint32_t)fuse_rotl64(hash, 21);
80+
uint32_t r2 = (uint32_t)fuse_rotl64(hash, 42);
81+
uint32_t r3 = (0xBF58476D1CE4E5B9 * hash) >> 32;
82+
uint32_t seg = fuse_reduce(r0, FUSE_SEGMENT_COUNT);
83+
uint32_t h0 = (seg + 0) * filter->segmentLength + fuse_reduce(r1, filter->segmentLength);
84+
uint32_t h1 = (seg + 1) * filter->segmentLength + fuse_reduce(r2, filter->segmentLength);
85+
uint32_t h2 = (seg + 2) * filter->segmentLength + fuse_reduce(r3, filter->segmentLength);
86+
return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^
87+
filter->fingerprints[h2]);
88+
}
89+
90+
// allocate enough capacity for a set containing up to 'size' elements
91+
// caller is responsible to call fuse8_free(filter)
92+
static inline bool fuse8_allocate(uint32_t size, fuse8_t *filter) {
93+
size_t capacity = 1.0 / 0.879 * size;
94+
capacity = capacity / FUSE_SLOTS * FUSE_SLOTS;
95+
filter->fingerprints = (uint8_t *)malloc(capacity * sizeof(uint8_t));
96+
if (filter->fingerprints != NULL) {
97+
filter->segmentLength = capacity / FUSE_SLOTS;
98+
return true;
99+
} else {
100+
return false;
101+
}
102+
}
103+
104+
// report memory usage
105+
static inline size_t fuse8_size_in_bytes(const fuse8_t *filter) {
106+
return FUSE_SLOTS * filter->segmentLength * sizeof(uint8_t) + sizeof(fuse8_t);
107+
}
108+
109+
// release memory
110+
static inline void fuse8_free(fuse8_t *filter) {
111+
free(filter->fingerprints);
112+
filter->fingerprints = NULL;
113+
filter->segmentLength = 0;
114+
}
115+
116+
// Per-slot bookkeeping used while a filter is being built.
typedef struct fuse_fuseset_s {
  uint64_t fusemask; // xor of the hashes of all keys currently in this slot
  uint32_t count;    // number of keys currently in this slot
} fuse_fuseset_t;
122+
123+
// A key's 64-bit hash together with the three array locations derived
// from it.
typedef struct fuse_hashes_s {
  uint64_t h;  // full 64-bit hash of the key
  uint32_t h0; // location in the starting segment
  uint32_t h1; // location in the next segment
  uint32_t h2; // location in the segment after that
} fuse_hashes_t;
131+
132+
static inline fuse_hashes_t fuse8_get_h0_h1_h2(uint64_t k, const fuse8_t *filter) {
133+
uint64_t hash = fuse_mix_split(k, filter->seed);
134+
fuse_hashes_t answer;
135+
answer.h = hash;
136+
uint32_t r0 = (uint32_t)hash;
137+
uint32_t r1 = (uint32_t)fuse_rotl64(hash, 21);
138+
uint32_t r2 = (uint32_t)fuse_rotl64(hash, 42);
139+
uint32_t r3 = (0xBF58476D1CE4E5B9 * hash) >> 32;
140+
uint32_t seg = fuse_reduce(r0, FUSE_SEGMENT_COUNT);
141+
answer.h0 = (seg + 0) * filter->segmentLength + fuse_reduce(r1, filter->segmentLength);
142+
answer.h1 = (seg + 1) * filter->segmentLength + fuse_reduce(r2, filter->segmentLength);
143+
answer.h2 = (seg + 2) * filter->segmentLength + fuse_reduce(r3, filter->segmentLength);
144+
return answer;
145+
}
146+
147+
// The three array locations of a key, without the hash they came from.
typedef struct fuse_h0h1h2_s {
  uint32_t h0; // location in the starting segment
  uint32_t h1; // location in the next segment
  uint32_t h2; // location in the segment after that
} fuse_h0h1h2_t;
154+
155+
static inline fuse_h0h1h2_t fuse8_get_just_h0_h1_h2(uint64_t hash,
156+
const fuse8_t *filter) {
157+
fuse_h0h1h2_t answer;
158+
uint32_t r0 = (uint32_t)hash;
159+
uint32_t r1 = (uint32_t)fuse_rotl64(hash, 21);
160+
uint32_t r2 = (uint32_t)fuse_rotl64(hash, 42);
161+
uint32_t r3 = (0xBF58476D1CE4E5B9 * hash) >> 32;
162+
uint32_t seg = fuse_reduce(r0, FUSE_SEGMENT_COUNT);
163+
answer.h0 = (seg + 0) * filter->segmentLength + fuse_reduce(r1, filter->segmentLength);
164+
answer.h1 = (seg + 1) * filter->segmentLength + fuse_reduce(r2, filter->segmentLength);
165+
answer.h2 = (seg + 2) * filter->segmentLength + fuse_reduce(r3, filter->segmentLength);
166+
return answer;
167+
}
168+
169+
// Pairs a key hash with an array location; used for the work queue and the
// stack during construction.
typedef struct fuse_keyindex_s {
  uint64_t hash;  // 64-bit hash of the key
  uint32_t index; // array location associated with that key
} fuse_keyindex_t;
175+
176+
// NOTE(review): nothing in the visible part of this header uses this struct;
// presumably it is reserved for a buffered construction variant -- confirm
// before relying on the field descriptions below.
struct fuse_setbuffer_s {
  struct fuse_keyindex_s *buffer; // array of (hash, index) entries
  uint32_t *counts;               // array of counters
  int insignificantbits;
  uint32_t slotsize; // should be 1 << insignificantbits
  uint32_t slotcount;
  size_t originalsize;
};
184+
185+
//
186+
// construct the filter, returns true on success, false on failure.
187+
// most likely, a failure is due to too high a memory usage
188+
// size is the number of keys
189+
// The caller is responsable for calling fuse8_allocate(size,filter) before.
190+
// The caller is responsible to ensure that there are no duplicated keys.
191+
//
192+
bool fuse8_populate(const uint64_t *keys, uint32_t size, fuse8_t *filter) {
193+
uint64_t rng_counter = 1;
194+
filter->seed = fuse_rng_splitmix64(&rng_counter);
195+
size_t arrayLength = filter->segmentLength * FUSE_SLOTS; // size of the backing array
196+
//size_t segmentLength = filter->segmentLength;
197+
fuse_fuseset_t *sets =
198+
(fuse_fuseset_t *)malloc(arrayLength * sizeof(fuse_fuseset_t));
199+
200+
fuse_keyindex_t *Q =
201+
(fuse_keyindex_t *)malloc(arrayLength * sizeof(fuse_keyindex_t));
202+
203+
fuse_keyindex_t *stack =
204+
(fuse_keyindex_t *)malloc(size * sizeof(fuse_keyindex_t));
205+
206+
if ((sets == NULL) || (Q == NULL) || (stack == NULL)) {
207+
free(sets);
208+
free(Q);
209+
free(stack);
210+
return false;
211+
}
212+
213+
for (int loop = 0; true; ++loop) {
214+
// if (loop > 0 && 0 == (loop & (loop - 1))) fprintf(stderr, "loop %d\n", loop);
215+
memset(sets, 0, sizeof(fuse_fuseset_t) * arrayLength);
216+
for (size_t i = 0; i < size; i++) {
217+
uint64_t key = keys[i];
218+
fuse_hashes_t hs = fuse8_get_h0_h1_h2(key, filter);
219+
sets[hs.h0].fusemask ^= hs.h;
220+
sets[hs.h0].count++;
221+
sets[hs.h1].fusemask ^= hs.h;
222+
sets[hs.h1].count++;
223+
sets[hs.h2].fusemask ^= hs.h;
224+
sets[hs.h2].count++;
225+
}
226+
// todo: the flush should be sync with the detection that follows
227+
// scan for values with a count of one
228+
size_t Qsize = 0;
229+
for (size_t i = 0; i < arrayLength; i++) {
230+
if (sets[i].count == 1) {
231+
Q[Qsize].index = i;
232+
Q[Qsize].hash = sets[i].fusemask;
233+
Qsize++;
234+
}
235+
}
236+
237+
size_t stack_size = 0;
238+
while (Qsize > 0) {
239+
fuse_keyindex_t keyindex = Q[--Qsize];
240+
size_t index = keyindex.index;
241+
if (sets[index].count == 0)
242+
continue; // not actually possible after the initial scan.
243+
// sets0[index].count = 0;
244+
uint64_t hash = keyindex.hash;
245+
fuse_h0h1h2_t hs = fuse8_get_just_h0_h1_h2(hash, filter);
246+
247+
stack[stack_size] = keyindex;
248+
stack_size++;
249+
250+
//if (hs.h0 != index) {
251+
sets[hs.h0].fusemask ^= hash;
252+
sets[hs.h0].count--;
253+
if (sets[hs.h0].count == 1) {
254+
Q[Qsize].index = hs.h0;
255+
Q[Qsize].hash = sets[hs.h0].fusemask;
256+
Qsize++;
257+
}
258+
//}
259+
260+
//if (hs.h1 != index) {
261+
sets[hs.h1].fusemask ^= hash;
262+
sets[hs.h1].count--;
263+
if (sets[hs.h1].count == 1) {
264+
Q[Qsize].index = hs.h1;
265+
Q[Qsize].hash = sets[hs.h1].fusemask;
266+
Qsize++;
267+
}
268+
//}
269+
270+
//if (hs.h2 != index) {
271+
sets[hs.h2].fusemask ^= hash;
272+
sets[hs.h2].count--;
273+
if (sets[hs.h2].count == 1) {
274+
Q[Qsize].index = hs.h2;
275+
Q[Qsize].hash = sets[hs.h2].fusemask;
276+
Qsize++;
277+
}
278+
//}
279+
}
280+
281+
if (stack_size == size) {
282+
// success
283+
break;
284+
}
285+
286+
filter->seed = fuse_rng_splitmix64(&rng_counter);
287+
}
288+
289+
size_t stack_size = size;
290+
while (stack_size > 0) {
291+
fuse_keyindex_t ki = stack[--stack_size];
292+
fuse_h0h1h2_t hs = fuse8_get_just_h0_h1_h2(ki.hash, filter);
293+
uint8_t hsh = fuse_fingerprint(ki.hash);
294+
if(ki.index == hs.h0) {
295+
hsh ^= filter->fingerprints[hs.h1] ^ filter->fingerprints[hs.h2];
296+
} else if(ki.index == hs.h1) {
297+
hsh ^= filter->fingerprints[hs.h0] ^ filter->fingerprints[hs.h2];
298+
} else {
299+
hsh ^= filter->fingerprints[hs.h0] ^ filter->fingerprints[hs.h1];
300+
}
301+
filter->fingerprints[ki.index] = hsh;
302+
}
303+
304+
free(sets);
305+
free(Q);
306+
free(stack);
307+
return true;
308+
}
309+
310+
#endif

0 commit comments

Comments
 (0)