Skip to content

Commit a53edd6

Browse files
committed
more trying
1 parent 6dd0355 commit a53edd6

File tree

1 file changed

+80
-19
lines changed

1 file changed

+80
-19
lines changed

lazy_iterate.jl

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,48 +3,109 @@ using BitIntegers
33
using Base: count_ones
44
import Base: in!, iterate, IteratorSize
55

6-
function _hash_bloom(x, seeds, T)
6+
7+
8+
"""
    BloomFilterNode{T,K,L}

One layer of a chained Bloom filter.

# Fields
- `memory::T`: the layer's bit set; the inner constructor starts it at `zero(T)`.
- `seeds::NTuple{K,UInt64}`: the `K` hash seeds used by this layer.
- `next::L`: the following layer, or `nothing` for the last layer (constant field).
"""
mutable struct BloomFilterNode{T,K,L}
    memory::T
    seeds::NTuple{K,UInt64}
    const next::L

    # The caller only supplies `T` and `K`; `L` is taken from the type of `next`.
    function BloomFilterNode{T,K}(seeds, next=nothing) where {T<:Unsigned,K}
        NextT = typeof(next)
        return new{T,K,NextT}(zero(T), seeds, next)
    end
end
16+
17+
"""
    BloomFilterChain{T,K,L}

Immutable handle on the first (`head`) layer of a chain of Bloom filter layers.
`T` and `K` are given by the caller (`T` must be an `Unsigned` type); `L` is taken
from the type of `head`.
"""
struct BloomFilterChain{T,K,L}
    head::L

    BloomFilterChain{T,K}(head) where {T<:Unsigned,K} = new{T,K,typeof(head)}(head)
end
23+
24+
"""
    BloomFilterChain{T,K}()

Build a chain holding a single empty layer whose `K` seeds are drawn with
`rand(UInt64)`.
"""
function BloomFilterChain{T,K}() where {T,K}
    first_layer = BloomFilterNode{T,K}(ntuple(_ -> rand(UInt64), K))
    return BloomFilterChain{T,K}(first_layer)
end
29+
30+
"""
    grow_chain(f::BloomFilterChain{T,K})

Return a new chain whose head is a fresh, empty layer with `K` newly drawn random
seeds and whose `next` is the old head of `f`. `f` itself is not modified.
"""
function grow_chain(f::BloomFilterChain{T,K}) where {T,K}
    # The new layer keeps the element type `T`; a `widen(T)` variant was left
    # disabled in the original.
    fresh_layer = BloomFilterNode{T,K}(ntuple(_ -> rand(UInt64), K), f.head)
    return BloomFilterChain{T,K}(fresh_layer)
end
37+
"""
    into(f::BloomFilterChain{T,K}, x; p = 0.05)

Return a new chain with a fresh, empty layer (new random seeds, element type `T`)
placed in front of the layers of `f`.

NOTE(review): `x` and `p` are accepted but not used yet; the body currently does the
same thing as `grow_chain(f)`. Confirm the intended behavior before relying on them.
"""
function into(f::BloomFilterChain{T,K}, x; p = 0.05) where {T,K}
    fresh_layer = BloomFilterNode{T,K}(ntuple(_ -> rand(UInt64), K), f.head)
    return BloomFilterChain{T,K}(fresh_layer)
end
45+
46+
47+
"""
    _hash_bloom(x, seeds, ::Type{T}) -> T

Return a value of type `T` with one bit set per seed: for every `seed` in `seeds`
the bit at position `hash(x, seed)` reduced modulo the bit width of `T` is turned
on. Several seeds may select the same bit, so between 1 and `length(seeds)` bits
are set; with no seeds the result is `zero(T)`.
"""
function _hash_bloom(x, seeds, ::Type{T}) where T
    nbits = 8 * sizeof(T)
    mask = zero(T)
    for seed in seeds
        # Reduce with `%` rather than `& (nbits - 1)`: the bit-mask form is only a
        # valid index when `nbits` is a power of two, and integer widths that are
        # not (e.g. 24 bits) would skip positions or shift out of range.
        mask |= one(T) << (hash(x, seed) % nbits)
    end
    return mask
end
1355

14-
mutable struct BloomFilter{T,K}
15-
memory::T
16-
const seeds::NTuple{K, UInt64}
17-
const p::Float64 # max tolerable false positive rate
18-
function BloomFilter{T,K}(p, seeds) where {T<:Unsigned,K}
19-
new{T,K}(zero(T), seeds, p)
20-
end
56+
# Membership test on the last layer of a chain (`next` is `nothing`): `x` may be
# present only if every bit selected by its hashes is already set in `memory`.
function _in(x, f::BloomFilterNode{T,K,Nothing}) where {T,K}
    probe = _hash_bloom(x, f.seeds, T)
    return (probe & f.memory) == probe
end
2260

23-
function BloomFilter(; k=5, p=1/16)
24-
return BloomFilter{UInt64, k}(p, ntuple(_ -> rand(UInt), k))
61+
# Membership test on a layer that has a successor: answer `true` as soon as this
# layer matches, otherwise ask the next layer.
function _in(x, f::BloomFilterNode{T}) where {T}
    probe = _hash_bloom(x, f.seeds, T)
    if (probe & f.memory) == probe
        return true
    end
    return _in(x, f.next)
end
2665

27-
function in!(x, f::BloomFilter{T}) where {T}
66+
# Insert `x` into the last layer of a chain and report whether all of its bits
# were already set before the insertion.
function _in!(x, f::BloomFilterNode{T,K,Nothing}) where {T,K}
    probe = _hash_bloom(x, f.seeds, T)
    before = f.memory
    f.memory = before | probe
    return (probe & before) == probe
end
3372

34-
function false_positive_rate(f::BloomFilter{T,K}) where {T,K}
73+
# Insert `x` into this layer and report whether it was (possibly) seen before.
# Only this layer is written to; the following layers are merely queried with
# `_in`, and only when this layer did not already match.
function _in!(x, f::BloomFilterNode{T}) where {T}
    probe = _hash_bloom(x, f.seeds, T)
    before = f.memory
    f.memory = before | probe
    if (probe & before) == probe
        return true
    end
    return _in(x, f.next)
end
79+
80+
"""
    in!(x, f::BloomFilterChain)

Insert `x` into the head layer of `f` and return whether the chain already
reported `x` as present. Adds a method to `Base.in!`.
"""
function in!(x, f::BloomFilterChain)
    return _in!(x, f.head)
end
81+
82+
# End of the chain: there is no layer left, so the contribution is zero.
function _false_positive_rate(::Nothing, k)
    return 0
end
83+
# Estimated false-positive rate of layer `f` together with every layer after it.
# A single layer contributes (fraction of set bits)^k; the contributions of this
# layer and of the rest of the chain are merged as `a + b - a * b`.
function _false_positive_rate(f::BloomFilterNode{T}, k) where {T}
    set_bits = count_ones(f.memory)
    total_bits = 8sizeof(T)
    here = (set_bits / total_bits)^k
    rest = _false_positive_rate(f.next, k)
    return here + rest - here * rest
end
3991

40-
struct Unique
41-
f::Set
42-
g
92+
# Estimated false-positive rate of the whole chain, starting at its head and using
# the chain's `K` as the number of hashes per element.
function _false_positive_rate(f::BloomFilterChain{T,K}) where {T,K}
    return _false_positive_rate(f.head, K)
end
93+
94+
95+
96+
97+
98+
99+
100+
"""
    myunique(it)

Wrap the iterable `it` in a `Unique` together with a new, empty
`BloomFilterChain{UInt128,3}`.
"""
function myunique(it)
    filter = BloomFilterChain{UInt128,3}()
    return Unique{typeof(it)}(filter, it)
end
101+
"""
    Unique{T}

Pairs an iterable `g` of type `T` with the Bloom filter chain `f` that records the
elements produced while iterating.

NOTE(review): `BloomFilterChain{UInt128,3}` leaves the chain's third type parameter
open, so the field `f` is not concretely typed.
"""
struct Unique{T}
    f::BloomFilterChain{UInt128,3}
    g::T
end
44105

45106
"""
    iterate(u::Unique)

Start iterating `u`: fetch the first element of the wrapped iterator `u.g`, record
it in the filter `u.f`, and return `(element, state)`. Returns `nothing` when the
wrapped iterator is empty.
"""
function iterate(u::Unique)
    step = iterate(u.g)
    # An empty iterator yields `nothing`; destructuring it would throw, so pass the
    # end-of-iteration signal through instead.
    step === nothing && return nothing
    x, next = step
    # Only the insertion matters here; the "already seen" answer is not used.
    in!(x, u.f)
    return (x, next)
end
50111

0 commit comments

Comments
 (0)