@@ -3,48 +3,109 @@ using BitIntegers
33using  Base:  count_ones
44import  Base:  in!, iterate, IteratorSize
55
6- function  _hash_bloom (x, seeds, T)
6+ 
7+ 
"""
    BloomFilterNode{T,K,L}

One link in a chain of Bloom filters.

# Fields
- `memory::T`: the filter's bit set; starts at `zero(T)`.
- `seeds::NTuple{K,UInt64}`: the `K` hash seeds used by this node.
- `next::L`: the following node, or `nothing` when this node is the last one.
  Declared `const`, so it cannot be reassigned after construction.
"""
mutable struct BloomFilterNode{T,K,L}
    memory::T
    seeds::NTuple{K,UInt64}
    const next::L

    # Callers spell out only `T` and `K`; `L` is taken from the type of `next`.
    function BloomFilterNode{T,K}(seeds, next=nothing) where {T<:Unsigned,K}
        tail_type = typeof(next)
        return new{T,K,tail_type}(zero(T), seeds, next)
    end
end
16+ 
"""
    BloomFilterChain{T,K,L}

Immutable handle on a chain of Bloom filter nodes. It stores only the first
node (`head`, of type `L`); `T` and `K` are carried as type parameters.
"""
struct BloomFilterChain{T,K,L}
    head::L

    # `L` is derived from the value passed in, so callers give only `T` and `K`.
    function BloomFilterChain{T,K}(head) where {T<:Unsigned,K}
        head_type = typeof(head)
        return new{T,K,head_type}(head)
    end
end
23+ 
"""
    BloomFilterChain{T,K}()

Create a chain consisting of a single empty node whose `K` seeds are drawn
with `rand(UInt64)`.
"""
function BloomFilterChain{T,K}() where {T,K}
    seeds = ntuple(K) do _
        rand(UInt64)
    end
    return BloomFilterChain{T,K}(BloomFilterNode{T,K}(seeds))
end
29+ 
"""
    grow_chain(f::BloomFilterChain{T,K})

Return a new chain whose head is a fresh, empty node with `K` newly drawn
random seeds and whose `next` is the head of `f`. Only `f.head` is read; `f`
is not modified.
"""
function grow_chain(f::BloomFilterChain{T,K}) where {T,K}
    # The new node keeps storage type `T`; widening via `widen(T)` is disabled.
    fresh_seeds = ntuple(K) do _
        rand(UInt64)
    end
    return BloomFilterChain{T,K}(BloomFilterNode{T,K}(fresh_seeds, f.head))
end
"""
    into(f::BloomFilterChain{T,K}, x; p=0.05)

Return a new chain whose head is a fresh, empty node with `K` newly drawn
random seeds and whose `next` is the head of `f`.

NOTE(review): `x` and `p` are accepted but never used, and the body does the
same work as `grow_chain` — presumably unfinished; confirm the intended
behaviour.
"""
function into(f::BloomFilterChain{T,K}, x; p=0.05) where {T,K}
    # The new node keeps storage type `T`; widening via `widen(T)` is disabled.
    fresh_seeds = ntuple(K) do _
        rand(UInt64)
    end
    return BloomFilterChain{T,K}(BloomFilterNode{T,K}(fresh_seeds, f.head))
end
45+ 
46+ 
"""
    _hash_bloom(x, seeds, ::Type{T})

Build the Bloom-filter bit mask of `x` for storage type `T`.

For every seed in `seeds`, `hash(x, seed)` is reduced to a bit position in
`0:(8sizeof(T) - 1)` and that bit is set in the result. Positions from
different seeds may coincide, so the mask has at most `length(seeds)` bits set.
An empty `seeds` yields `zero(T)`.
"""
function _hash_bloom(x, seeds, ::Type{T}) where {T}
    nbits = 8 * sizeof(T)
    # `h & (nbits - 1)` equals `h % nbits` only when `nbits` is a power of two.
    # Other widths (e.g. custom BitIntegers types) need the real remainder,
    # otherwise some bit positions could never be selected.
    use_mask = ispow2(nbits)
    out = zero(T)
    for seed in seeds
        h = hash(x, seed)
        pos = use_mask ? (h & (nbits - 1)) : (h % nbits)
        out |= one(T) << pos
    end
    return out
end
1355
14- mutable struct  BloomFilter{T,K}
15-     memory:: T 
16-     const  seeds:: NTuple{K, UInt64} 
17-     const  p:: Float64  #  max tolerable false positive rate
18-     function  BloomFilter {T,K} (p, seeds) where  {T<: Unsigned ,K}
19-         new {T,K} (zero (T), seeds, p)
20-     end 
"""
    _in(x, f::BloomFilterNode{T,K,Nothing})

Membership test on the last node of a chain (`next` is `nothing`). Return
`true` when every bit of `x`'s hash mask is set in `f.memory`. The node is not
modified.
"""
function _in(x, f::BloomFilterNode{T,K,Nothing}) where {T,K}
    mask = _hash_bloom(x, f.seeds, T)
    # Present iff no mask bit is missing from the stored bits.
    return iszero(mask & ~f.memory)
end
2260
23- function  BloomFilter (; k= 5 , p= 1 / 16 )
24-     return  BloomFilter {UInt64, k} (p, ntuple (_ ->  rand (UInt), k))
"""
    _in(x, f::BloomFilterNode{T})

Membership test on a node that has a successor. Return `true` when every bit
of `x`'s hash mask is set in this node; otherwise return the result of the
same test on `f.next`. No node is modified.
"""
function _in(x, f::BloomFilterNode{T}) where {T}
    mask = _hash_bloom(x, f.seeds, T)
    if (f.memory & mask) == mask
        return true
    end
    # Not found here: defer to the rest of the chain.
    return _in(x, f.next)
end
2665
27- function  in !:: BloomFilter{T }where  {T}
"""
    _in!(x, f::BloomFilterNode{T,K,Nothing})

Insert `x` into the last node of a chain (`next` is `nothing`) and report
whether it already appeared to be present.

Returns `true` when all bits of `x`'s hash mask were set in `f.memory` before
the insertion, `false` otherwise. The mask is OR-ed into `f.memory` either way.
"""
function _in!(x, f::BloomFilterNode{T,K,Nothing}) where {T,K}
    h = _hash_bloom(x, f.seeds, T)
    # Read the answer before setting the bits, or it would always be `true`.
    was_present = (f.memory & h) == h
    f.memory |= h
    return was_present
end
3372
34- function  false_positive_rate (f:: BloomFilter{T,K} ) where  {T,K}
"""
    _in!(x, f::BloomFilterNode{T})

Insert `x` into a node that has a successor and report whether `x` already
appeared to be present anywhere in the chain starting at `f`.

The hash mask is OR-ed into this node's `memory` only; the remaining nodes are
queried with the non-mutating `_in` and are left untouched.
"""
function _in!(x, f::BloomFilterNode{T}) where {T}
    mask = _hash_bloom(x, f.seeds, T)
    # Read the answer before setting the bits.
    seen_here = (f.memory & mask) == mask
    f.memory |= mask
    seen_here && return true
    return _in(x, f.next)
end
79+ 
"""
    in!(x, f::BloomFilterChain)

Insert `x` into the chain `f` by calling `_in!` on its head node, and return
that call's result.
"""
function in!(x, f::BloomFilterChain{T}) where {T}
    return _in!(x, f.head)
end
81+ 
# Base case for the end of a chain: no node, so no false positives.
# Returns a `Float64` so every method of `_false_positive_rate` yields a
# floating-point value.
_false_positive_rate(::Nothing, k) = 0.0
"""
    _false_positive_rate(f::BloomFilterNode{T}, k)

Estimate the false positive rate of the chain starting at node `f`.

The node's own estimate is `(set bits / total bits)^k`; it is combined with
the estimate `r` of `f.next` as `p + r - p * r`.
"""
function _false_positive_rate(f::BloomFilterNode{T}, k) where {T}
    fill_ratio = count_ones(f.memory) / (8 * sizeof(T))
    here = fill_ratio^k
    rest = _false_positive_rate(f.next, k)
    return here + rest - here * rest
end
3991
40- struct  Unique
41-     f:: Set 
42-     g
"""
    _false_positive_rate(f::BloomFilterChain{T,K})

Estimate the false positive rate of the whole chain, starting at `f.head` and
using the chain's type parameter `K` as the exponent.
"""
function _false_positive_rate(f::BloomFilterChain{T,K}) where {T,K}
    return _false_positive_rate(f.head, K)
end
93+ 
94+ 
95+ 
96+ 
97+ 
98+ 
99+ 
"""
    myunique(it)

Wrap `it` in a `Unique` together with a fresh `BloomFilterChain{UInt128,3}`.
"""
function myunique(it)
    chain = BloomFilterChain{UInt128,3}()
    return Unique{typeof(it)}(chain, it)
end
"""
    Unique{T}

Pairs a wrapped iterator with a Bloom filter chain.

# Fields
- `f`: the `BloomFilterChain{UInt128,3}` that iterated elements are fed into.
- `g::T`: the wrapped iterator.

NOTE(review): `BloomFilterChain` has a third type parameter, so the annotation
on `f` is not a concrete type — consider parametrizing it if this is hot code.
"""
struct Unique{T}
    f::BloomFilterChain{UInt128,3}
    g::T
end
44105
"""
    iterate(u::Unique)

Start iterating `u`.

Returns `nothing` when the wrapped iterator `u.g` is empty. Otherwise records
the first element in the filter `u.f` via `in!` and returns `(element, state)`,
where `state` is the state produced by `iterate(u.g)`.
"""
function iterate(u::Unique)
    step = iterate(u.g)
    # An empty iterator yields `nothing`; destructuring it would throw.
    step === nothing && return nothing
    x, next = step
    in!(x, u.f)
    return (x, next)
end
50111
0 commit comments