-
Notifications
You must be signed in to change notification settings - Fork 5
/
bloomfilter.go
150 lines (136 loc) · 3.62 KB
/
bloomfilter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package bloomfilter
import (
"encoding/binary"
"math"
"sync"
)
type BloomFilter struct {
m uint32
k int
buckets []uint32
location []uint32
lock sync.RWMutex
}
// New creates a new bloom filter. m should specify the number of bits.
// m is rounded up to the nearest multiple of 32.
// k specifies the number of hashing functions.
func New(m, k int) *BloomFilter {
var n = uint32(math.Ceil(float64(m) / 32))
return &BloomFilter{
m: n * 32,
k: k,
buckets: make([]uint32, n),
location: make([]uint32, k),
}
}
// NewFromBytes creates a new bloom filter from a byte slice.
// b is a byte slice exported from another bloomfilter.
// k specifies the number of hashing functions.
func NewFromBytes(bb []byte, k int) *BloomFilter {
ii := make([]uint32, len(bb)/4)
for i := range ii {
ii[i] = binary.BigEndian.Uint32(bb[i*4 : (i+1)*4])
}
return &BloomFilter{
m: uint32(len(ii) * 32),
k: k,
buckets: ii,
location: make([]uint32, k),
}
}
// EstimateParameters estimates requirements for m and k.
// https://github.com/willf/bloom
func EstimateParameters(n int, p float64) (m int, k int) {
m = int(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
k = int(math.Ceil(math.Log(2) * float64(m) / float64(n)))
if m%32 > 0 {
m += 32 - m%32
}
return
}
func (bf *BloomFilter) locations(v []byte) []uint32 {
var a = fnv_1a(v, 0)
var b = fnv_1a(v, 1576284489)
var x = a % uint32(bf.m)
for i := range bf.location {
bf.location[i] = x
x = (x + b) % bf.m
}
return bf.location
}
// Add adds a byte array to the bloom filter
func (bf *BloomFilter) Add(v []byte) {
bf.lock.Lock()
defer bf.lock.Unlock()
var loc = bf.locations(v)
for _, l := range loc {
bf.buckets[l/32] |= 1 << (l % 32)
}
}
// AddInt adds an int to the bloom filter
func (bf *BloomFilter) AddInt(v int) {
var a = make([]byte, 4)
binary.BigEndian.PutUint32(a, uint32(v))
bf.Add(a)
}
// Test evaluates a byte array to determine whether it is (probably) in the bloom filter
func (bf *BloomFilter) Test(v []byte) bool {
bf.lock.RLock()
defer bf.lock.RUnlock()
var loc = bf.locations(v)
for _, l := range loc {
if (bf.buckets[l/32] & (1 << (l % 32))) == 0 {
return false
}
}
return true
}
// TestInt evaluates an int to determine whether it is (probably) in the bloom filter
func (bf *BloomFilter) TestInt(v int) bool {
bf.lock.RLock()
defer bf.lock.RUnlock()
var a = make([]byte, 4)
binary.BigEndian.PutUint32(a, uint32(v))
return bf.Test(a)
}
// ToBytes returns the bloom filter as a byte slice
func (bf *BloomFilter) ToBytes() []byte {
bf.lock.RLock()
defer bf.lock.RUnlock()
var bb = []byte{}
for _, bucket := range bf.buckets {
var a = make([]byte, 4)
binary.BigEndian.PutUint32(a, bucket)
bb = append(bb, a...)
}
return bb
}
// Fowler/Noll/Vo hashing.
// Nonstandard variation: this function optionally takes a seed value that is incorporated
// into the offset basis. According to http://www.isthe.com/chongo/tech/comp/fnv/index.html
// "almost any offset_basis will serve so long as it is non-zero".
func fnv_1a(v []byte, seed int) uint32 {
var a = uint32(2166136261 ^ seed)
for _, b := range v {
var c = uint32(b)
var d = c & 0xff00
if d != 0 {
a = fnv_multiply(a ^ d>>8)
}
a = fnv_multiply(a ^ c&0xff)
}
return fnv_mix(a)
}
// a * 16777619 mod 2**32
func fnv_multiply(a uint32) uint32 {
return a + (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24)
}
// See https://web.archive.org/web/20131019013225/http://home.comcast.net/~bretm/hash/6.html
func fnv_mix(a uint32) uint32 {
a += a << 13
a ^= a >> 7
a += a << 3
a ^= a >> 17
a += a << 5
return a & 0xffffffff
}