Skip to content

Commit 7f8edde

Browse files
committed
Add union and intersection
1 parent 6e23ebd commit 7f8edde

File tree

3 files changed

+160
-8
lines changed

3 files changed

+160
-8
lines changed

example_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,30 @@ func Example_falsePositives() {
5050
fmt.Println(count, "mistakes were made.")
5151
// Output: 1 mistakes were made.
5252
}
53+
54+
// Compute the intersection and union of two filters.
55+
func ExampleFilter_And() {
56+
// Create two Bloom filter with room for n elements
57+
// at a false-positives rate less than 1/p.
58+
n := 1000
59+
p := 100
60+
f1, f2 := bloom.New(n, p), bloom.New(n, p)
61+
62+
// Add "0", "1", …, "499" to f1
63+
for i := 0; i < n/2; i++ {
64+
f1.Add(strconv.Itoa(i))
65+
}
66+
67+
// Add "250", "251", …, "749" to f2
68+
for i := n / 4; i < 3*n/4; i++ {
69+
f2.Add(strconv.Itoa(i))
70+
}
71+
72+
// Compute the approximate size of f1 ∩ f2 and f1 ∪ f2.
73+
fmt.Println("f1 ∩ f2:", f1.And(f2).Count())
74+
fmt.Println("f1 ∪ f2:", f1.Or(f2).Count())
75+
// Output:
76+
// f1 ∩ f2: 276
77+
// f1 ∪ f2: 758
78+
79+
}

filter.go

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,80 @@ func (f *Filter) Test(s string) bool {
117117
return f.TestByte(b)
118118
}
119119

120-
// Count returns an estimate of the number of unique elements added to this filter.
120+
// Count returns an estimate of the number of unique elements in this filter.
121121
func (f *Filter) Count() int64 {
122122
return f.count
123123
}
124+
125+
// And returns a new Bloom filter that consists of all elements
126+
// that belong to both f1 and f2. It requires two filters with
127+
// the same size and the same false-positives rate.
128+
//
129+
// The false-positive rate of the resulting filter is at most
130+
// the false-positive rate of f1 and f2, but may be larger than
131+
// the rate of the filter created from scratch using the intersection
132+
// of the two sets.
133+
func (f1 *Filter) And(f2 *Filter) *Filter {
134+
if len(f1.data) != len(f2.data) || f1.lookups != f2.lookups {
135+
panic("operation requires filters of the same type")
136+
}
137+
len := len(f1.data)
138+
res := &Filter{
139+
data: make([]uint64, len),
140+
lookups: f1.lookups,
141+
}
142+
bitCount := 0
143+
for i := 0; i < len; i++ {
144+
w := f1.data[i] & f2.data[i]
145+
res.data[i] = w
146+
bitCount += count(w)
147+
}
148+
// Estimate the number of elements from the bitCount.
149+
m := 64 * float64(len)
150+
bits := float64(bitCount)
151+
n := m / float64(f1.lookups) * math.Log(m/(m-bits))
152+
res.count = int64(n)
153+
return res
154+
}
155+
156+
// Or returns a new Bloom filter that consists of all elements
157+
// that belong to either f1 or f2. It requires two filters with
158+
// the same size and the same false-positives rate.
159+
//
160+
// The resulting filter is the same as the filter created from scratch
161+
// using the union of the two sets.
162+
func (f1 *Filter) Or(f2 *Filter) *Filter {
163+
if len(f1.data) != len(f2.data) || f1.lookups != f2.lookups {
164+
panic("operation requires filters of the same type")
165+
}
166+
len := len(f1.data)
167+
res := &Filter{
168+
data: make([]uint64, len),
169+
lookups: f1.lookups,
170+
}
171+
bitCount := 0
172+
for i := 0; i < len; i++ {
173+
w := f1.data[i] | f2.data[i]
174+
res.data[i] = w
175+
bitCount += count(w)
176+
}
177+
// Estimate the number of elements from the bitCount.
178+
m := 64 * float64(len)
179+
n := m / float64(f1.lookups) * math.Log(m/(m-float64(bitCount)))
180+
res.count = int64(n)
181+
return res
182+
}
183+
184+
// count reports how many bits of w are set.
func count(w uint64) int {
	// Parallel (SWAR) bit count without loops or branches.
	// Adapted from github.com/yourbasic/bit/funcs.go.
	const (
		pairs   = 0x5555555555555555 // every other bit
		nibbles = 0x3333333333333333 // low two bits of each nibble
		bytes   = 0x0f0f0f0f0f0f0f0f // low nibble of each byte
		ones    = 0x0101010101010101 // low bit of each byte
	)
	w -= (w >> 1) & pairs                     // 2-bit sums
	w = (w & nibbles) + ((w >> 2) & nibbles) // 4-bit sums
	w = (w + (w >> 4)) & bytes               // 8-bit sums
	// The multiplication accumulates all byte sums in the top byte.
	return int((w * ones) >> 56)
}

filter_test.go

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,64 @@ func TestFilter(t *testing.T) {
5858
}
5959
}
6060

61+
func TestAndOr(t *testing.T) {
62+
s1 := "asöldkgjaösldkgaösldkasldgjkaösldkgjöasgkdjg"
63+
s2 := "elasödlnkgaölsdkfgaölsdkjfaölsdkgaölskgnaösl"
64+
s3 := "aölsdgkaösldkgaösldkgjaölsdkjgaölsdkgjaösldk"
65+
for n := 0; n < 100; n++ {
66+
for p := 1; p <= 128; p *= 2 {
67+
f1, f2 := New(n, p), New(n, p)
68+
f1.Add(s1)
69+
f1.Add(s2)
70+
f2.Add(s2)
71+
f2.Add(s3)
72+
and, or := f1.And(f2), f1.Or(f2)
73+
member := and.Test(s1)
74+
if member {
75+
t.Errorf("and.Test(s1) = %v; want false\n", member)
76+
}
77+
member = and.Test(s2)
78+
if !member {
79+
t.Errorf("and.Test(s2) = %v; want true\n", member)
80+
}
81+
member = and.Test(s3)
82+
if member {
83+
t.Errorf("and.Test(s3) = %v; want false\n", member)
84+
}
85+
member = or.Test(s1)
86+
if !member {
87+
t.Errorf("or.Test(s1) = %v; want true\n", member)
88+
}
89+
member = or.Test(s2)
90+
if !member {
91+
t.Errorf("or.Test(s2) = %v; want true\n", member)
92+
}
93+
member = or.Test(s3)
94+
if !member {
95+
t.Errorf("or.Test(s3) = %v; want true\n", member)
96+
}
97+
}
98+
}
99+
}
100+
101+
// fox is the sample input shared by the benchmarks.
var fox = "The quick brown fox jumps over the lazy dog."
102+
61103
func BenchmarkAdd(b *testing.B) {
62104
b.StopTimer()
63105
filter := New(1<<30, 200)
64106
b.StartTimer()
65-
s := "The quick brown fox jumps over the lazy dog."
66107
for i := 0; i < b.N; i++ {
67-
filter.Add(s)
108+
filter.Add(fox)
68109
}
69110
}
70111

71112
func BenchmarkAddByte(b *testing.B) {
72113
b.StopTimer()
73114
filter := New(1<<30, 200)
74115
b.StartTimer()
75-
s := []byte("The quick brown fox jumps over the lazy dog.")
116+
bytes := []byte(fox)
76117
for i := 0; i < b.N; i++ {
77-
filter.AddByte(s)
118+
filter.AddByte(bytes)
78119
}
79120
}
80121

@@ -83,16 +124,27 @@ func BenchmarkTest(b *testing.B) {
83124
filter := New(1<<30, 200)
84125
b.StartTimer()
85126
for i := 0; i < b.N; i++ {
86-
filter.Test("The quick brown fox jumps over the lazy dog.")
127+
filter.Test(fox)
87128
}
88129
}
89130

90131
func BenchmarkTestByte(b *testing.B) {
91132
b.StopTimer()
92133
filter := New(1<<30, 200)
93134
b.StartTimer()
94-
s := []byte("The quick brown fox jumps over the lazy dog.")
135+
bytes := []byte(fox)
136+
for i := 0; i < b.N; i++ {
137+
filter.TestByte(bytes)
138+
}
139+
}
140+
141+
func BenchmarkTestAnd(b *testing.B) {
142+
n := 1000
143+
b.StopTimer()
144+
f1 := New(n, 200)
145+
f2 := New(n, 200)
146+
b.StartTimer()
95147
for i := 0; i < b.N; i++ {
96-
filter.TestByte(s)
148+
_ = f1.And(f2)
97149
}
98150
}

0 commit comments

Comments
 (0)