Skip to content
This repository was archived by the owner on Jul 30, 2022. It is now read-only.

Commit 2a6e800

Browse files
committed
Test the Bloom filter false positive rate.
1 parent 89521c9 commit 2a6e800

File tree

1 file changed

+49
-3
lines changed

1 file changed

+49
-3
lines changed

table/table_test.go

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package table
77
import (
88
"bufio"
99
"bytes"
10+
"encoding/binary"
1011
"fmt"
1112
"io"
1213
"io/ioutil"
@@ -279,11 +280,10 @@ func TestReaderBloomUsed(t *testing.T) {
279280
t.Errorf("degenerate=%t: false negatives: got %d, want %d", degenerate, c.falseNegatives, 0)
280281
}
281282

282-
gotActualNegatives := c.falsePositives + c.trueNegatives
283-
if gotActualNegatives < wantActualNegatives {
283+
if got := c.falsePositives + c.trueNegatives; got < wantActualNegatives {
284284
t.Errorf("degenerate=%t: actual negatives (false positives + true negatives): "+
285285
"got %d (%d + %d), want >= %d",
286-
degenerate, gotActualNegatives, c.falsePositives, c.trueNegatives, wantActualNegatives)
286+
degenerate, got, c.falsePositives, c.trueNegatives, wantActualNegatives)
287287
}
288288

289289
if !degenerate {
@@ -297,6 +297,52 @@ func TestReaderBloomUsed(t *testing.T) {
297297
}
298298
}
299299

300+
func TestBloomFilterFalsePositiveRate(t *testing.T) {
301+
f, err := os.Open(filepath.FromSlash("../testdata/h.bloom.no-compression.ldb"))
302+
if err != nil {
303+
t.Fatal(err)
304+
}
305+
c := &countingFilterPolicy{
306+
FilterPolicy: bloom.FilterPolicy(1),
307+
}
308+
r := NewReader(f, &db.Options{
309+
FilterPolicy: c,
310+
})
311+
312+
const n = 10000
313+
// key is a buffer that will be re-used for n Get calls, each with a
314+
// different key. The "m" in the 2-byte prefix means that the key falls in
315+
// the [minWord, maxWord] range and so will not be rejected prior to
316+
// applying the Bloom filter. The "!" in the 2-byte prefix means that the
317+
// key is not actually in the table. The filter will only see actual
318+
// negatives: false positives or true negatives.
319+
key := []byte("m!....")
320+
for i := 0; i < n; i++ {
321+
binary.LittleEndian.PutUint32(key[2:6], uint32(i))
322+
r.Get(key, nil)
323+
}
324+
325+
if c.truePositives != 0 {
326+
t.Errorf("true positives: got %d, want 0", c.truePositives)
327+
}
328+
if c.falseNegatives != 0 {
329+
t.Errorf("false negatives: got %d, want 0", c.falseNegatives)
330+
}
331+
if got := c.falsePositives + c.trueNegatives; got != n {
332+
t.Errorf("actual negatives (false positives + true negatives): got %d (%d + %d), want %d",
333+
got, c.falsePositives, c.trueNegatives, n)
334+
}
335+
336+
// According to the comments in the C++ LevelDB code, the false positive
337+
// rate should be approximately 1% for bloom.FilterPolicy(10). The 10
338+
// was the parameter used to write the .ldb file. When reading the file,
339+
// the 1 in the bloom.FilterPolicy(1) above doesn't matter, only the
340+
// bloom.FilterPolicy matters.
341+
if got := float64(100*c.falsePositives) / n; got < 0.2 || 5 < got {
342+
t.Errorf("false positive rate: got %v%%, want approximately 1%%", got)
343+
}
344+
}
345+
300346
type countingFilterPolicy struct {
301347
db.FilterPolicy
302348
degenerate bool

0 commit comments

Comments
 (0)