@@ -7,6 +7,7 @@ package table
7
7
import (
8
8
"bufio"
9
9
"bytes"
10
+ "encoding/binary"
10
11
"fmt"
11
12
"io"
12
13
"io/ioutil"
@@ -279,11 +280,10 @@ func TestReaderBloomUsed(t *testing.T) {
279
280
t .Errorf ("degenerate=%t: false negatives: got %d, want %d" , degenerate , c .falseNegatives , 0 )
280
281
}
281
282
282
- gotActualNegatives := c .falsePositives + c .trueNegatives
283
- if gotActualNegatives < wantActualNegatives {
283
+ if got := c .falsePositives + c .trueNegatives ; got < wantActualNegatives {
284
284
t .Errorf ("degenerate=%t: actual negatives (false positives + true negatives): " +
285
285
"got %d (%d + %d), want >= %d" ,
286
- degenerate , gotActualNegatives , c .falsePositives , c .trueNegatives , wantActualNegatives )
286
+ degenerate , got , c .falsePositives , c .trueNegatives , wantActualNegatives )
287
287
}
288
288
289
289
if ! degenerate {
@@ -297,6 +297,52 @@ func TestReaderBloomUsed(t *testing.T) {
297
297
}
298
298
}
299
299
300
+ func TestBloomFilterFalsePositiveRate (t * testing.T ) {
301
+ f , err := os .Open (filepath .FromSlash ("../testdata/h.bloom.no-compression.ldb" ))
302
+ if err != nil {
303
+ t .Fatal (err )
304
+ }
305
+ c := & countingFilterPolicy {
306
+ FilterPolicy : bloom .FilterPolicy (1 ),
307
+ }
308
+ r := NewReader (f , & db.Options {
309
+ FilterPolicy : c ,
310
+ })
311
+
312
+ const n = 10000
313
+ // key is a buffer that will be re-used for n Get calls, each with a
314
+ // different key. The "m" in the 2-byte prefix means that the key falls in
315
+ // the [minWord, maxWord] range and so will not be rejected prior to
316
+ // applying the Bloom filter. The "!" in the 2-byte prefix means that the
317
+ // key is not actually in the table. The filter will only see actual
318
+ // negatives: false positives or true negatives.
319
+ key := []byte ("m!...." )
320
+ for i := 0 ; i < n ; i ++ {
321
+ binary .LittleEndian .PutUint32 (key [2 :6 ], uint32 (i ))
322
+ r .Get (key , nil )
323
+ }
324
+
325
+ if c .truePositives != 0 {
326
+ t .Errorf ("true positives: got %d, want 0" , c .truePositives )
327
+ }
328
+ if c .falseNegatives != 0 {
329
+ t .Errorf ("false negatives: got %d, want 0" , c .falseNegatives )
330
+ }
331
+ if got := c .falsePositives + c .trueNegatives ; got != n {
332
+ t .Errorf ("actual negatives (false positives + true negatives): got %d (%d + %d), want %d" ,
333
+ got , c .falsePositives , c .trueNegatives , n )
334
+ }
335
+
336
+ // According the the comments in the C++ LevelDB code, the false positive
337
+ // rate should be approximately 1% for for bloom.FilterPolicy(10). The 10
338
+ // was the parameter used to write the .ldb file. When reading the file,
339
+ // the 1 in the bloom.FilterPolicy(1) above doesn't matter, only the
340
+ // bloom.FilterPolicy matters.
341
+ if got := float64 (100 * c .falsePositives ) / n ; got < 0.2 || 5 < got {
342
+ t .Errorf ("false positive rate: got %v%%, want approximately 1%%" , got )
343
+ }
344
+ }
345
+
300
346
type countingFilterPolicy struct {
301
347
db.FilterPolicy
302
348
degenerate bool
0 commit comments