@@ -23,6 +23,97 @@ type indexEntry struct {
23
23
keyLen int
24
24
}
25
25
26
// filterBaseLog controls how much data each per-block filter covers: a new
// filter is started for every 1<<filterBaseLog (2KiB) bytes of block data.
//
// NOTE(review): the default db.Options BlockSize is 1<<12 (4KiB), so with
// filterBaseLog == 11 every second filter ends up empty in practice. Both
// values are nonetheless kept as-is to match the C++ LevelDB implementation.
const filterBaseLog = 11
33
+
34
+ type filterWriter struct {
35
+ policy db.FilterPolicy
36
+ // block holds the keys for the current block. The buffers are re-used for
37
+ // each new block.
38
+ block struct {
39
+ data []byte
40
+ lengths []int
41
+ keys [][]byte
42
+ }
43
+ // data and offsets are the per-block filters for the overall table.
44
+ data []byte
45
+ offsets []uint32
46
+ }
47
+
48
+ func (f * filterWriter ) hasKeys () bool {
49
+ return len (f .block .lengths ) != 0
50
+ }
51
+
52
+ func (f * filterWriter ) appendKey (key []byte ) {
53
+ f .block .data = append (f .block .data , key ... )
54
+ f .block .lengths = append (f .block .lengths , len (key ))
55
+ }
56
+
57
+ func (f * filterWriter ) appendOffset () error {
58
+ o := len (f .data )
59
+ if uint64 (o ) > 1 << 32 - 1 {
60
+ return errors .New ("leveldb/table: filter data is too long" )
61
+ }
62
+ f .offsets = append (f .offsets , uint32 (o ))
63
+ return nil
64
+ }
65
+
66
+ func (f * filterWriter ) emit () error {
67
+ if err := f .appendOffset (); err != nil {
68
+ return err
69
+ }
70
+ if ! f .hasKeys () {
71
+ return nil
72
+ }
73
+
74
+ i , j := 0 , 0
75
+ for _ , length := range f .block .lengths {
76
+ j += length
77
+ f .block .keys = append (f .block .keys , f .block .data [i :j ])
78
+ i = j
79
+ }
80
+ f .data = append (f .data , f .policy .NewFilter (f .block .keys )... )
81
+
82
+ // Reset the per-block state.
83
+ f .block .data = f .block .data [:0 ]
84
+ f .block .lengths = f .block .lengths [:0 ]
85
+ f .block .keys = f .block .keys [:0 ]
86
+ return nil
87
+ }
88
+
89
+ func (f * filterWriter ) finishBlock (blockOffset uint64 ) error {
90
+ for i := blockOffset >> filterBaseLog ; i > uint64 (len (f .offsets )); {
91
+ if err := f .emit (); err != nil {
92
+ return err
93
+ }
94
+ }
95
+ return nil
96
+ }
97
+
98
+ func (f * filterWriter ) finish () ([]byte , error ) {
99
+ if f .hasKeys () {
100
+ if err := f .emit (); err != nil {
101
+ return nil , err
102
+ }
103
+ }
104
+ if err := f .appendOffset (); err != nil {
105
+ return nil , err
106
+ }
107
+
108
+ var b [4 ]byte
109
+ for _ , x := range f .offsets {
110
+ binary .LittleEndian .PutUint32 (b [:], x )
111
+ f .data = append (f .data , b [0 ], b [1 ], b [2 ], b [3 ])
112
+ }
113
+ f .data = append (f .data , filterBaseLog )
114
+ return f .data , nil
115
+ }
116
+
26
117
// Writer is a table writer. It implements the DB interface, as documented
27
118
// in the leveldb/db package.
28
119
type Writer struct {
@@ -65,6 +156,8 @@ type Writer struct {
65
156
// re-used over the lifetime of the writer, avoiding the allocation of a
66
157
// temporary buffer for each block.
67
158
compressedBuf []byte
159
+ // filter accumulates the filter block.
160
+ filter filterWriter
68
161
// tmp is a scratch buffer, large enough to hold either footerLen bytes,
69
162
// blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes.
70
163
tmp [50 ]byte
@@ -103,6 +196,9 @@ func (w *Writer) Set(key, value []byte, o *db.WriteOptions) error {
103
196
w .err = fmt .Errorf ("leveldb/table: Set called in non-increasing key order: %q, %q" , w .prevKey , key )
104
197
return w .err
105
198
}
199
+ if w .filter .policy != nil {
200
+ w .filter .appendKey (key )
201
+ }
106
202
w .flushPendingBH (key )
107
203
w .append (key , value , w .nEntries % w .blockRestartInterval == 0 )
108
204
// If the estimated block size is sufficiently large, finish the current block.
@@ -169,15 +265,32 @@ func (w *Writer) finishBlock() (blockHandle, error) {
169
265
// Compress the buffer, discarding the result if the improvement
170
266
// isn't at least 12.5%.
171
267
b := w .buf .Bytes ()
172
- w . tmp [ 0 ] = noCompressionBlockType
268
+ blockType := byte ( noCompressionBlockType )
173
269
if w .compression == db .SnappyCompression {
174
270
compressed := snappy .Encode (w .compressedBuf , b )
175
271
w .compressedBuf = compressed [:cap (compressed )]
176
272
if len (compressed ) < len (b )- len (b )/ 8 {
177
- w . tmp [ 0 ] = snappyCompressionBlockType
273
+ blockType = snappyCompressionBlockType
178
274
b = compressed
179
275
}
180
276
}
277
+ bh , err := w .writeRawBlock (b , blockType )
278
+
279
+ // Calculate filters.
280
+ if w .filter .policy != nil {
281
+ w .filter .finishBlock (w .offset )
282
+ }
283
+
284
+ // Reset the per-block state.
285
+ w .buf .Reset ()
286
+ w .nEntries = 0
287
+ w .restarts = w .restarts [:0 ]
288
+
289
+ return bh , err
290
+ }
291
+
292
+ func (w * Writer ) writeRawBlock (b []byte , blockType byte ) (blockHandle , error ) {
293
+ w .tmp [0 ] = blockType
181
294
182
295
// Calculate the checksum.
183
296
checksum := crc .New (b ).Update (w .tmp [:1 ]).Value ()
@@ -192,11 +305,6 @@ func (w *Writer) finishBlock() (blockHandle, error) {
192
305
}
193
306
bh := blockHandle {w .offset , uint64 (len (b ))}
194
307
w .offset += uint64 (len (b )) + blockTrailerLen
195
-
196
- // Reset the per-block state.
197
- w .buf .Reset ()
198
- w .nEntries = 0
199
- w .restarts = w .restarts [:0 ]
200
308
return bh , nil
201
309
}
202
310
@@ -229,16 +337,36 @@ func (w *Writer) Close() (err error) {
229
337
w .flushPendingBH (nil )
230
338
}
231
339
232
- // Write the (empty) metaindex block.
340
+ // Writer.append uses w.tmp[:3*binary.MaxVarintLen64]. Let tmp be the other
341
+ // half of that slice.
342
+ tmp := w .tmp [3 * binary .MaxVarintLen64 : 5 * binary .MaxVarintLen64 ]
343
+
344
+ // Write the filter block.
345
+ if w .filter .policy != nil {
346
+ b , err := w .filter .finish ()
347
+ if err != nil {
348
+ w .err = err
349
+ return w .err
350
+ }
351
+ bh , err := w .writeRawBlock (b , noCompressionBlockType )
352
+ if err != nil {
353
+ w .err = err
354
+ return w .err
355
+ }
356
+ n := encodeBlockHandle (tmp , bh )
357
+ w .append ([]byte ("filter." + w .filter .policy .Name ()), tmp [:n ], true )
358
+ }
359
+
360
+ // Write the metaindex block. It might be an empty block, if the filter
361
+ // policy is nil.
233
362
metaindexBlockHandle , err := w .finishBlock ()
234
363
if err != nil {
235
364
w .err = err
236
365
return w .err
237
366
}
238
367
239
368
// Write the index block.
240
- // writer.append uses w.tmp[:3*binary.MaxVarintLen64].
241
- i0 , tmp := 0 , w .tmp [3 * binary .MaxVarintLen64 :5 * binary .MaxVarintLen64 ]
369
+ i0 := 0
242
370
for _ , ie := range w .indexEntries {
243
371
n := encodeBlockHandle (tmp , ie .bh )
244
372
i1 := i0 + ie .keyLen
@@ -280,15 +408,17 @@ func (w *Writer) Close() (err error) {
280
408
// NewWriter returns a new table writer for the file. Closing the writer will
281
409
// close the file.
282
410
func NewWriter (f db.File , o * db.Options ) * Writer {
283
- // TODO: honor o.GetFilterPolicy().
284
411
w := & Writer {
285
412
closer : f ,
286
413
blockRestartInterval : o .GetBlockRestartInterval (),
287
414
blockSize : o .GetBlockSize (),
288
415
cmp : o .GetComparer (),
289
416
compression : o .GetCompression (),
290
- prevKey : make ([]byte , 0 , 256 ),
291
- restarts : make ([]uint32 , 0 , 256 ),
417
+ filter : filterWriter {
418
+ policy : o .GetFilterPolicy (),
419
+ },
420
+ prevKey : make ([]byte , 0 , 256 ),
421
+ restarts : make ([]uint32 , 0 , 256 ),
292
422
}
293
423
if f == nil {
294
424
w .err = errors .New ("leveldb/table: nil file" )
0 commit comments