Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use hash index instead of bloom filter #87

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ module github.com/coocood/badger
go 1.12

require (
github.com/coocood/bbloom v0.0.0-20180518162752-7774d68761e5
github.com/coocood/rtutil v0.0.0-20190304133409-c84515f646f2
github.com/dgryski/go-farm v0.0.0-20190104051053-3adb47b1fb0f
github.com/dustin/go-humanize v1.0.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
github.com/coocood/bbloom v0.0.0-20180518162752-7774d68761e5 h1:A9dUQP3gNU5GjLMe8by8QM3mn+sHLRAlwquACitTHb0=
github.com/coocood/bbloom v0.0.0-20180518162752-7774d68761e5/go.mod h1:aXF5GNK2jkah+VKjINjpMxB29d5XlAKk4Yo1oX0q9wk=
github.com/coocood/rtutil v0.0.0-20190304133409-c84515f646f2 h1:NnLfQ77q0G4k2Of2c1ceQ0ec6MkLQyDp+IGdVM0D8XM=
github.com/coocood/rtutil v0.0.0-20190304133409-c84515f646f2/go.mod h1:7qG7YFnOALvsx6tKTNmQot8d7cGFXM9TidzvRFLWYwM=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
Expand Down
3 changes: 0 additions & 3 deletions level_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,6 @@ func (s *levelHandler) getInTables(key []byte, tables []*table.Table) y.ValueStr
}

func (s *levelHandler) getInTable(key []byte, table *table.Table) (result y.ValueStruct) {
if table.DoesNotHave(y.ParseKey(key)) {
return
}
resultKey, resultVs, ok := table.PointGet(key)
if !ok {
it := table.NewIteratorNoRef(false)
Expand Down
54 changes: 31 additions & 23 deletions table/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ package table

import (
"encoding/binary"
"github.com/coocood/badger/fileutil"
"github.com/coocood/badger/options"
"golang.org/x/time/rate"
"os"
"reflect"
"unsafe"

"github.com/coocood/badger/fileutil"
"github.com/coocood/badger/options"
"golang.org/x/time/rate"

"github.com/coocood/badger/y"
"github.com/coocood/bbloom"
)

const restartInterval = 256 // Might want to change this to be based on total size instead of numKeys.
Expand Down Expand Up @@ -70,8 +70,6 @@ type Builder struct {
// The offsets are relative to the start of the block.
entryEndOffsets []uint32

bloomFilter bbloom.Bloom

enableHashIndex bool
hashIndexBuilder hashIndexBuilder
}
Expand All @@ -81,11 +79,9 @@ type Builder struct {
func NewTableBuilder(f *os.File, limiter *rate.Limiter, opt options.TableBuilderOptions) *Builder {
assumeKeyNum := 256 * 1024
return &Builder{
w: fileutil.NewBufferedFileWriter(f, opt.WriteBufferSize, opt.BytesPerSync, limiter),
buf: make([]byte, 0, 4*1024),
baseKeysBuf: make([]byte, 0, assumeKeyNum/restartInterval),
// assume a large enough num of keys to init bloom filter.
bloomFilter: bbloom.New(float64(assumeKeyNum), 0.01),
w: fileutil.NewBufferedFileWriter(f, opt.WriteBufferSize, opt.BytesPerSync, limiter),
buf: make([]byte, 0, 4*1024),
baseKeysBuf: make([]byte, 0, assumeKeyNum/restartInterval),
enableHashIndex: opt.EnableHashIndex,
hashIndexBuilder: newHashIndexBuilder(opt.HashUtilRatio),
}
Expand Down Expand Up @@ -113,7 +109,6 @@ func (b *Builder) resetBuffers() {
b.blockBaseOffset = 0
b.blockEndOffsets = b.blockEndOffsets[:0]
b.entryEndOffsets = b.entryEndOffsets[:0]
b.bloomFilter.Clear()
b.hashIndexBuilder.reset()
}

Expand All @@ -134,13 +129,10 @@ func (b Builder) keyDiff(newKey []byte) []byte {
}

func (b *Builder) addHelper(key []byte, v y.ValueStruct) {
// Add key to bloom filter.
if len(key) > 0 {
y.Assert(len(key) > 0)
if b.enableHashIndex {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we enable this by default and remove the option?

keyNoTs := y.ParseKey(key)
b.bloomFilter.Add(keyNoTs)
if b.enableHashIndex {
b.hashIndexBuilder.addKey(keyNoTs, uint32(len(b.baseKeysEndOffs)), uint8(b.counter))
}
b.hashIndexBuilder.addKey(keyNoTs, uint32(len(b.baseKeysEndOffs)), uint8(b.counter))
}

// diffKey stores the difference of key with blockBaseKey.
Expand Down Expand Up @@ -213,19 +205,35 @@ func (b *Builder) ReachedCapacity(capacity int64) bool {
return int64(estimateSz) > capacity
}

// Format of an SST file:
// |block 1|
// [block 1 end offset]
// |length of block 1 end offset|
//
// ...
// ...
// |block N|
// [block N end offset]
// |length of block N end offset|
//
// |block 1 end offset| ... |block N end offset|
// base keys of all blocks
// |block 1 base key end offset| ... |block N base key end offset|
// block count
// {hash index section}
// |bucket 1| ... |bucket N|
// ---> hash entry (which block (2 bytes), which key inside the block (1 byte))
// number of buckets

// Finish finishes the table by appending the index.
func (b *Builder) Finish() error {
b.finishBlock() // This will never start a new block.
b.buf = append(b.buf, u32SliceToBytes(b.blockEndOffsets)...)
b.buf = append(b.buf, b.baseKeysBuf...)
b.buf = append(b.buf, u32SliceToBytes(b.baseKeysEndOffs)...)
// block count
b.buf = append(b.buf, u32ToBytes(uint32(len(b.baseKeysEndOffs)))...)

// Write bloom filter.
bfData := b.bloomFilter.BinaryMarshal()
b.buf = append(b.buf, bfData...)
b.buf = append(b.buf, u32ToBytes(uint32(len(bfData)))...)

if b.enableHashIndex {
b.buf = b.hashIndexBuilder.finish(b.buf)
} else {
Expand Down
27 changes: 6 additions & 21 deletions table/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/coocood/badger/options"
"github.com/coocood/badger/y"
"github.com/coocood/bbloom"
"github.com/pingcap/errors"
)

Expand All @@ -54,8 +53,6 @@ type Table struct {
smallest, biggest []byte // Smallest and largest keys.
id uint64 // file id, part of filename

bf bbloom.Bloom

hIdx hashIndex
}

Expand Down Expand Up @@ -221,28 +218,20 @@ func (t *Table) readIndex() {
t.hIdx.readIndex(buckets, numBuckets)
}

// Read bloom filter.
readPos -= 4
buf = t.readNoFail(readPos, 4)
bloomLen := int(bytesToU32(buf))
readPos -= bloomLen
data := t.readNoFail(readPos, bloomLen)
t.bf.BinaryUnmarshal(data)
blockCnt := int(bytesToU32(buf))

readPos -= 4
buf = t.readNoFail(readPos, 4)
numBlocks := int(bytesToU32(buf))

readPos -= 4 * numBlocks
buf = t.readNoFail(readPos, 4*numBlocks)
readPos -= 4 * blockCnt
buf = t.readNoFail(readPos, 4*blockCnt)
t.baseKeysEndOffs = bytesToU32Slice(buf)

baseKeyBufLen := int(t.baseKeysEndOffs[numBlocks-1])
baseKeyBufLen := int(t.baseKeysEndOffs[blockCnt-1])
readPos -= baseKeyBufLen
t.baseKeys = t.readNoFail(readPos, baseKeyBufLen)

readPos -= 4 * numBlocks
buf = t.readNoFail(readPos, 4*numBlocks)
readPos -= 4 * blockCnt
buf = t.readNoFail(readPos, 4*blockCnt)
t.blockEndOffsets = bytesToU32Slice(buf)
}

Expand Down Expand Up @@ -298,10 +287,6 @@ func (t *Table) Filename() string { return t.fd.Name() }
// ID returns the table's file ID, the number used to make the file name.
func (t *Table) ID() uint64 {
	return t.id
}

// DoesNotHave does a bloom filter lookup for key and returns the negated result.
// A true result means the table does not have the key; a false result is not
// conclusive ("if", but not "only if").
func (t *Table) DoesNotHave(key []byte) bool {
	maybePresent := t.bf.Has(key)
	return !maybePresent
}

// ParseFileID reads the file id out of a filename.
func ParseFileID(name string) (uint64, bool) {
name = path.Base(name)
Expand Down
3 changes: 0 additions & 3 deletions y/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ var (
NumVLogBytesWritten *expvar.Int
// NumLSMGets is the number of LSM gets
NumLSMGets *expvar.Map
// NumLSMBloomHits is the number of LSM bloom hits
NumLSMBloomHits *expvar.Map
// NumGets is number of gets
NumGets *expvar.Int
// NumPuts is number of puts
Expand All @@ -59,7 +57,6 @@ func init() {
NumBytesRead = expvar.NewInt("badger_read_bytes")
NumVLogBytesWritten = expvar.NewInt("badger_value_log_written_bytes")
NumLSMGets = expvar.NewMap("badger_lsm_level_gets_total")
NumLSMBloomHits = expvar.NewMap("badger_lsm_bloom_hits_total")
NumGets = expvar.NewInt("badger_gets_total")
NumPuts = expvar.NewInt("badger_puts_total")
NumBlockedPuts = expvar.NewInt("badger_blocked_puts_total")
Expand Down