Skip to content

Commit aed31ae

Browse files
dboehm-avalabs and Dan Laine authored
Remove version db from merkle db (ava-labs#1534)
Co-authored-by: Dan Laine <daniel.laine@avalabs.org>
1 parent ab20b7d commit aed31ae

File tree

2 files changed

+67
-50
lines changed

2 files changed

+67
-50
lines changed

x/merkledb/cache.go

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,15 @@ func newOnEvictCache[K comparable, V any](maxSize int, onEviction func(V) error)
2626
}
2727
}
2828

29+
// removeOldest removes the oldest element from this cache and returns its
// key and value along with true. If c.fifo.Oldest reports that no element
// exists, nothing is deleted and that result is returned with false.
// The body does not acquire c.lock and does not invoke c.onEviction;
// NOTE(review): presumably callers must already hold c.lock -- confirm.
30+
func (c *onEvictCache[K, V]) removeOldest() (K, V, bool) {
31+
k, v, exists := c.fifo.Oldest()
32+
if exists {
33+
c.fifo.Delete(k)
34+
}
35+
return k, v, exists
36+
}
37+
2938
// Get an element from this cache.
3039
func (c *onEvictCache[K, V]) Get(key K) (V, bool) {
3140
c.lock.RLock()
@@ -44,14 +53,14 @@ func (c *onEvictCache[K, V]) Put(key K, value V) error {
4453
c.fifo.Put(key, value) // Mark as MRU
4554

4655
if c.fifo.Len() > c.maxSize {
47-
oldestKey, oldsetVal, _ := c.fifo.Oldest()
56+
oldestKey, oldestVal, _ := c.fifo.Oldest()
4857
c.fifo.Delete(oldestKey)
49-
return c.onEviction(oldsetVal)
58+
return c.onEviction(oldestVal)
5059
}
5160
return nil
5261
}
5362

54-
// Removes all elements from the cache.
63+
// Flush removes all elements from the cache.
5564
// Returns the last non-nil error during [c.onEviction], if any.
5665
// If [c.onEviction] errors, it will still be called for any
5766
// subsequent elements and the cache will still be emptied.
@@ -65,8 +74,8 @@ func (c *onEvictCache[K, V]) Flush() error {
6574
var errs wrappers.Errs
6675
iter := c.fifo.NewIterator()
6776
for iter.Next() {
68-
val := iter.Value()
69-
errs.Add(c.onEviction(val))
77+
errs.Add(c.onEviction(iter.Value()))
7078
}
79+
7180
return errs.Err
7281
}

x/merkledb/db.go

Lines changed: 53 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ import (
1919

2020
"github.com/ava-labs/avalanchego/database"
2121
"github.com/ava-labs/avalanchego/database/prefixdb"
22-
"github.com/ava-labs/avalanchego/database/versiondb"
2322
"github.com/ava-labs/avalanchego/ids"
2423
"github.com/ava-labs/avalanchego/trace"
2524
"github.com/ava-labs/avalanchego/utils"
@@ -29,8 +28,8 @@ import (
2928
)
3029

3130
const (
32-
RootPath = EmptyPath
33-
31+
RootPath = EmptyPath
32+
evictionBatchSize = 100
3433
// TODO: name better
3534
rebuildViewSizeFractionOfCacheSize = 50
3635
minRebuildViewSizePerCommit = 1000
@@ -139,9 +138,7 @@ type merkleDB struct {
139138
// Should be held before taking [db.lock]
140139
commitLock sync.RWMutex
141140

142-
// versiondb that the other dbs are built on.
143-
// Allows the changes made to the snapshot and [nodeDB] to be atomic.
144-
nodeDB *versiondb.Database
141+
nodeDB database.Database
145142

146143
// Stores data about the database's current state.
147144
metadataDB database.Database
@@ -176,7 +173,7 @@ func newDatabase(
176173
) (*merkleDB, error) {
177174
trieDB := &merkleDB{
178175
metrics: metrics,
179-
nodeDB: versiondb.New(prefixdb.New(nodePrefix, db)),
176+
nodeDB: prefixdb.New(nodePrefix, db),
180177
metadataDB: prefixdb.New(metadataPrefix, db),
181178
history: newTrieHistory(config.HistoryLength),
182179
tracer: config.Tracer,
@@ -265,8 +262,7 @@ func (db *merkleDB) rebuild(ctx context.Context) error {
265262
return err
266263
}
267264
currentViewSize++
268-
}
269-
if err := db.nodeDB.Delete(key); err != nil {
265+
} else if err := db.nodeDB.Delete(key); err != nil {
270266
return err
271267
}
272268
}
@@ -354,10 +350,6 @@ func (db *merkleDB) Close() error {
354350
return err
355351
}
356352

357-
if err := db.nodeDB.Commit(); err != nil {
358-
return err
359-
}
360-
361353
// Successfully wrote intermediate nodes.
362354
return db.metadataDB.Put(cleanShutdownKey, hadCleanShutdown)
363355
}
@@ -749,23 +741,42 @@ func (db *merkleDB) NewIteratorWithStartAndPrefix(start, prefix []byte) database
749741
// the movement of [node] from [db.nodeCache] to [db.nodeDB] is atomic.
750742
// As soon as [db.nodeCache] no longer has [node], [db.nodeDB] does.
751743
// Non-nil error is fatal -- causes [db] to close.
752-
func (db *merkleDB) onEviction(node *node) error {
753-
if node == nil || node.hasValue() {
754-
// only persist intermediary nodes
744+
func (db *merkleDB) onEviction(n *node) error {
745+
// the evicted node isn't an intermediary node, so skip writing.
746+
if n == nil || n.hasValue() {
755747
return nil
756748
}
757749

758-
nodeBytes, err := node.marshal()
759-
if err != nil {
760-
db.onEvictionErr.Set(err)
761-
// Prevent reads/writes from/to [db.nodeDB] to avoid inconsistent state.
762-
_ = db.nodeDB.Close()
763-
// This is a fatal error.
764-
go db.Close()
750+
batch := db.nodeDB.NewBatch()
751+
if err := writeNodeToBatch(batch, n); err != nil {
765752
return err
766753
}
767754

768-
if err := db.nodeDB.Put(node.key.Bytes(), nodeBytes); err != nil {
755+
// Evict the oldest [evictionBatchSize] nodes from the cache
756+
// and write them to disk. We write a batch of them, rather than
757+
// just [n], so that we don't immediately evict and write another
758+
// node, because each time this method is called we do a disk write.
759+
var err error
760+
for removedCount := 0; removedCount < evictionBatchSize; removedCount++ {
761+
_, n, exists := db.nodeCache.removeOldest()
762+
if !exists {
763+
// The cache is empty.
764+
break
765+
}
766+
if n == nil || n.hasValue() {
767+
// only persist intermediary nodes
768+
continue
769+
}
770+
// Note this must be = not := since we check
771+
// [err] outside the loop.
772+
if err = writeNodeToBatch(batch, n); err != nil {
773+
break
774+
}
775+
}
776+
if err == nil {
777+
err = batch.Write()
778+
}
779+
if err != nil {
769780
db.onEvictionErr.Set(err)
770781
_ = db.nodeDB.Close()
771782
go db.Close()
@@ -774,6 +785,16 @@ func (db *merkleDB) onEviction(node *node) error {
774785
return nil
775786
}
776787

788+
// writeNodeToBatch marshals [n] and queues a Put of the resulting bytes
// in [batch] under the key n.key.Bytes(). It only adds to [batch]; it does
// not call batch.Write. Assumes [n] is non-nil.
// Returns the error from n.marshal or from batch.Put, if any.
789+
func writeNodeToBatch(batch database.Batch, n *node) error {
790+
nodeBytes, err := n.marshal()
791+
if err != nil {
792+
return err
793+
}
794+
795+
return batch.Put(n.key.Bytes(), nodeBytes)
796+
}
797+
777798
// Put upserts the key/value pair into the db.
778799
func (db *merkleDB) Put(k, v []byte) error {
779800
return db.Insert(context.Background(), k, v)
@@ -859,19 +880,13 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
859880
return errNoNewRoot
860881
}
861882

862-
// commit any outstanding cache evicted nodes.
863-
// Note that we do this here because below we may Abort
864-
// [db.nodeDB], which would cause us to lose these changes.
865-
if err := db.nodeDB.Commit(); err != nil {
866-
return err
867-
}
883+
batch := db.nodeDB.NewBatch()
868884

869885
_, nodesSpan := db.tracer.Start(ctx, "MerkleDB.commitChanges.writeNodes")
870886
for key, nodeChange := range changes.nodes {
871887
if nodeChange.after == nil {
872888
db.metrics.IOKeyWrite()
873-
if err := db.nodeDB.Delete(key.Bytes()); err != nil {
874-
db.nodeDB.Abort()
889+
if err := batch.Delete(key.Bytes()); err != nil {
875890
nodesSpan.End()
876891
return err
877892
}
@@ -883,15 +898,7 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
883898
// Otherwise, intermediary nodes are persisted on cache eviction or
884899
// shutdown.
885900
db.metrics.IOKeyWrite()
886-
nodeBytes, err := nodeChange.after.marshal()
887-
if err != nil {
888-
db.nodeDB.Abort()
889-
nodesSpan.End()
890-
return err
891-
}
892-
893-
if err := db.nodeDB.Put(key.Bytes(), nodeBytes); err != nil {
894-
db.nodeDB.Abort()
901+
if err := writeNodeToBatch(batch, nodeChange.after); err != nil {
895902
nodesSpan.End()
896903
return err
897904
}
@@ -900,10 +907,9 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
900907
nodesSpan.End()
901908

902909
_, commitSpan := db.tracer.Start(ctx, "MerkleDB.commitChanges.dbCommit")
903-
err := db.nodeDB.Commit()
910+
err := batch.Write()
904911
commitSpan.End()
905912
if err != nil {
906-
db.nodeDB.Abort()
907913
return err
908914
}
909915

@@ -1122,11 +1128,13 @@ func (db *merkleDB) initializeRootIfNeeded() (ids.ID, error) {
11221128
if err != nil {
11231129
return ids.Empty, err
11241130
}
1125-
if err := db.nodeDB.Put(rootKey, rootBytes); err != nil {
1131+
1132+
batch := db.nodeDB.NewBatch()
1133+
if err := batch.Put(rootKey, rootBytes); err != nil {
11261134
return ids.Empty, err
11271135
}
11281136

1129-
return db.root.id, db.nodeDB.Commit()
1137+
return db.root.id, batch.Write()
11301138
}
11311139

11321140
// Returns a view of the trie as it was when it had root [rootID] for keys within range [start, end].

0 commit comments

Comments
 (0)