Skip to content

Commit 2eabd22

Browse files
MerkleDB Improve Node Size Calculation (#1950)
1 parent 7faba53 commit 2eabd22

File tree

4 files changed

+39
-42
lines changed

4 files changed

+39
-42
lines changed

x/merkledb/db.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -1211,9 +1211,11 @@ func addPrefixToKey(bufferPool *sync.Pool, prefix []byte, key []byte) []byte {
12111211
return prefixedKey
12121212
}
12131213

1214+
// cacheEntrySize returns a rough approximation of the memory consumed by storing the path and node
12141215
func cacheEntrySize(p path, n *node) int {
12151216
if n == nil {
12161217
return len(p)
12171218
}
1218-
return len(p) + len(n.bytes())
1219+
// Nodes cache their byte representation, so the total memory consumed is roughly twice the size of the node's serialized bytes.
1220+
return len(p) + 2*len(n.bytes())
12191221
}

x/merkledb/intermediate_node_db.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ type intermediateNodeDB struct {
2727
// from the cache, which will call [OnEviction].
2828
// A non-nil error returned from Put is considered fatal.
2929
// Keys in [nodeCache] aren't prefixed with [intermediateNodePrefix].
30-
nodeCache onEvictCache[path, *node]
30+
nodeCache onEvictCache[path, *node]
31+
// the number of bytes to evict during an eviction batch
3132
evictionBatchSize int
3233
metrics merkleMetrics
3334
}
@@ -57,6 +58,7 @@ func newIntermediateNodeDB(
5758
func (db *intermediateNodeDB) onEviction(key path, n *node) error {
5859
writeBatch := db.baseDB.NewBatch()
5960

61+
totalSize := cacheEntrySize(key, n)
6062
if err := db.addToBatch(writeBatch, key, n); err != nil {
6163
_ = db.baseDB.Close()
6264
return err
@@ -66,12 +68,14 @@ func (db *intermediateNodeDB) onEviction(key path, n *node) error {
6668
// and write them to disk. We write a batch of them, rather than
6769
// just [n], so that we don't immediately evict and write another
6870
// node, because each time this method is called we do a disk write.
69-
for writeBatch.Size() < db.evictionBatchSize {
71+
// Evict until a total number of bytes has been written, rather than a fixed number of nodes.
72+
for totalSize < db.evictionBatchSize {
7073
key, n, exists := db.nodeCache.removeOldest()
7174
if !exists {
7275
// The cache is empty.
7376
break
7477
}
78+
totalSize += cacheEntrySize(key, n)
7579
if err := db.addToBatch(writeBatch, key, n); err != nil {
7680
_ = db.baseDB.Close()
7781
return err

x/merkledb/intermediate_node_db_test.go

+27-35
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ import (
2323
func TestIntermediateNodeDB(t *testing.T) {
2424
require := require.New(t)
2525

26-
cacheSize := 100
26+
// use exact multiple of node size so require.Equal(1, db.nodeCache.fifo.Len()) is correct later
27+
cacheSize := 200
2728
evictionBatchSize := cacheSize
2829
baseDB := memdb.New()
2930
db := newIntermediateNodeDB(
@@ -37,49 +38,40 @@ func TestIntermediateNodeDB(t *testing.T) {
3738
)
3839

3940
// Put a key-node pair
40-
key := newPath([]byte{0x01})
41-
node1 := &node{
42-
dbNode: dbNode{
43-
value: maybe.Some([]byte{0x01}),
44-
},
45-
}
46-
require.NoError(db.Put(key, node1))
41+
node1Key := newPath([]byte{0x01})
42+
node1 := newNode(nil, node1Key)
43+
node1.setValue(maybe.Some([]byte{byte(0x01)}))
44+
require.NoError(db.Put(node1Key, node1))
4745

4846
// Get the key-node pair from cache
49-
node1Read, err := db.Get(key)
47+
node1Read, err := db.Get(node1Key)
5048
require.NoError(err)
5149
require.Equal(node1, node1Read)
5250

5351
// Overwrite the key-node pair
54-
node1Updated := &node{
55-
dbNode: dbNode{
56-
value: maybe.Some([]byte{0x02}),
57-
},
58-
}
59-
require.NoError(db.Put(key, node1Updated))
52+
node1Updated := newNode(nil, node1Key)
53+
node1Updated.setValue(maybe.Some([]byte{byte(0x02)}))
54+
require.NoError(db.Put(node1Key, node1Updated))
6055

6156
// Assert the key-node pair was overwritten
62-
node1Read, err = db.Get(key)
57+
node1Read, err = db.Get(node1Key)
6358
require.NoError(err)
6459
require.Equal(node1Updated, node1Read)
6560

6661
// Delete the key-node pair
67-
require.NoError(db.Delete(key))
68-
_, err = db.Get(key)
62+
require.NoError(db.Delete(node1Key))
63+
_, err = db.Get(node1Key)
6964

7065
// Assert the key-node pair was deleted
7166
require.Equal(database.ErrNotFound, err)
7267

7368
// Put elements in the cache until it is full.
74-
expectedSize := cacheEntrySize(key, nil)
69+
expectedSize := 0
7570
added := 0
7671
for {
7772
key := newPath([]byte{byte(added)})
78-
node := &node{
79-
dbNode: dbNode{
80-
value: maybe.Some([]byte{byte(added)}),
81-
},
82-
}
73+
node := newNode(nil, EmptyPath)
74+
node.setValue(maybe.Some([]byte{byte(added)}))
8375
newExpectedSize := expectedSize + cacheEntrySize(key, node)
8476
if newExpectedSize > cacheSize {
8577
// Don't trigger eviction.
@@ -97,24 +89,24 @@ func TestIntermediateNodeDB(t *testing.T) {
9789
// Put one more element in the cache, which should trigger an eviction
9890
// of all but 2 elements. 2 elements remain rather than 1 element because of
9991
// the added key prefix increasing the size tracked by the batch.
100-
key = newPath([]byte{byte(cacheSize)})
101-
node := &node{
102-
dbNode: dbNode{
103-
value: maybe.Some([]byte{byte(cacheSize)}),
104-
},
105-
}
92+
key := newPath([]byte{byte(added)})
93+
node := newNode(nil, EmptyPath)
94+
node.setValue(maybe.Some([]byte{byte(added)}))
10695
require.NoError(db.Put(key, node))
10796

10897
// Assert cache has expected number of elements
109-
require.Equal(2, db.nodeCache.fifo.Len())
98+
require.Equal(1, db.nodeCache.fifo.Len())
11099
gotKey, _, ok := db.nodeCache.fifo.Oldest()
111100
require.True(ok)
112-
require.Equal(newPath([]byte{byte(added - 1)}), gotKey)
101+
require.Equal(newPath([]byte{byte(added)}), gotKey)
113102

114-
// Get a node from the base database (not cache)
115-
nodeRead, err := db.Get(newPath([]byte{0x03}))
103+
// Get a node from the base database
104+
// Use an early key that has been evicted from the cache
105+
_, inCache := db.nodeCache.Get(node1Key)
106+
require.False(inCache)
107+
nodeRead, err := db.Get(node1Key)
116108
require.NoError(err)
117-
require.Equal(maybe.Some([]byte{0x03}), nodeRead.value)
109+
require.Equal(maybe.Some([]byte{0x01}), nodeRead.value)
118110

119111
// Flush the cache.
120112
require.NoError(db.Flush())

x/merkledb/node.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ type child struct {
3636
hasValue bool
3737
}
3838

39-
// node holds additional information on top of the dbNode that makes calulcations easier to do
39+
// node holds additional information on top of the dbNode that makes calculations easier to do
4040
type node struct {
4141
dbNode
4242
id ids.ID
@@ -159,11 +159,9 @@ func (n *node) removeChild(child *node) {
159159
}
160160

161161
// clone Returns a copy of [n].
162-
// nodeBytes is intentionally not included because it can cause a race.
163-
// nodes being evicted by the cache can write nodeBytes,
164-
// so reading them during the cloning would be a data race.
165162
// Note: value isn't cloned because it is never edited, only overwritten
166163
// if this ever changes, value will need to be copied as well
164+
// It is safe to clone all fields (including nodeBytes) because they are only written/read while one or both of the db locks are held.
167165
func (n *node) clone() *node {
168166
return &node{
169167
id: n.id,
@@ -173,6 +171,7 @@ func (n *node) clone() *node {
173171
children: maps.Clone(n.children),
174172
},
175173
valueDigest: n.valueDigest,
174+
nodeBytes: n.nodeBytes,
176175
}
177176
}
178177

0 commit comments

Comments
 (0)