MerkleDB Remove ID from Node to reduce size and removal channel creation. #2324

Merged (12 commits) on Nov 29, 2023
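The core of this PR shows up in the x/merkledb/node.go hunks below: the cached `id ids.ID` field is removed from `node`, `calculateID` now returns the hash instead of memoizing it on the struct, and the database keeps only the trie root's ID in a new `rootID` field. A minimal, self-contained sketch of that shape (stand-in types and a stand-in hash, not the real merkledb API):

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

// Before: each node cached its 32-byte hash and had to invalidate it on every change.
type nodeBefore struct {
	encoded []byte
	id      [sha256.Size]byte // cached ID, reset whenever the node changed
}

// After: nodes carry no ID; the hash is computed on demand and returned to the caller.
type nodeAfter struct {
	encoded []byte
}

func (n *nodeAfter) calculateID() [sha256.Size]byte {
	return sha256.Sum256(n.encoded)
}

// The database itself still caches exactly one hash: the trie root's.
type db struct {
	sentinel *nodeAfter
	rootID   [sha256.Size]byte
}

func main() {
	d := &db{sentinel: &nodeAfter{encoded: []byte("sentinel")}}
	d.rootID = d.sentinel.calculateID() // callers now capture the returned ID
	fmt.Printf("root: %x\n", d.rootID[:8])
}
```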
4 changes: 2 additions & 2 deletions x/merkledb/codec.go
@@ -160,7 +160,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
return io.ErrUnexpectedEOF
}

n.children = make(map[byte]child, numChildren)
n.children = make(map[byte]*child, numChildren)
var previousChild uint64
for i := uint64(0); i < numChildren; i++ {
index, err := c.decodeUint(src)
@@ -184,7 +184,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
if err != nil {
return err
}
n.children[byte(index)] = child{
n.children[byte(index)] = &child{
compressedKey: compressedKey,
id: childID,
hasValue: hasValue,
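Throughout this diff the children map changes from `map[byte]child` to `map[byte]*child`, so the codec now allocates a `*child` per decoded entry. One practical effect of pointer values, illustrated below with a small self-contained example (not merkledb code), is that an entry can be mutated in place rather than via Go's read-modify-reinsert dance for struct-valued map entries; it also means clones must copy entries explicitly, which the node.go hunk further down does.

```go
package main

import "fmt"

type child struct {
	compressedKey string
	hasValue      bool
}

func main() {
	children := make(map[byte]*child, 2)
	children[0] = &child{compressedKey: "ab"}

	// With map[byte]child this field write would not compile; it would need:
	//   c := children[0]; c.hasValue = true; children[0] = c
	children[0].hasValue = true

	fmt.Println(children[0].hasValue) // true
}
```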
8 changes: 4 additions & 4 deletions x/merkledb/codec_test.go
@@ -146,15 +146,15 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {

numChildren := r.Intn(int(bf)) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
for i := 0; i < numChildren; i++ {
var childID ids.ID
_, _ = r.Read(childID[:]) // #nosec G404

childKeyBytes := make([]byte, r.Intn(32)) // #nosec G404
_, _ = r.Read(childKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(childKeyBytes),
id: childID,
}
@@ -202,14 +202,14 @@ func FuzzEncodeHashValues(f *testing.F) {
for _, bf := range validBranchFactors { // Create a random node
r := rand.New(rand.NewSource(int64(randSeed))) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
numChildren := r.Intn(int(bf)) // #nosec G404
for i := 0; i < numChildren; i++ {
compressedKeyLen := r.Intn(32) // #nosec G404
compressedKeyBytes := make([]byte, compressedKeyLen)
_, _ = r.Read(compressedKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(compressedKeyBytes),
id: ids.GenerateTestID(),
hasValue: r.Intn(2) == 1, // #nosec G404
59 changes: 29 additions & 30 deletions x/merkledb/db.go
@@ -205,6 +205,7 @@ type merkleDB struct {
// It is the node with a nil key and is the ancestor of all nodes in the trie.
// If it has a value or has multiple children, it is also the root of the trie.
sentinelNode *node
rootID ids.ID

// Valid children of this trie.
childViews []*trieView
@@ -260,14 +261,13 @@ func newDatabase(
tokenSize: BranchFactorToTokenSize[config.BranchFactor],
}

root, err := trieDB.initializeRootIfNeeded()
if err != nil {
if err := trieDB.initializeRoot(); err != nil {
return nil, err
}

// add current root to history (has no changes)
trieDB.history.record(&changeSummary{
rootID: root,
rootID: trieDB.rootID,
values: map[Key]*change[maybe.Maybe[[]byte]]{},
nodes: map[Key]*change[*node]{},
})
@@ -578,13 +578,7 @@ func (db *merkleDB) GetMerkleRoot(ctx context.Context) (ids.ID, error) {

// Assumes [db.lock] is read locked.
func (db *merkleDB) getMerkleRoot() ids.ID {
if !isSentinelNodeTheRoot(db.sentinelNode) {
// if the sentinel node should be skipped, the trie's root is the nil key node's only child
for _, childEntry := range db.sentinelNode.children {
return childEntry.id
}
}
return db.sentinelNode.id
return db.rootID
}

// isSentinelNodeTheRoot returns true if the passed in sentinel node has a value and or multiple child nodes
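With the new field, `getMerkleRoot` becomes a plain read of `db.rootID`. Judging from the rest of the diff, the cached value is refreshed wherever the trie actually changes: in `initializeRoot` at startup, in `commitChanges` when a view commits (next hunk), and in `Clear`. A simplified, self-contained sketch of that read-cached/update-on-write pattern, with stand-in types rather than the real merkledb struct:

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

type id = [sha256.Size]byte

type merkleDB struct {
	rootID id
}

// Root reads are now a field access; the sentinel node's children are no
// longer walked on every call.
func (db *merkleDB) getMerkleRoot() id {
	return db.rootID
}

// The cached value is refreshed only where the trie changes.
func (db *merkleDB) commit(newRoot id) { db.rootID = newRoot }

func main() {
	db := &merkleDB{}
	db.commit(sha256.Sum256([]byte("root of the committed view")))
	r := db.getMerkleRoot()
	fmt.Printf("%x\n", r[:8])
}
```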
@@ -982,6 +976,7 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
// Only modify in-memory state after the commit succeeds
// so that we don't need to clean up on error.
db.sentinelNode = sentinelChange.after
db.rootID = changes.rootID
db.history.record(changes)
return nil
}
@@ -1161,34 +1156,38 @@ func (db *merkleDB) invalidateChildrenExcept(exception *trieView) {
}
}

func (db *merkleDB) initializeRootIfNeeded() (ids.ID, error) {
// not sure if the sentinel node exists or if it had a value
// check under both prefixes
func (db *merkleDB) initializeRoot() error {
// Not sure if the sentinel node exists or if it had a value,
// so check under both prefixes
var err error
db.sentinelNode, err = db.intermediateNodeDB.Get(Key{})

if errors.Is(err, database.ErrNotFound) {
// Didn't find the sentinel in the intermediateNodeDB, check the valueNodeDB
db.sentinelNode, err = db.valueNodeDB.Get(Key{})
}
if err == nil {
// sentinel node already exists, so calculate the root ID of the trie
db.sentinelNode.calculateID(db.metrics)
return db.getMerkleRoot(), nil
}
if !errors.Is(err, database.ErrNotFound) {
return ids.Empty, err
}

// sentinel node doesn't exist; make a new one.
db.sentinelNode = newNode(Key{})

// update its ID
db.sentinelNode.calculateID(db.metrics)
if err != nil {
if !errors.Is(err, database.ErrNotFound) {
return err
}

if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return ids.Empty, err
// Sentinel node doesn't exist in either database prefix.
// Make a new one and store it in the intermediateNodeDB
db.sentinelNode = newNode(Key{})
if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return err
}
}

return db.sentinelNode.id, nil
db.rootID = db.sentinelNode.calculateID(db.metrics)
if !isSentinelNodeTheRoot(db.sentinelNode) {
// If the sentinel node is not the root, the trie's root is the sentinel node's only child
for _, childEntry := range db.sentinelNode.children {
db.rootID = childEntry.id
}
}
return nil
}

// Returns a view of the trie as it was when it had root [rootID] for keys within range [start, end].
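The root-resolution logic that `getMerkleRoot` used to perform now runs once in `initializeRoot`: the sentinel node (the node at the empty key) counts as the root only when it has a value or more than one child; otherwise the root is its single child. A self-contained sketch of that decision with stand-in types; the body of `isSentinelNodeTheRoot` is not part of this diff, so the condition below is an assumption based on its doc comment:

```go
package main

import "fmt"

type id [8]byte // stand-in for ids.ID

type child struct{ id id }

type node struct {
	hasValue bool
	children map[byte]*child
}

// Assumed reading of isSentinelNodeTheRoot: the sentinel is the root when it
// has a value or anything other than exactly one child.
func isSentinelTheRoot(sentinel *node) bool {
	return sentinel.hasValue || len(sentinel.children) != 1
}

// Mirrors the tail of initializeRoot: default to the sentinel's own ID, but if
// the sentinel is skipped, the root is its only child.
func resolveRootID(sentinel *node, sentinelID id) id {
	rootID := sentinelID
	if !isSentinelTheRoot(sentinel) {
		for _, c := range sentinel.children {
			rootID = c.id
		}
	}
	return rootID
}

func main() {
	sentinel := &node{children: map[byte]*child{0: {id: id{0xaa}}}}
	fmt.Printf("%x\n", resolveRootID(sentinel, id{0x01}))
}
```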
@@ -1289,7 +1288,7 @@ func (db *merkleDB) Clear() error {

// Clear root
db.sentinelNode = newNode(Key{})
db.sentinelNode.calculateID(db.metrics)
db.rootID = db.sentinelNode.calculateID(db.metrics)

// Clear history
db.history = newTrieHistory(db.history.maxHistoryLen)
4 changes: 2 additions & 2 deletions x/merkledb/history_test.go
@@ -660,8 +660,8 @@ func TestHistoryGetChangesToRoot(t *testing.T) {
rootID: ids.GenerateTestID(),
nodes: map[Key]*change[*node]{
ToKey([]byte{byte(i)}): {
before: &node{id: ids.GenerateTestID()},
after: &node{id: ids.GenerateTestID()},
before: &node{},
after: &node{},
},
},
values: map[Key]*change[maybe.Maybe[[]byte]]{
33 changes: 16 additions & 17 deletions x/merkledb/node.go
@@ -4,7 +4,6 @@
package merkledb

import (
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"

"github.com/ava-labs/avalanchego/ids"
@@ -17,7 +16,7 @@ const HashLength = 32
// Representation of a node stored in the database.
type dbNode struct {
value maybe.Maybe[[]byte]
children map[byte]child
children map[byte]*child
}

type child struct {
@@ -29,7 +28,6 @@ type child struct {
// node holds additional information on top of the dbNode that makes calculations easier to do
type node struct {
dbNode
id ids.ID
key Key
nodeBytes []byte
valueDigest maybe.Maybe[[]byte]
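Dropping the 32-byte `id` from every in-memory node is where the "reduce size" part of the PR title comes from. A rough, self-contained illustration using `unsafe.Sizeof`; the stand-in structs below are simplified, ignore what the slices, strings, and maps point to, and so only approximate the real `node`'s layout:

```go
package main

import (
	"fmt"
	"unsafe"
)

type child struct{} // placeholder: only the map header size matters here

type nodeBefore struct {
	value     []byte
	children  map[byte]*child
	id        [32]byte // the cached ID removed by this PR
	key       string
	nodeBytes []byte
}

type nodeAfter struct {
	value     []byte
	children  map[byte]*child
	key       string
	nodeBytes []byte
}

func main() {
	// On a 64-bit platform this prints 104 and 72: 32 bytes saved per node,
	// before counting the invalidation bookkeeping that goes with caching.
	fmt.Println("before:", unsafe.Sizeof(nodeBefore{}), "bytes")
	fmt.Println("after: ", unsafe.Sizeof(nodeAfter{}), "bytes")
}
```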
@@ -39,7 +37,7 @@ type node struct {
func newNode(key Key) *node {
return &node{
dbNode: dbNode{
children: make(map[byte]child, 2),
children: make(map[byte]*child, 2),
},
key: key,
}
@@ -78,19 +76,14 @@ func (n *node) bytes() []byte {
// clear the cached values that will need to be recalculated whenever the node changes
// for example, node ID and byte representation
func (n *node) onNodeChanged() {
n.id = ids.Empty
n.nodeBytes = nil
}

// Returns and caches the ID of this node.
func (n *node) calculateID(metrics merkleMetrics) {
if n.id != ids.Empty {
return
}

func (n *node) calculateID(metrics merkleMetrics) ids.ID {
metrics.HashCalculated()
bytes := codec.encodeHashValues(n)
n.id = hashing.ComputeHash256Array(bytes)
return hashing.ComputeHash256Array(bytes)
}

// Set [n]'s value to [val].
@@ -114,16 +107,15 @@ func (n *node) setValueDigest() {
func (n *node) addChild(childNode *node, tokenSize int) {
n.setChildEntry(
childNode.key.Token(n.key.length, tokenSize),
child{
&child{
compressedKey: childNode.key.Skip(n.key.length + tokenSize),
id: childNode.id,
hasValue: childNode.hasValue(),
},
)
}

// Adds a child to [n] without a reference to the child node.
func (n *node) setChildEntry(index byte, childEntry child) {
func (n *node) setChildEntry(index byte, childEntry *child) {
n.onNodeChanged()
n.children[index] = childEntry
}
@@ -139,16 +131,23 @@ func (n *node) removeChild(child *node, tokenSize int) {
// if this ever changes, value will need to be copied as well
// it is safe to clone all fields because they are only written/read while one or both of the db locks are held
func (n *node) clone() *node {
return &node{
id: n.id,
result := &node{
key: n.key,
dbNode: dbNode{
value: n.value,
children: maps.Clone(n.children),
children: make(map[byte]*child, len(n.children)),
},
valueDigest: n.valueDigest,
nodeBytes: n.nodeBytes,
}
for key, existing := range n.children {
result.children[key] = &child{
compressedKey: existing.compressedKey,
id: existing.id,
hasValue: existing.hasValue,
}
}
return result
}

// Returns the ProofNode representation of this node.
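Because the children map now holds pointers, `clone` can no longer use `maps.Clone`: a shallow copy would leave the original node and its clone sharing the same `*child` values, so a mutation through one would leak into the other. The new loop allocates a fresh `child` per entry. A self-contained illustration of the difference, using the same `golang.org/x/exp/maps` package the old code imported:

```go
package main

import (
	"fmt"

	"golang.org/x/exp/maps"
)

type child struct{ hasValue bool }

func main() {
	original := map[byte]*child{0: {hasValue: false}}

	shallow := maps.Clone(original) // copies only the pointers
	deep := make(map[byte]*child, len(original))
	for k, v := range original {
		c := *v // copy the struct the pointer refers to
		deep[k] = &c
	}

	original[0].hasValue = true
	fmt.Println(shallow[0].hasValue) // true: shared *child
	fmt.Println(deep[0].hasValue)    // false: independent copy
}
```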
2 changes: 1 addition & 1 deletion x/merkledb/proof.go
@@ -847,7 +847,7 @@ func addPathInfo(
// We only need the IDs to be correct so that the calculated hash is correct.
n.setChildEntry(
index,
child{
&child{
id: childID,
compressedKey: compressedKey,
})
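The comment in this hunk, "We only need the IDs to be correct so that the calculated hash is correct", is why these placeholder children are enough: the parent's hash depends on its children's IDs rather than on the child nodes' full contents. A rough, self-contained sketch of that idea with a toy hash standing in for the real `encodeHashValues`-based hashing, which encodes more fields and in a different format:

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

type child struct {
	id [sha256.Size]byte
}

// Toy stand-in for node hashing: the parent's hash consumes each child's index
// and ID, never the child node's own value or children.
func hashNode(key string, children map[byte]child) [sha256.Size]byte {
	h := sha256.New()
	h.Write([]byte(key))
	for i := 0; i < 256; i++ {
		if c, ok := children[byte(i)]; ok {
			h.Write([]byte{byte(i)})
			h.Write(c.id[:])
		}
	}
	var out [sha256.Size]byte
	copy(out[:], h.Sum(nil))
	return out
}

func main() {
	// The real child's ID is a hash of its full contents...
	childID := sha256.Sum256([]byte("value, grandchildren, ..."))

	// ...but recomputing the parent's hash only needs a placeholder child that
	// carries the right ID, which is what addPathInfo constructs here.
	parent := map[byte]child{3: {id: childID}}
	ph := hashNode("parent key", parent)
	fmt.Printf("%x\n", ph[:8])
}
```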