Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e562467
Shrink bytes stored in nodes
dboehm-avalabs Nov 16, 2023
6446181
Update db_test.go
dboehm-avalabs Nov 16, 2023
48d7a07
Update node.go
dboehm-avalabs Nov 16, 2023
ddace03
Update codec.go
dboehm-avalabs Nov 16, 2023
d66f8e7
reduce bytes
dboehm-avalabs Nov 16, 2023
2fa326e
Update trieview.go
dboehm-avalabs Nov 16, 2023
67e5f39
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 16, 2023
1e9e189
Update trieview.go
dboehm-avalabs Nov 16, 2023
603ca6c
Update db.go
dboehm-avalabs Nov 16, 2023
07bec1f
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 16, 2023
7908bb5
Merge branch 'dev' into ShrinkNodeStorage
Nov 22, 2023
b5a6dfa
nit
Nov 22, 2023
d6c0774
remove unused channel
Nov 22, 2023
4538d42
Update trieview.go
dboehm-avalabs Nov 22, 2023
f3461fa
Merge branch 'dev' into ShrinkNodeStorage
dboehm-avalabs Nov 27, 2023
39cc53b
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Nov 27, 2023
8462ead
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 27, 2023
a9ba3a1
Merge branch 'dev' into ShrinkNodeStorage
Nov 29, 2023
b640a5a
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 29, 2023
3b23a4c
Update db.go
dboehm-avalabs Nov 29, 2023
20df491
Update x/merkledb/codec.go
dboehm-avalabs Dec 12, 2023
2a3b29c
comments
dboehm-avalabs Dec 12, 2023
2a3f2a2
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 12, 2023
4382155
Merge branch 'RemoveNodeBytes' of https://github.com/ava-labs/avalanc…
dboehm-avalabs Dec 12, 2023
9050a16
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 12, 2023
278abec
Update codec.go
dboehm-avalabs Dec 12, 2023
2d14a85
Update codec.go
dboehm-avalabs Dec 12, 2023
66b4498
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 13, 2023
ed026bc
Update codec_test.go
dboehm-avalabs Dec 13, 2023
1c1ced9
Update codec.go
dboehm-avalabs Dec 13, 2023
fd34f89
Update codec.go
dboehm-avalabs Dec 13, 2023
1f3363e
Update codec.go
dboehm-avalabs Dec 13, 2023
83892ed
Update codec.go
dboehm-avalabs Dec 13, 2023
20c6427
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 14, 2023
de890c5
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 14, 2023
40c9894
nits; add test
Dec 14, 2023
ebfba67
Merge branch 'RemoveNodeBytes' of github.com:ava-labs/avalanchego int…
Dec 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions x/merkledb/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ const (
minDBNodeLen = minMaybeByteSliceLen + minVarIntLen
minChildLen = minVarIntLen + minKeyLen + ids.IDLen + boolLen

estimatedKeyLen = 64
estimatedValueLen = 64
estimatedCompressedKeyLen = 8
// Child index, child compressed key, child ID, child has value
estimatedNodeChildLen = minVarIntLen + estimatedCompressedKeyLen + ids.IDLen + boolLen
estimatedKeyLen = 64
estimatedValueLen = 64
// Child index, child ID
hashValuesChildLen = minVarIntLen + ids.IDLen
)
Expand Down Expand Up @@ -62,6 +59,7 @@ type encoderDecoder interface {
type encoder interface {
// Assumes [n] is non-nil.
encodeDBNode(n *dbNode) []byte
encodedDBNodeSize(n *dbNode) int

// Returns the bytes that will be hashed to generate [n]'s ID.
// Assumes [n] is non-nil.
Expand Down Expand Up @@ -91,16 +89,41 @@ type codecImpl struct {
varIntPool sync.Pool
}

func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
var (
numChildren = len(n.children)
// Estimate size of [n] to prevent memory allocations
estimatedLen = estimatedValueLen + minVarIntLen + estimatedNodeChildLen*numChildren
buf = bytes.NewBuffer(make([]byte, 0, estimatedLen))
)
// encodedDBNodeSize returns the exact number of bytes that encodeDBNode
// will produce for [n], so callers can pre-allocate the destination
// buffer and avoid growth copies.
func (c *codecImpl) encodedDBNodeSize(n *dbNode) int {
	// Encoded layout: has-value bool + value bytes, then the child-count
	// varint, then one entry per child.
	size := boolSize() + len(n.value.Value()) + uintSize(uint64(len(n.children)))
	for index, childEntry := range n.children {
		size += childSize(index, childEntry)
	}
	return size
}

// childSize returns the number of bytes needed to encode the child entry
// at [index]: the index varint, the child's ID, the compressed key, and
// the has-value bool.
func childSize(index byte, childEntry *child) int {
	// Use ids.IDLen for the ID's encoded size, matching the constants
	// declared at the top of this file (it equals len(ids.Empty)).
	return uintSize(uint64(index)) + ids.IDLen + keySize(childEntry.compressedKey) + boolSize()
}

// boolSize returns the number of bytes used to encode a boolean.
// Booleans are serialized as a single byte (0 or 1).
func boolSize() int {
	const encodedBoolLen = 1
	return encodedBoolLen
}

var log128 = math.Log(128)

// uintSize returns the number of bytes binary.PutUvarint uses to encode
// [value]: one byte per 7-bit group, minimum one byte.
//
// The previous implementation derived this from floating-point
// logarithms, which overcounts near values such as 2^56-1 and 2^63-1
// because float64(value) rounds up to an exact power of two before the
// log is taken (e.g. it returned 10 for 2^63-1, whose varint is 9
// bytes). Counting 7-bit groups directly is exact for all inputs.
func uintSize(value uint64) int {
	size := 1
	for value >= 0x80 { // each additional 7-bit group costs one byte
		value >>= 7
		size++
	}
	return size
}

// keySize returns the number of bytes needed to encode [p]: the
// varint-encoded length prefix plus the packed key bytes.
// NOTE(review): assumes bytesNeeded(p.length) is the byte count of the
// packed token data — confirm against the Key definition.
func keySize(p Key) int {
	prefixLen := uintSize(uint64(p.length))
	dataLen := bytesNeeded(p.length)
	return prefixLen + dataLen
}

func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
buf := bytes.NewBuffer(make([]byte, 0, c.encodedDBNodeSize(n)))
c.encodeMaybeByteSlice(buf, n.value)
c.encodeUint(buf, uint64(numChildren))
c.encodeUint(buf, uint64(len(n.children)))
// Note we insert children in order of increasing index
// for determinism.
keys := maps.Keys(n.children)
Expand Down Expand Up @@ -160,7 +183,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
return io.ErrUnexpectedEOF
}

n.children = make(map[byte]child, numChildren)
n.children = make(map[byte]*child, numChildren)
var previousChild uint64
for i := uint64(0); i < numChildren; i++ {
index, err := c.decodeUint(src)
Expand All @@ -184,7 +207,7 @@ func (c *codecImpl) decodeDBNode(b []byte, n *dbNode) error {
if err != nil {
return err
}
n.children[byte(index)] = child{
n.children[byte(index)] = &child{
compressedKey: compressedKey,
id: childID,
hasValue: hasValue,
Expand Down
8 changes: 4 additions & 4 deletions x/merkledb/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {

numChildren := r.Intn(int(bf)) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
for i := 0; i < numChildren; i++ {
var childID ids.ID
_, _ = r.Read(childID[:]) // #nosec G404

childKeyBytes := make([]byte, r.Intn(32)) // #nosec G404
_, _ = r.Read(childKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(childKeyBytes),
id: childID,
}
Expand Down Expand Up @@ -202,14 +202,14 @@ func FuzzEncodeHashValues(f *testing.F) {
for _, bf := range validBranchFactors { // Create a random node
r := rand.New(rand.NewSource(int64(randSeed))) // #nosec G404

children := map[byte]child{}
children := map[byte]*child{}
numChildren := r.Intn(int(bf)) // #nosec G404
for i := 0; i < numChildren; i++ {
compressedKeyLen := r.Intn(32) // #nosec G404
compressedKeyBytes := make([]byte, compressedKeyLen)
_, _ = r.Read(compressedKeyBytes) // #nosec G404

children[byte(i)] = child{
children[byte(i)] = &child{
compressedKey: ToKey(compressedKeyBytes),
id: ids.GenerateTestID(),
hasValue: r.Intn(2) == 1, // #nosec G404
Expand Down
61 changes: 30 additions & 31 deletions x/merkledb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ type merkleDB struct {
// It is the node with a nil key and is the ancestor of all nodes in the trie.
// If it has a value or has multiple children, it is also the root of the trie.
sentinelNode *node
rootID ids.ID

// Valid children of this trie.
childViews []*trieView
Expand Down Expand Up @@ -260,14 +261,13 @@ func newDatabase(
tokenSize: BranchFactorToTokenSize[config.BranchFactor],
}

root, err := trieDB.initializeRootIfNeeded()
if err != nil {
if err := trieDB.initializeRoot(); err != nil {
return nil, err
}

// add current root to history (has no changes)
trieDB.history.record(&changeSummary{
rootID: root,
rootID: trieDB.rootID,
values: map[Key]*change[maybe.Maybe[[]byte]]{},
nodes: map[Key]*change[*node]{},
})
Expand Down Expand Up @@ -578,13 +578,7 @@ func (db *merkleDB) GetMerkleRoot(ctx context.Context) (ids.ID, error) {

// Assumes [db.lock] is read locked.
func (db *merkleDB) getMerkleRoot() ids.ID {
if !isSentinelNodeTheRoot(db.sentinelNode) {
// if the sentinel node should be skipped, the trie's root is the nil key node's only child
for _, childEntry := range db.sentinelNode.children {
return childEntry.id
}
}
return db.sentinelNode.id
return db.rootID
}

// isSentinelNodeTheRoot returns true if the passed in sentinel node has a value and or multiple child nodes
Expand Down Expand Up @@ -982,6 +976,7 @@ func (db *merkleDB) commitChanges(ctx context.Context, trieToCommit *trieView) e
// Only modify in-memory state after the commit succeeds
// so that we don't need to clean up on error.
db.sentinelNode = sentinelChange.after
db.rootID = changes.rootID
db.history.record(changes)
return nil
}
Expand Down Expand Up @@ -1161,34 +1156,38 @@ func (db *merkleDB) invalidateChildrenExcept(exception *trieView) {
}
}

func (db *merkleDB) initializeRootIfNeeded() (ids.ID, error) {
// not sure if the sentinel node exists or if it had a value
// check under both prefixes
func (db *merkleDB) initializeRoot() error {
// Not sure if the sentinel node exists or if it had a value,
// so check under both prefixes
var err error
db.sentinelNode, err = db.intermediateNodeDB.Get(Key{})

if errors.Is(err, database.ErrNotFound) {
// Didn't find the sentinel in the intermediateNodeDB, check the valueNodeDB
db.sentinelNode, err = db.valueNodeDB.Get(Key{})
}
if err == nil {
// sentinel node already exists, so calculate the root ID of the trie
db.sentinelNode.calculateID(db.metrics)
return db.getMerkleRoot(), nil
}
if !errors.Is(err, database.ErrNotFound) {
return ids.Empty, err
}

// sentinel node doesn't exist; make a new one.
db.sentinelNode = newNode(Key{})

// update its ID
db.sentinelNode.calculateID(db.metrics)
if err != nil {
if !errors.Is(err, database.ErrNotFound) {
return err
}

if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return ids.Empty, err
// Sentinel node doesn't exist in either database prefix.
// Make a new one and store it in the intermediateNodeDB
db.sentinelNode = newNode(Key{})
if err := db.intermediateNodeDB.Put(Key{}, db.sentinelNode); err != nil {
return err
}
}

return db.sentinelNode.id, nil
db.rootID = db.sentinelNode.calculateID(db.metrics)
if !isSentinelNodeTheRoot(db.sentinelNode) {
// If the sentinel node is not the root, the trie's root is the sentinel node's only child
for _, childEntry := range db.sentinelNode.children {
db.rootID = childEntry.id
}
}
return nil
}

// Returns a view of the trie as it was when it had root [rootID] for keys within range [start, end].
Expand Down Expand Up @@ -1289,7 +1288,7 @@ func (db *merkleDB) Clear() error {

// Clear root
db.sentinelNode = newNode(Key{})
db.sentinelNode.calculateID(db.metrics)
db.rootID = db.sentinelNode.calculateID(db.metrics)

// Clear history
db.history = newTrieHistory(db.history.maxHistoryLen)
Expand Down Expand Up @@ -1335,5 +1334,5 @@ func cacheEntrySize(key Key, n *node) int {
return len(key.Bytes())
}
// nodes cache their bytes representation so the total memory consumed is roughly twice that
return len(key.Bytes()) + 2*len(n.bytes())
return len(key.Bytes()) + codec.encodedDBNodeSize(&n.dbNode)
}
4 changes: 2 additions & 2 deletions x/merkledb/history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -660,8 +660,8 @@ func TestHistoryGetChangesToRoot(t *testing.T) {
rootID: ids.GenerateTestID(),
nodes: map[Key]*change[*node]{
ToKey([]byte{byte(i)}): {
before: &node{id: ids.GenerateTestID()},
after: &node{id: ids.GenerateTestID()},
before: &node{},
after: &node{},
},
},
values: map[Key]*change[maybe.Maybe[[]byte]]{
Expand Down
55 changes: 19 additions & 36 deletions x/merkledb/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package merkledb

import (
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"

"github.com/ava-labs/avalanchego/ids"
Expand All @@ -17,7 +16,7 @@ const HashLength = 32
// Representation of a node stored in the database.
type dbNode struct {
value maybe.Maybe[[]byte]
children map[byte]child
children map[byte]*child
}

type child struct {
Expand All @@ -29,17 +28,15 @@ type child struct {
// node holds additional information on top of the dbNode that makes calculations easier to do
type node struct {
dbNode
id ids.ID
key Key
nodeBytes []byte
valueDigest maybe.Maybe[[]byte]
}

// Returns a new node with the given [key] and no value.
func newNode(key Key) *node {
return &node{
dbNode: dbNode{
children: make(map[byte]child, 2),
children: make(map[byte]*child, 2),
},
key: key,
}
Expand All @@ -52,9 +49,8 @@ func parseNode(key Key, nodeBytes []byte) (*node, error) {
return nil, err
}
result := &node{
dbNode: n,
key: key,
nodeBytes: nodeBytes,
dbNode: n,
key: key,
}

result.setValueDigest()
Expand All @@ -68,34 +64,18 @@ func (n *node) hasValue() bool {

// Returns the byte representation of this node.
func (n *node) bytes() []byte {
if n.nodeBytes == nil {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing this cached value means we'll have to reserialize the node before writing it. Are we concerned about the additional time that'll take? Seems to be a tradeoff between memory usage and CPU usage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything we are writing is a node we just edited, so this cached value would always have been nil here anyway.

n.nodeBytes = codec.encodeDBNode(&n.dbNode)
}

return n.nodeBytes
}

// clear the cached values that will need to be recalculated whenever the node changes
// for example, node ID and byte representation
func (n *node) onNodeChanged() {
n.id = ids.Empty
n.nodeBytes = nil
return codec.encodeDBNode(&n.dbNode)
}

// Returns and caches the ID of this node.
func (n *node) calculateID(metrics merkleMetrics) {
if n.id != ids.Empty {
return
}

func (n *node) calculateID(metrics merkleMetrics) ids.ID {
metrics.HashCalculated()
bytes := codec.encodeHashValues(n)
n.id = hashing.ComputeHash256Array(bytes)
return hashing.ComputeHash256Array(bytes)
}

// Set [n]'s value to [val].
func (n *node) setValue(val maybe.Maybe[[]byte]) {
n.onNodeChanged()
n.value = val
n.setValueDigest()
}
Expand All @@ -114,23 +94,20 @@ func (n *node) setValueDigest() {
func (n *node) addChild(childNode *node, tokenSize int) {
n.setChildEntry(
childNode.key.Token(n.key.length, tokenSize),
child{
&child{
compressedKey: childNode.key.Skip(n.key.length + tokenSize),
id: childNode.id,
hasValue: childNode.hasValue(),
},
)
}

// Adds a child to [n] without a reference to the child node.
func (n *node) setChildEntry(index byte, childEntry child) {
n.onNodeChanged()
func (n *node) setChildEntry(index byte, childEntry *child) {
n.children[index] = childEntry
}

// Removes [child] from [n]'s children.
func (n *node) removeChild(child *node, tokenSize int) {
n.onNodeChanged()
delete(n.children, child.key.Token(n.key.length, tokenSize))
}

Expand All @@ -139,16 +116,22 @@ func (n *node) removeChild(child *node, tokenSize int) {
// if this ever changes, value will need to be copied as well
// it is safe to clone all fields because they are only written/read while one or both of the db locks are held
func (n *node) clone() *node {
return &node{
id: n.id,
result := &node{
key: n.key,
dbNode: dbNode{
value: n.value,
children: maps.Clone(n.children),
children: make(map[byte]*child, len(n.children)),
},
valueDigest: n.valueDigest,
nodeBytes: n.nodeBytes,
}
for key, existing := range n.children {
result.children[key] = &child{
compressedKey: existing.compressedKey,
id: existing.id,
hasValue: existing.hasValue,
}
}
return result
}

// Returns the ProofNode representation of this node.
Expand Down
Loading