Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core, triedb/pathdb: calculate the size for batch pre-allocation #29106

Merged
merged 2 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions core/rawdb/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ var (
skeletonHeaderPrefix = []byte("S") // skeletonHeaderPrefix + num (uint64 big endian) -> header

// Path-based storage scheme of merkle patricia trie.
trieNodeAccountPrefix = []byte("A") // trieNodeAccountPrefix + hexPath -> trie node
trieNodeStoragePrefix = []byte("O") // trieNodeStoragePrefix + accountHash + hexPath -> trie node
TrieNodeAccountPrefix = []byte("A") // TrieNodeAccountPrefix + hexPath -> trie node
TrieNodeStoragePrefix = []byte("O") // TrieNodeStoragePrefix + accountHash + hexPath -> trie node
stateIDPrefix = []byte("L") // stateIDPrefix + state root -> state id

PreimagePrefix = []byte("secure-key-") // PreimagePrefix + hash -> preimage
Expand Down Expand Up @@ -265,15 +265,15 @@ func stateIDKey(root common.Hash) []byte {
return append(stateIDPrefix, root.Bytes()...)
}

// accountTrieNodeKey = trieNodeAccountPrefix + nodePath.
// accountTrieNodeKey = TrieNodeAccountPrefix + nodePath.
func accountTrieNodeKey(path []byte) []byte {
return append(trieNodeAccountPrefix, path...)
return append(TrieNodeAccountPrefix, path...)
}

// storageTrieNodeKey = trieNodeStoragePrefix + accountHash + nodePath.
// storageTrieNodeKey = TrieNodeStoragePrefix + accountHash + nodePath.
func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte {
buf := make([]byte, len(trieNodeStoragePrefix)+common.HashLength+len(path))
n := copy(buf, trieNodeStoragePrefix)
buf := make([]byte, len(TrieNodeStoragePrefix)+common.HashLength+len(path))
n := copy(buf, TrieNodeStoragePrefix)
n += copy(buf[n:], accountHash.Bytes())
copy(buf[n:], path)
return buf
Expand All @@ -294,16 +294,16 @@ func IsLegacyTrieNode(key []byte, val []byte) bool {
// account trie node in path-based state scheme, and returns the resolved
// node path if so.
func ResolveAccountTrieNodeKey(key []byte) (bool, []byte) {
if !bytes.HasPrefix(key, trieNodeAccountPrefix) {
if !bytes.HasPrefix(key, TrieNodeAccountPrefix) {
return false, nil
}
// The remaining key should only consist of a hex node path
// whose length is in the range 0 to 64 (64 is excluded
// since leaves are always wrapped with shortNode).
if len(key) >= len(trieNodeAccountPrefix)+common.HashLength*2 {
if len(key) >= len(TrieNodeAccountPrefix)+common.HashLength*2 {
return false, nil
}
return true, key[len(trieNodeAccountPrefix):]
return true, key[len(TrieNodeAccountPrefix):]
}

// IsAccountTrieNode reports whether a provided database entry is an account
Expand All @@ -317,20 +317,20 @@ func IsAccountTrieNode(key []byte) bool {
// trie node in path-based state scheme, and returns the resolved account hash
// and node path if so.
func ResolveStorageTrieNode(key []byte) (bool, common.Hash, []byte) {
if !bytes.HasPrefix(key, trieNodeStoragePrefix) {
if !bytes.HasPrefix(key, TrieNodeStoragePrefix) {
return false, common.Hash{}, nil
}
// The remaining key consists of 2 parts:
// - 32 bytes account hash
// - hex node path whose length is in the range 0 to 64
if len(key) < len(trieNodeStoragePrefix)+common.HashLength {
if len(key) < len(TrieNodeStoragePrefix)+common.HashLength {
return false, common.Hash{}, nil
}
if len(key) >= len(trieNodeStoragePrefix)+common.HashLength+common.HashLength*2 {
if len(key) >= len(TrieNodeStoragePrefix)+common.HashLength+common.HashLength*2 {
return false, common.Hash{}, nil
}
accountHash := common.BytesToHash(key[len(trieNodeStoragePrefix) : len(trieNodeStoragePrefix)+common.HashLength])
return true, accountHash, key[len(trieNodeStoragePrefix)+common.HashLength:]
accountHash := common.BytesToHash(key[len(TrieNodeStoragePrefix) : len(TrieNodeStoragePrefix)+common.HashLength])
return true, accountHash, key[len(TrieNodeStoragePrefix)+common.HashLength:]
}

// IsStorageTrieNode reports whether a provided database entry is a storage
Expand Down
15 changes: 14 additions & 1 deletion triedb/pathdb/nodebuffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,19 @@ func (b *nodebuffer) setSize(size int, db ethdb.KeyValueStore, clean *fastcache.
return b.flush(db, clean, id, false)
}

// allocBatch returns a database batch with a pre-allocated buffer.
func (b *nodebuffer) allocBatch(db ethdb.KeyValueStore) ethdb.Batch {
var metasize int
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you calculate it during the commit?

for path, n := range subset {
if orig, exist := current[path]; !exist {
delta += int64(len(n.Blob) + len(path))
} else {
delta += int64(len(n.Blob) - len(orig.Blob))
overwrite++
overwriteSize += int64(len(orig.Blob) + len(path))
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it's cleaner to do it as late as possible instead of risking the counters going out of sync with the code. Also less code/fields to keep in mind.

for owner, nodes := range b.nodes {
if owner == (common.Hash{}) {
metasize += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix
} else {
metasize += len(nodes) * (len(rawdb.TrieNodeStoragePrefix) + common.HashLength) // database key prefix + owner
}
}
return db.NewBatchWithSize((metasize + int(b.size)) * 11 / 10) // extra 10% for potential pebble internal stuff
}

// flush persists the in-memory dirty trie node into the disk if the configured
// memory threshold is reached. Note, all data must be written atomically.
func (b *nodebuffer) flush(db ethdb.KeyValueStore, clean *fastcache.Cache, id uint64, force bool) error {
Expand All @@ -217,7 +230,7 @@ func (b *nodebuffer) flush(db ethdb.KeyValueStore, clean *fastcache.Cache, id ui
}
var (
start = time.Now()
batch = db.NewBatchWithSize(int(b.size))
batch = b.allocBatch(db)
)
nodes := writeNodes(batch, b.nodes, clean)
rawdb.WritePersistentStateID(batch, id)
Expand Down
Loading