Skip to content

eth,core: add a state size live tracer #31914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
20 changes: 19 additions & 1 deletion core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -1539,15 +1539,33 @@ func (bc *BlockChain) writeBlockWithState(block *types.Block, receipts []*types.
log.Crit("Failed to write block into disk", "err", err)
}
// Commit all cached state changes into underlying memory database.
root, err := statedb.Commit(block.NumberU64(), bc.chainConfig.IsEIP158(block.Number()), bc.chainConfig.IsCancun(block.Number(), block.Time()))
update, err := statedb.CommitWithUpdate(block.NumberU64(), bc.chainConfig.IsEIP158(block.Number()), bc.chainConfig.IsCancun(block.Number(), block.Time()))
if err != nil {
return err
}
// Report the state changes to the tracer if one is configured and it handles state commits.
if bc.logger != nil && bc.logger.OnStateCommit != nil {
sc := update.IntoChangeset()
bc.logger.OnStateCommit(&tracing.StateUpdate{
Number: block.NumberU64(),
Hash: block.Hash(),
Time: block.Time(),
Accounts: int64(sc.Accounts),
AccountSize: int64(sc.AccountSize),
Storages: int64(sc.Storages),
StorageSize: int64(sc.StorageSize),
Trienodes: int64(sc.Trienodes),
TrienodeSize: int64(sc.TrienodeSize),
Codes: int64(sc.Codes),
CodeSize: int64(sc.CodeSize),
})
}
// If node is running in path mode, skip explicit gc operation
// which is unnecessary in this mode.
if bc.triedb.Scheme() == rawdb.PathScheme {
return nil
}
root := update.Root
// If we're running an archive node, always flush
if bc.cacheConfig.TrieDirtyDisabled {
return bc.triedb.Commit(root, false)
Expand Down
36 changes: 23 additions & 13 deletions core/state/statedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ func (s *StateDB) fastDeleteStorage(snaps *snapshot.Tree, addrHash common.Hash,
storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot
)
stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
nodes.AddNode(path, trienode.NewDeleted())
nodes.AddNode(path, trienode.NewDeleted(len(blob)))
})
for iter.Next() {
slot := common.CopyBytes(iter.Slot())
Expand Down Expand Up @@ -1007,7 +1007,7 @@ func (s *StateDB) slowDeleteStorage(addr common.Address, addrHash common.Hash, r
if it.Hash() == (common.Hash{}) {
continue
}
nodes.AddNode(it.Path(), trienode.NewDeleted())
nodes.AddNode(it.Path(), trienode.NewDeleted(len(it.NodeBlob())))
}
if err := it.Error(); err != nil {
return nil, nil, nil, err
Expand Down Expand Up @@ -1113,7 +1113,7 @@ func (s *StateDB) GetTrie() Trie {

// commit gathers the state mutations accumulated along with the associated
// trie changes, resetting all internal flags with the new state as the base.
func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*stateUpdate, error) {
func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*StateUpdate, error) {
// Short circuit in case any database failure occurred earlier.
if s.dbErr != nil {
return nil, fmt.Errorf("commit aborted due to earlier error: %v", s.dbErr)
Expand Down Expand Up @@ -1270,15 +1270,15 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*stateU

// commitAndFlush is a wrapper of commit which also commits the state mutations
// to the configured data stores.
func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (*stateUpdate, error) {
func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (*StateUpdate, error) {
ret, err := s.commit(deleteEmptyObjects, noStorageWiping)
if err != nil {
return nil, err
}
// Commit dirty contract code if any exists
if db := s.db.TrieDB().Disk(); db != nil && len(ret.codes) > 0 {
if db := s.db.TrieDB().Disk(); db != nil && len(ret.Codes) > 0 {
batch := db.NewBatch()
for _, code := range ret.codes {
for _, code := range ret.Codes {
rawdb.WriteCode(batch, code.hash, code.blob)
}
if err := batch.Write(); err != nil {
Expand All @@ -1287,24 +1287,24 @@ func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool, noStorag
}
if !ret.empty() {
// If snapshotting is enabled, update the snapshot tree with this new version
if snap := s.db.Snapshot(); snap != nil && snap.Snapshot(ret.originRoot) != nil {
if snap := s.db.Snapshot(); snap != nil && snap.Snapshot(ret.OriginRoot) != nil {
start := time.Now()
if err := snap.Update(ret.root, ret.originRoot, ret.accounts, ret.storages); err != nil {
log.Warn("Failed to update snapshot tree", "from", ret.originRoot, "to", ret.root, "err", err)
if err := snap.Update(ret.Root, ret.OriginRoot, ret.Accounts, ret.Storages); err != nil {
log.Warn("Failed to update snapshot tree", "from", ret.OriginRoot, "to", ret.Root, "err", err)
}
// Keep 128 diff layers in the memory, persistent layer is 129th.
// - head layer is paired with HEAD state
// - head-1 layer is paired with HEAD-1 state
// - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state
if err := snap.Cap(ret.root, TriesInMemory); err != nil {
log.Warn("Failed to cap snapshot tree", "root", ret.root, "layers", TriesInMemory, "err", err)
if err := snap.Cap(ret.Root, TriesInMemory); err != nil {
log.Warn("Failed to cap snapshot tree", "root", ret.Root, "layers", TriesInMemory, "err", err)
}
s.SnapshotCommits += time.Since(start)
}
// If trie database is enabled, commit the state update as a new layer
if db := s.db.TrieDB(); db != nil {
start := time.Now()
if err := db.Update(ret.root, ret.originRoot, block, ret.nodes, ret.stateSet()); err != nil {
if err := db.Update(ret.Root, ret.OriginRoot, block, ret.Nodes, ret.stateSet()); err != nil {
return nil, err
}
s.TrieDBCommits += time.Since(start)
Expand Down Expand Up @@ -1333,7 +1333,17 @@ func (s *StateDB) Commit(block uint64, deleteEmptyObjects bool, noStorageWiping
if err != nil {
return common.Hash{}, err
}
return ret.root, nil
return ret.Root, nil
}

// CommitWithUpdate behaves like Commit: it commits the pending state mutations
// for the given block through commitAndFlush. Rather than returning only the
// resulting root hash, it returns the full state update produced by the commit.
// On failure the returned update is always nil.
func (s *StateDB) CommitWithUpdate(block uint64, deleteEmptyObjects bool, noStorageWiping bool) (*StateUpdate, error) {
	update, commitErr := s.commitAndFlush(block, deleteEmptyObjects, noStorageWiping)
	if commitErr != nil {
		// Never hand out an update together with an error.
		return nil, commitErr
	}
	return update, nil
}

// Prepare handles the preparatory steps for executing a state transition with.
Expand Down
12 changes: 6 additions & 6 deletions core/state/statedb_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ func (test *stateTest) run() bool {
accountOrigin []map[common.Address][]byte
storages []map[common.Hash]map[common.Hash][]byte
storageOrigin []map[common.Address]map[common.Hash][]byte
copyUpdate = func(update *stateUpdate) {
accounts = append(accounts, maps.Clone(update.accounts))
accountOrigin = append(accountOrigin, maps.Clone(update.accountsOrigin))
storages = append(storages, maps.Clone(update.storages))
storageOrigin = append(storageOrigin, maps.Clone(update.storagesOrigin))
copyUpdate = func(update *StateUpdate) {
accounts = append(accounts, maps.Clone(update.Accounts))
accountOrigin = append(accountOrigin, maps.Clone(update.AccountsOrigin))
storages = append(storages, maps.Clone(update.Storages))
storageOrigin = append(storageOrigin, maps.Clone(update.StoragesOrigin))
}
disk = rawdb.NewMemoryDatabase()
tdb = triedb.NewDatabase(disk, &triedb.Config{PathDB: pathdb.Defaults})
Expand Down Expand Up @@ -236,7 +236,7 @@ func (test *stateTest) run() bool {
return true
}
copyUpdate(ret)
roots = append(roots, ret.root)
roots = append(roots, ret.Root)
}
for i := 0; i < len(test.actions); i++ {
root := types.EmptyRootHash
Expand Down
169 changes: 137 additions & 32 deletions core/state/stateupdate.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"maps"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb"
)
Expand All @@ -30,6 +32,11 @@ type contractCode struct {
blob []byte // blob is the binary representation of the contract code.
}

// CodeLen returns the length, in bytes, of the contract code blob held by c.
// It reads c.blob directly, so c must not be nil.
func (c *contractCode) CodeLen() int {
return len(c.blob)
}

// accountDelete represents an operation for deleting an Ethereum account.
type accountDelete struct {
address common.Address // address is the unique account identifier
Expand Down Expand Up @@ -60,33 +67,33 @@ type accountUpdate struct {
storagesOriginByHash map[common.Hash][]byte
}

// stateUpdate represents the difference between two states resulting from state
// StateUpdate represents the difference between two states resulting from state
// execution. It contains information about mutated contract codes, accounts,
// and storage slots, along with their original values.
type stateUpdate struct {
originRoot common.Hash // hash of the state before applying mutation
root common.Hash // hash of the state after applying mutation
accounts map[common.Hash][]byte // accounts stores mutated accounts in 'slim RLP' encoding
accountsOrigin map[common.Address][]byte // accountsOrigin stores the original values of mutated accounts in 'slim RLP' encoding
type StateUpdate struct {
OriginRoot common.Hash // hash of the state before applying mutation
Root common.Hash // hash of the state after applying mutation
Accounts map[common.Hash][]byte // accounts stores mutated accounts in 'slim RLP' encoding
AccountsOrigin map[common.Address][]byte // accountsOrigin stores the original values of mutated accounts in 'slim RLP' encoding

// storages stores mutated slots in 'prefix-zero-trimmed' RLP format.
// Storages stores mutated slots in 'prefix-zero-trimmed' RLP format.
// The value is keyed by account hash and **storage slot key hash**.
storages map[common.Hash]map[common.Hash][]byte
Storages map[common.Hash]map[common.Hash][]byte

// storagesOrigin stores the original values of mutated slots in
// StoragesOrigin stores the original values of mutated slots in
// 'prefix-zero-trimmed' RLP format.
// (a) the value is keyed by account hash and **storage slot key** if rawStorageKey is true;
// (b) the value is keyed by account hash and **storage slot key hash** if rawStorageKey is false;
storagesOrigin map[common.Address]map[common.Hash][]byte
rawStorageKey bool
StoragesOrigin map[common.Address]map[common.Hash][]byte
RawStorageKey bool

codes map[common.Address]contractCode // codes contains the set of dirty codes
nodes *trienode.MergedNodeSet // Aggregated dirty nodes caused by state changes
Codes map[common.Address]contractCode // codes contains the set of dirty codes
Nodes *trienode.MergedNodeSet // Aggregated dirty nodes caused by state changes
}

// empty returns a flag indicating the state transition is empty or not.
func (sc *stateUpdate) empty() bool {
return sc.originRoot == sc.root
func (sc *StateUpdate) empty() bool {
return sc.OriginRoot == sc.Root
}

// newStateUpdate constructs a state update object by identifying the differences
Expand All @@ -95,7 +102,7 @@ func (sc *stateUpdate) empty() bool {
//
// rawStorageKey is a flag indicating whether to use the raw storage slot key or
// the hash of the slot key for constructing state update object.
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet) *stateUpdate {
func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash, deletes map[common.Hash]*accountDelete, updates map[common.Hash]*accountUpdate, nodes *trienode.MergedNodeSet) *StateUpdate {
var (
accounts = make(map[common.Hash][]byte)
accountsOrigin = make(map[common.Address][]byte)
Expand Down Expand Up @@ -161,29 +168,127 @@ func newStateUpdate(rawStorageKey bool, originRoot common.Hash, root common.Hash
}
}
}
return &stateUpdate{
originRoot: originRoot,
root: root,
accounts: accounts,
accountsOrigin: accountsOrigin,
storages: storages,
storagesOrigin: storagesOrigin,
rawStorageKey: rawStorageKey,
codes: codes,
nodes: nodes,
return &StateUpdate{
OriginRoot: originRoot,
Root: root,
Accounts: accounts,
AccountsOrigin: accountsOrigin,
Storages: storages,
StoragesOrigin: storagesOrigin,
RawStorageKey: rawStorageKey,
Codes: codes,
Nodes: nodes,
}
}

// stateSet converts the current stateUpdate object into a triedb.StateSet
// object. This function extracts the necessary data from the stateUpdate
// struct and formats it into the StateSet structure consumed by the triedb
// package.
func (sc *stateUpdate) stateSet() *triedb.StateSet {
func (sc *StateUpdate) stateSet() *triedb.StateSet {
return &triedb.StateSet{
Accounts: sc.accounts,
AccountsOrigin: sc.accountsOrigin,
Storages: sc.storages,
StoragesOrigin: sc.storagesOrigin,
RawStorageKey: sc.rawStorageKey,
Accounts: sc.Accounts,
AccountsOrigin: sc.AccountsOrigin,
Storages: sc.Storages,
StoragesOrigin: sc.StoragesOrigin,
RawStorageKey: sc.RawStorageKey,
}
}

// StateChangeset summarizes the state mutations captured by a StateUpdate.
// Every field is a net change relative to the state before the update, not an
// absolute total: the account, storage and trie node fields are negative when
// more entries were removed (or shrunk) than were added (or grown).
type StateChangeset struct {
Accounts int // Net change in the number of accounts (created minus deleted)
Storages int // Net change in the number of storage slots across all accounts (created minus deleted)
Trienodes int // Net change in the number of trie nodes (created minus deleted)
Codes int // Number of dirty contract codes carried by the update, each identified by its 32 bytes hash
AccountSize int // Net change in the combined account size, with the 20 bytes address counted as the identifier
StorageSize int // Net change in the combined storage slot size, with a 32 bytes key counted as the identifier
TrienodeSize int // Net change in the combined trie node size, with the node path length plus 32 bytes counted as the identifier
CodeSize int // Combined size of the dirty contract codes, with the 32 bytes code hash counted as the identifier
}

// IntoChangeset summarizes the StateUpdate as a StateChangeset holding the net
// change in entry counts and byte sizes for accounts, storage slots, trie
// nodes and contract codes.
//
// Accounts, storage slots and trie nodes are measured as deltas between the
// original and the new value, so the corresponding fields may be negative. An
// entry whose new value is empty is treated as deleted and an entry whose
// original value is empty as created; the identifier size (address, slot key,
// or node path plus a 32-byte hash) is only accounted on creation and
// deletion. Entries listed in the origin sets but missing from the mutated
// sets are logged and skipped.
//
// Contract codes are only ever added. Identical code carried for several
// addresses is counted once, mirroring the hash-keyed database write; code
// already persisted by an earlier update cannot be detected here and is still
// counted.
func (sc *StateUpdate) IntoChangeset() *StateChangeset {
	var (
		accountSize, storageSize, nodeSize, codeSize int
		accounts, storages, nodes, codes             int
	)
	for addr, oldValue := range sc.AccountsOrigin {
		addrHash := crypto.Keccak256Hash(addr.Bytes())
		newValue, exists := sc.Accounts[addrHash]
		if !exists {
			log.Warn("State update missing account", "address", addr)
			continue
		}
		count, size := changesetDelta(common.AddressLength, len(oldValue), len(newValue))
		accounts += count
		accountSize += size
	}
	for addr, slots := range sc.StoragesOrigin {
		addrHash := crypto.Keccak256Hash(addr.Bytes())
		subset, exists := sc.Storages[addrHash]
		if !exists {
			log.Warn("State update missing storage", "address", addr)
			continue
		}
		for key, oldValue := range slots {
			// The mutated set is always keyed by the slot key hash, while
			// the origin set is keyed by the raw key if RawStorageKey is set.
			slotHash := key
			if sc.RawStorageKey {
				slotHash = crypto.Keccak256Hash(key.Bytes())
			}
			newValue, found := subset[slotHash]
			if !found {
				log.Warn("State update missing storage slot", "address", addr, "key", key)
				continue
			}
			count, size := changesetDelta(common.HashLength, len(oldValue), len(newValue))
			storages += count
			storageSize += size
		}
	}
	// Tolerate updates constructed without a node set instead of panicking
	// on a nil pointer dereference.
	if sc.Nodes != nil {
		for _, subset := range sc.Nodes.Sets {
			for path, n := range subset.Nodes {
				// OriginLen presumably reports the size of the node blob
				// before the update; zero is treated as "did not exist".
				count, size := changesetDelta(len(path)+common.HashLength, n.OriginLen(), len(n.Blob))
				nodes += count
				nodeSize += size
			}
		}
	}
	// Contract code is persisted keyed by its hash, so the same code carried
	// for several addresses occupies a single database entry.
	seen := make(map[common.Hash]struct{}, len(sc.Codes))
	for _, code := range sc.Codes {
		if _, dup := seen[code.hash]; dup {
			continue
		}
		seen[code.hash] = struct{}{}
		codes++
		codeSize += code.CodeLen() + common.HashLength
	}
	return &StateChangeset{
		Accounts:     accounts,
		AccountSize:  accountSize,
		Storages:     storages,
		StorageSize:  storageSize,
		Trienodes:    nodes,
		TrienodeSize: nodeSize,
		Codes:        codes,
		CodeSize:     codeSize,
	}
}

// changesetDelta returns the change in entry count and byte size caused by an
// entry whose value length goes from oldLen to newLen, where keyLen is the
// size of the entry's identifier. A zero newLen means the entry was deleted
// and a zero oldLen means it was created; if both are zero the result is zero.
func changesetDelta(keyLen, oldLen, newLen int) (count, size int) {
	if newLen == 0 {
		count--
		size -= keyLen
	}
	if oldLen == 0 {
		count++
		size += keyLen
	}
	size += newLen - oldLen
	return count, size
}
Loading