From db9487f1e8f924d118cf54040e4cff370fd60b3b Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH] core: split out detailed trie access metrics from insertion time (#19316) --- cmd/XDC/main.go | 1 + cmd/utils/flags.go | 6 +++- cmd/utils/flags_legacy.go | 2 +- core/blockchain.go | 26 ++++++++++++++++- core/state/state_object.go | 19 ++++++++++++ core/state/statedb.go | 47 ++++++++++++++++++++++++++---- metrics/metrics.go | 59 +++++++++++++++++++++++++++----------- 7 files changed, 135 insertions(+), 25 deletions(-) diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index 37b5dde48ab8..fb01ba2f5f77 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -163,6 +163,7 @@ var ( metricsFlags = []cli.Flag{ utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, utils.MetricsHTTPFlag, utils.MetricsPortFlag, utils.MetricsEnableInfluxDBFlag, diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 04ddbca93622..b42914b2a866 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -644,7 +644,11 @@ var ( Usage: "Enable metrics collection and reporting", Category: flags.MetricsCategory, } - + MetricsEnabledExpensiveFlag = &cli.BoolFlag{ + Name: "metrics-expensive", + Usage: "Enable expensive metrics collection and reporting", + Category: flags.MetricsCategory, + } // MetricsHTTPFlag defines the endpoint for a stand-alone metrics HTTP endpoint. 
// Since the pprof service enables sensitive/vulnerable behavior, this allows a user // to enable a public-OK metrics endpoint without having to worry about ALSO exposing diff --git a/cmd/utils/flags_legacy.go b/cmd/utils/flags_legacy.go index b47a74595613..3bd314292d52 100644 --- a/cmd/utils/flags_legacy.go +++ b/cmd/utils/flags_legacy.go @@ -50,7 +50,7 @@ var ( Usage: "Enable light client mode", Category: flags.DeprecatedCategory, } - // (Deprecated May 2020, shown in aliased flags section) + // Deprecated May 2020, shown in aliased flags section NoUSBFlag = &cli.BoolFlag{ Name: "nousb", Usage: "Disables monitoring for and managing USB hardware wallets (deprecated)", diff --git a/core/blockchain.go b/core/blockchain.go index ebd17ccbc56f..b90e461cc6ae 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -56,6 +56,16 @@ import ( ) var ( + accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) + accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) + accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) + accountCommitTimer = metrics.NewRegisteredTimer("chain/account/commits", nil) + + storageReadTimer = metrics.NewRegisteredTimer("chain/storage/reads", nil) + storageHashTimer = metrics.NewRegisteredTimer("chain/storage/hashes", nil) + storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) + storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) @@ -1686,8 +1696,22 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] return i, events, coalescedLogs, err } + // Update the metrics subsystem with all the measurements + accountReadTimer.Update(statedb.AccountReads) + 
accountHashTimer.Update(statedb.AccountHashes) + accountUpdateTimer.Update(statedb.AccountUpdates) + accountCommitTimer.Update(statedb.AccountCommits) + + storageReadTimer.Update(statedb.StorageReads) + storageHashTimer.Update(statedb.StorageHashes) + storageUpdateTimer.Update(statedb.StorageUpdates) + storageCommitTimer.Update(statedb.StorageCommits) + + trieAccess := statedb.AccountReads + statedb.AccountHashes + statedb.AccountUpdates + statedb.AccountCommits + trieAccess += statedb.StorageReads + statedb.StorageHashes + statedb.StorageUpdates + statedb.StorageCommits + blockInsertTimer.UpdateSince(bstart) - blockExecutionTimer.Update(t1.Sub(t0)) + blockExecutionTimer.Update(t1.Sub(t0) - trieAccess) blockValidationTimer.Update(t2.Sub(t1)) blockWriteTimer.Update(t3.Sub(t2)) diff --git a/core/state/state_object.go b/core/state/state_object.go index ca695efe3b85..10fcfc8b3cb2 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -21,10 +21,12 @@ import ( "fmt" "io" "math/big" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/rlp" ) @@ -173,6 +175,10 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if s.fakeStorage != nil { return s.fakeStorage[key] } + // Track the amount of time wasted on reading the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) + } value := common.Hash{} // Load from DB in case it is missing. enc, err := s.getTrie(db).TryGet(key[:]) @@ -269,6 +275,10 @@ func (s *stateObject) setState(key, value common.Hash) { // updateTrie writes cached storage modifications into the object's storage trie. 
func (s *stateObject) updateTrie(db Database) Trie { + // Track the amount of time wasted on updating the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) + } tr := s.getTrie(db) for key, value := range s.dirtyStorage { delete(s.dirtyStorage, key) @@ -286,6 +296,11 @@ func (s *stateObject) updateTrie(db Database) Trie { // UpdateRoot sets the trie root to the current root hash of func (s *stateObject) updateRoot(db Database) { s.updateTrie(db) + + // Track the amount of time wasted on hashing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) + } s.data.Root = s.trie.Hash() } @@ -296,6 +311,10 @@ func (s *stateObject) CommitTrie(db Database) error { if s.dbErr != nil { return s.dbErr } + // Track the amount of time wasted on committing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) + } root, err := s.trie.Commit(nil) if err == nil { s.data.Root = root diff --git a/core/state/statedb.go b/core/state/statedb.go index 7782643f8a0e..0e0dc9e7e5b5 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,11 +22,13 @@ import ( "math/big" "sort" "sync" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/params" "github.com/XinFinOrg/XDPoSChain/rlp" "github.com/XinFinOrg/XDPoSChain/trie" ) @@ -80,6 +82,16 @@ type StateDB struct { nextRevisionId int lock sync.Mutex + + // Measurements gathered during execution for debugging purposes + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + 
StorageUpdates time.Duration + StorageCommits time.Duration } type AccountInfo struct { @@ -446,7 +458,13 @@ func (s *StateDB) GetTransientState(addr common.Address, key common.Hash) common // updateStateObject writes the given object to the trie. func (s *StateDB) updateStateObject(stateObject *stateObject) { + // Track the amount of time wasted on updating the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + } + // Encode the account and update the account trie addr := stateObject.Address() + data, err := rlp.EncodeToBytes(stateObject) if err != nil { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) @@ -456,7 +474,13 @@ func (s *StateDB) updateStateObject(stateObject *stateObject) { // deleteStateObject removes the given object from the state trie. func (s *StateDB) deleteStateObject(stateObject *stateObject) { + // Track the amount of time wasted on deleting the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + } + // Delete the account from the trie stateObject.deleted = true + addr := stateObject.Address() s.setError(s.trie.TryDelete(addr[:])) } @@ -471,15 +495,18 @@ func (s *StateDB) DeleteAddress(addr common.Address) { // Retrieve a state object given my the address. Returns nil if not found. func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) { - // Prefer 'live' objects. + // Prefer live objects if any is available if obj := s.stateObjects[addr]; obj != nil { if obj.deleted { return nil } return obj } - - // Load the object from the database. 
+ // Track the amount of time wasted on loading the object from the database + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + } + // Load the object from the database enc, err := s.trie.TryGet(addr[:]) if len(enc) == 0 { s.setError(err) @@ -490,7 +517,7 @@ func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) log.Error("Failed to decode state object", "addr", addr, "err", err) return nil } - // Insert into the live set. + // Insert into the live set obj := newObject(s, addr, data, s.MarkStateObjectDirty) s.setStateObject(obj) return obj @@ -672,6 +699,11 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { // goes into transaction receipts. func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { s.Finalise(deleteEmptyObjects) + + // Track the amount of time wasted on hashing the account trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) + } return s.trie.Hash() } @@ -714,7 +746,7 @@ func (s *StateDB) clearJournalAndRefund() { func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) { defer s.clearJournalAndRefund() - // Commit objects to the trie. + // Commit objects to the trie, measuring the elapsed time for addr, stateObject := range s.stateObjects { _, isDirty := s.stateObjectsDirty[addr] switch { @@ -737,7 +769,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) } delete(s.stateObjectsDirty, addr) } - // Write trie changes. 
+ // Write the account trie changes, measuring the amount of wasted time + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + } root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { var account Account if err := rlp.DecodeBytes(leaf, &account); err != nil { diff --git a/metrics/metrics.go b/metrics/metrics.go index ebfa602d18d9..53c8e81fbed7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -15,23 +15,41 @@ import ( ) // Enabled is checked by the constructor functions for all of the -// standard metrics. If it is true, the metric returned is a stub. +// standard metrics. If it is true, the metric returned is a stub. // // This global kill-switch helps quantify the observer effect and makes // for less cluttered pprof profiles. var Enabled = false -// MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. -const MetricsEnabledFlag = "metrics" +// EnabledExpensive is a soft-flag meant for external packages to check if costly +// metrics gathering is allowed or not. The goal is to separate standard metrics +// for health monitoring and debug metrics that might impact runtime performance. +var EnabledExpensive = false + +// enablerFlags is the CLI flag names to use to enable metrics collections. +var enablerFlags = []string{"metrics"} + +// expensiveEnablerFlags is the CLI flag names to use to enable expensive metrics collections. +var expensiveEnablerFlags = []string{"metrics-expensive"} // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. 
func init() { for _, arg := range os.Args { - if flag := strings.TrimLeft(arg, "-"); flag == MetricsEnabledFlag { - log.Info("Enabling metrics collection") - Enabled = true + flag := strings.TrimLeft(arg, "-") + + for _, enabler := range enablerFlags { + if !Enabled && flag == enabler { + log.Info("Enabling metrics collection") + Enabled = true + } + } + for _, enabler := range expensiveEnablerFlags { + if !EnabledExpensive && flag == enabler { + log.Info("Enabling expensive metrics collection") + EnabledExpensive = true + } } } } @@ -57,27 +75,36 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter + var diskReadBytesCounter, diskWriteBytesCounter Counter if err := ReadDiskStats(diskstats[0]); err == nil { diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) + diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) } else { log.Debug("Failed to read disk metrics", "err", err) } // Iterate loading the different stats and updating the meters for i := 1; ; i++ { - runtime.ReadMemStats(memstats[i%2]) - memAllocs.Mark(int64(memstats[i%2].Mallocs - memstats[(i-1)%2].Mallocs)) - memFrees.Mark(int64(memstats[i%2].Frees - memstats[(i-1)%2].Frees)) - memInuse.Mark(int64(memstats[i%2].Alloc - memstats[(i-1)%2].Alloc)) - memPauses.Mark(int64(memstats[i%2].PauseTotalNs - memstats[(i-1)%2].PauseTotalNs)) + location1 := i % 2 + location2 := (i - 1) % 2 + + runtime.ReadMemStats(memstats[location1]) + memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) + 
memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) + memInuse.Mark(int64(memstats[location1].Alloc - memstats[location2].Alloc)) + memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) + + if ReadDiskStats(diskstats[location1]) == nil { + diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) + diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) + diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) - if ReadDiskStats(diskstats[i%2]) == nil { - diskReads.Mark(diskstats[i%2].ReadCount - diskstats[(i-1)%2].ReadCount) - diskReadBytes.Mark(diskstats[i%2].ReadBytes - diskstats[(i-1)%2].ReadBytes) - diskWrites.Mark(diskstats[i%2].WriteCount - diskstats[(i-1)%2].WriteCount) - diskWriteBytes.Mark(diskstats[i%2].WriteBytes - diskstats[(i-1)%2].WriteBytes) + diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) } time.Sleep(refresh) }