Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/state/snapshot: detect and clean up dangling storage snapshot in generation #24811

Merged
merged 30 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8604adb
core/state/snapshot: check dangling storages when generating snapshot
rjl493456442 Apr 8, 2022
51f8a0e
core/state/snapshot: polish
rjl493456442 Apr 8, 2022
0ee1ebe
core/state/snapshot: wipe the last part of the dangling storages
rjl493456442 Apr 8, 2022
9b9dba1
core/state/snapshot: fix and add tests
rjl493456442 Apr 8, 2022
617eda9
core/state/snapshot: fix comment
rjl493456442 Apr 8, 2022
bbb6b94
README: remove mentions of fast sync (#24656)
nuoomnoy02 Apr 7, 2022
816f6cb
core, cmd: expose dangling storage detector for wider usage
rjl493456442 Apr 26, 2022
af23c13
core/state/snapshot: rename variable
rjl493456442 Apr 26, 2022
87d8bc3
core, ethdb: use global iterators for snapshot generation
rjl493456442 Apr 28, 2022
f4a489d
core/state/snapshot: polish
rjl493456442 May 4, 2022
5f37c25
cmd, core/state/snapshot: polish
rjl493456442 May 4, 2022
0584fc6
core/state/snapshot: polish
rjl493456442 May 4, 2022
546ce97
Update core/state/snapshot/generate.go
rjl493456442 May 4, 2022
b88d7ac
ethdb: extend db test suite and fix memorydb iterator
rjl493456442 May 5, 2022
e00ff21
ethdb/dbtest: rollback changes
rjl493456442 May 5, 2022
54caa24
ethdb/memorydb: simplify iteration
rjl493456442 May 5, 2022
7fca158
core/state/snapshot: update dangling counter
rjl493456442 May 5, 2022
e178af1
core/state/snapshot: release iterators
rjl493456442 May 7, 2022
3acad8d
core/state/snapshot: update metrics
rjl493456442 May 7, 2022
9a1ccd9
core/state/snapshot: update time metrics
rjl493456442 May 7, 2022
5df1225
metrics/influxdb: temp solution to present counter meaningfully, remo…
rjl493456442 May 7, 2022
d3fb321
add debug log, revert later
rjl493456442 May 7, 2022
83f60af
core/state/snapshot: fix iterator panic
rjl493456442 May 7, 2022
78ed542
all: customized snapshot iterator for backward iteration
rjl493456442 May 7, 2022
254666a
core, ethdb: polish
rjl493456442 May 9, 2022
1f5442d
core/state/snapshot: remove debug log
rjl493456442 May 9, 2022
55577d0
core/state/snapshot: address comments from peter
rjl493456442 May 10, 2022
61dcb92
core/state/snapshot: reopen the iterator at the next position
rjl493456442 May 10, 2022
c80a059
ethdb, core/state/snapshot: address comment from peter
rjl493456442 May 10, 2022
25b0392
core/state/snapshot: reopen exhausted iterators
rjl493456442 May 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
core/state/snapshot: check dangling storages when generating snapshot
  • Loading branch information
rjl493456442 committed May 9, 2022
commit 8604adb0c11c24c3e7797cc355ed6a9b46c6bf20
105 changes: 105 additions & 0 deletions core/state/snapshot/dangling.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snapshot

import (
"bytes"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
)

// danglingRange describes the range for detecting dangling storages, i.e.
// storage snapshot entries whose owning account snapshot entry is missing.
type danglingRange struct {
	db    ethdb.KeyValueStore // The database stores the snapshot data
	start []byte              // The start of the key range (inclusive)
	limit []byte              // The last of the key range (inclusive); nil means unbounded

	result   []common.Hash // The list of account hashes which have the dangling storages
	duration time.Duration // Total time spent on the iteration
}

// newDanglingRange initializes a dangling storage scanner for the given key
// range and eagerly runs the detection, recording the findings in the
// generation metrics before returning the scanner.
func newDanglingRange(db ethdb.KeyValueStore, start, limit []byte) *danglingRange {
	scanner := &danglingRange{db: db, start: start, limit: limit}
	scanner.result, scanner.duration = scanner.detect()

	// Surface the detection outcome through the snapshot generation metrics.
	snapDanglingStoragesCounter.Inc(int64(len(scanner.result)))
	snapDanglingStoragesTimer.Update(scanner.duration)
	return scanner
}

// detect iterates the storage snapshot within the configured key range and
// returns the account hashes owning dangling storages, i.e. storage entries
// whose corresponding account snapshot entry does not exist. Note both start
// and limit are included for iteration.
func (r *danglingRange) detect() ([]common.Hash, time.Duration) {
	var (
		dangling []common.Hash
		lastSeen []byte
		begin    = time.Now()
	)
	keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
	iter := rawdb.NewKeyLengthIterator(r.db.NewIterator(rawdb.SnapshotStoragePrefix, r.start), keyLen)
	defer iter.Release()

	for iter.Next() {
		owner := iter.Key()[len(rawdb.SnapshotStoragePrefix) : len(rawdb.SnapshotStoragePrefix)+common.HashLength]

		// Stop once the iterator has moved beyond the configured limit.
		if r.limit != nil && bytes.Compare(owner, r.limit) > 0 {
			break
		}
		// All slots of one account share the owner hash; probe each owner once.
		if bytes.Equal(owner, lastSeen) {
			continue
		}
		lastSeen = common.CopyBytes(owner)

		// The storage is dangling if the owning account snapshot is absent.
		ownerHash := common.BytesToHash(owner)
		if data := rawdb.ReadAccountSnapshot(r.db, ownerHash); len(data) == 0 {
			dangling = append(dangling, ownerHash)
		}
	}
	return dangling, time.Since(begin)
}

// cleanup wipes the dangling storages whose account hashes fall strictly
// before the given limit key; detections at or past the limit are kept in
// r.result for later rounds.
//
// It returns the first error hit while wiping. Storages wiped before the
// failure are still dropped from r.result, so a retry resumes at the
// failing account.
func (r *danglingRange) cleanup(limit []byte) error {
	var (
		err    error
		wiped  int
		keyLen = len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
	)
	for _, accountHash := range r.result {
		if bytes.Compare(accountHash.Bytes(), limit) >= 0 {
			break
		}
		// Build the per-account storage prefix on a fresh slice. Appending
		// directly to rawdb.SnapshotStoragePrefix would write into its
		// backing array if it ever carries spare capacity, corrupting the
		// shared package-level prefix.
		prefix := append(append([]byte(nil), rawdb.SnapshotStoragePrefix...), accountHash.Bytes()...)
		if err = wipeKeyRange(r.db, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil {
			break
		}
		wiped++
	}
	r.result = r.result[wiped:]
	return err
}
41 changes: 30 additions & 11 deletions core/state/snapshot/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ var (
snapMissallStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/missall", nil)
snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil)
snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil)
snapDanglingStoragesCounter = metrics.NewRegisteredCounter("state/snapshot/generation/storage/dangling/counter", nil)
snapDanglingStoragesTimer = metrics.NewRegisteredTimer("state/snapshot/generation/storage/dangling/timer", nil)

// snapAccountProveCounter measures time spent on the account proving
snapAccountProveCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/prove", nil)
Expand Down Expand Up @@ -371,12 +373,16 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix

// onStateCallback is a function that is called by generateRange, when processing a range of
// accounts or storage slots. For each element, the callback is invoked.
// If 'delete' is true, then this element (and potential slots) needs to be deleted from the snapshot.
// If 'write' is true, then this element needs to be updated with the 'val'.
// If 'write' is false, then this element is already correct, and needs no update. However,
// for accounts, the storage trie of the account needs to be checked.
//
// - If 'delete' is true, then this element (and potential slots) needs to be deleted from the snapshot.
// - If 'write' is true, then this element needs to be updated with the 'val'.
// - If 'write' is false, then this element is already correct, and needs no update.
// The 'val' is the canonical encoding of the value (not the slim format for accounts)
type onStateCallback func(key []byte, val []byte, write bool, delete bool) error
//
// However, for accounts, the storage trie of the account needs to be checked. Also,
// dangling storages(storage exists but the corresponding account is missing) need to
// be cleaned up. The range between the prevKey
type onStateCallback func(key []byte, val []byte, r *danglingRange, write bool, delete bool) error

// generateRange generates the state segment with particular prefix. Generation can
// either verify the correctness of existing state through range-proof and skip
Expand Down Expand Up @@ -404,7 +410,11 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
// The verification is passed, process each state with the given
// callback function. If this state represents a contract, the
// corresponding storage check will be performed in the callback
if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil {
var r *danglingRange
if kind != "storage" {
r = newDanglingRange(dl.diskdb, origin, result.last())
}
if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, r, false, false) }); err != nil {
return false, nil, err
}
// Only abort the iteration when both database and trie are exhausted
Expand Down Expand Up @@ -466,6 +476,11 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
internal time.Duration
)
nodeIt.AddResolver(snapNodeCache)

var r *danglingRange
if kind != "storage" {
r = newDanglingRange(dl.diskdb, origin, result.last())
}
for iter.Next() {
if last != nil && bytes.Compare(iter.Key, last) > 0 {
trieMore = true
Expand All @@ -478,7 +493,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 {
// delete the key
istart := time.Now()
if err := onState(kvkeys[0], nil, false, true); err != nil {
if err := onState(kvkeys[0], nil, r, false, true); err != nil {
return false, nil, err
}
kvkeys = kvkeys[1:]
Expand All @@ -500,7 +515,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
break
}
istart := time.Now()
if err := onState(iter.Key, iter.Value, write, false); err != nil {
if err := onState(iter.Key, iter.Value, r, write, false); err != nil {
return false, nil, err
}
internal += time.Since(istart)
Expand All @@ -511,7 +526,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
// Delete all stale snapshot states remaining
istart := time.Now()
for _, key := range kvkeys {
if err := onState(key, nil, false, true); err != nil {
if err := onState(key, nil, r, false, true); err != nil {
return false, nil, err
}
deleted += 1
Expand Down Expand Up @@ -573,7 +588,7 @@ func (dl *diskLayer) checkAndFlush(current []byte, batch ethdb.Batch, stats *gen
// generateStorages generates the missing storage slots of the specific contract.
// It's supposed to restart the generation from the given origin position.
func generateStorages(dl *diskLayer, account common.Hash, storageRoot common.Hash, storeMarker []byte, batch ethdb.Batch, stats *generatorStats, logged *time.Time) error {
onStorage := func(key []byte, val []byte, write bool, delete bool) error {
onStorage := func(key []byte, val []byte, r *danglingRange, write bool, delete bool) error {
defer func(start time.Time) {
snapStorageWriteCounter.Inc(time.Since(start).Nanoseconds())
}(time.Now())
Expand Down Expand Up @@ -620,11 +635,15 @@ func generateStorages(dl *diskLayer, account common.Hash, storageRoot common.Has
// storage slots in the main trie. It's supposed to restart the generation
// from the given origin position.
func generateAccounts(dl *diskLayer, accMarker []byte, batch ethdb.Batch, stats *generatorStats, logged *time.Time) error {
onAccount := func(key []byte, val []byte, write bool, delete bool) error {
onAccount := func(key []byte, val []byte, r *danglingRange, write bool, delete bool) error {
var (
start = time.Now()
accountHash = common.BytesToHash(key)
)
// Clean up the dangling storages which have no corresponding accounts present.
if err := r.cleanup(key); err != nil {
return err
}
if delete {
rawdb.DeleteAccountSnapshot(batch, accountHash)
snapWipedAccountMeter.Mark(1)
Expand Down