Skip to content

Commit e640267

Browse files
authored
core/state/snapshot: fix journal recovery from generating old journal (ethereum#21775)
* core/state/snapshot: print warning if failed to resolve journal * core/state/snapshot: fix snapshot recovery When we encounter a snapshot journal consisting of: - a disk layer generator in the new format - a diff layer journal in the old format, the base layer should be returned without error. The broken diff layers can be reconstructed later, but we definitely don't want to regenerate the huge disk layer. * core: add tests
1 parent 3eebf34 commit e640267

File tree

2 files changed

+81
-12
lines changed

2 files changed

+81
-12
lines changed

core/blockchain_snapshot_test.go

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ import (
4343
// (v) Geth restarts normally, but it's requested to be rewound to a lower point via SetHead
4444
// (vi) Geth restarts normally with a stale snapshot
4545
type snapshotTest struct {
46-
legacy bool // Flag whether the loaded snapshot is in legacy format
47-
crash bool // Flag whether the Geth restarts from the previous crash
48-
gapped int // Number of blocks to insert without enabling snapshot
49-
setHead uint64 // Block number to set head back to
46+
legacy bool // Flag whether the loaded snapshot is in legacy format
47+
crash bool // Flag whether the Geth restarts from the previous crash
48+
restartCrash int // Number of blocks to insert after the normal stop, then the crash happens
49+
gapped int // Number of blocks to insert without enabling snapshot
50+
setHead uint64 // Block number to set head back to
5051

5152
chainBlocks int // Number of blocks to generate for the canonical chain
5253
snapshotBlock uint64 // Block number of the relevant snapshot disk layer
@@ -565,10 +566,50 @@ func TestSetHeadWithLegacySnapshot(t *testing.T) {
565566
})
566567
}
567568

569+
// Tests that Geth was running with the snapshot (legacy format) enabled and upgrades
570+
// the disk layer journal (journal generator) to the latest format. After that, Geth
571+
// is restarted from a crash. In this case Geth will find the new-format disk layer
572+
// journal but a legacy-format diff journal (the new-format one is never committed),
573+
// and the invalid diff journal is expected to be dropped.
574+
func TestRecoverSnapshotFromCrashWithLegacyDiffJournal(t *testing.T) {
575+
// Chain:
576+
// G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD)
577+
//
578+
// Commit: G
579+
// Snapshot: G
580+
//
581+
// SetHead(0)
582+
//
583+
// ------------------------------
584+
//
585+
// Expected in leveldb:
586+
// G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10
587+
//
588+
// Expected head header : C10
589+
// Expected head fast block: C10
590+
// Expected head block : C8
591+
// Expected snapshot disk : C10
592+
testSnapshot(t, &snapshotTest{
593+
legacy: true,
594+
crash: false,
595+
restartCrash: 2,
596+
gapped: 0,
597+
setHead: 0,
598+
chainBlocks: 8,
599+
snapshotBlock: 0,
600+
commitBlock: 0,
601+
expCanonicalBlocks: 10,
602+
expHeadHeader: 10,
603+
expHeadFastBlock: 10,
604+
expHeadBlock: 8, // The persisted state in the first running
605+
expSnapshotBottom: 10, // The persisted disk layer in the second running
606+
})
607+
}
608+
568609
func testSnapshot(t *testing.T, tt *snapshotTest) {
569610
// It's hard to follow the test case, visualize the input
570-
//log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))
571-
//fmt.Println(tt.dump())
611+
// log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))
612+
// fmt.Println(tt.dump())
572613

573614
// Create a temporary persistent database
574615
datadir, err := ioutil.TempDir("", "")
@@ -694,6 +735,30 @@ func testSnapshot(t *testing.T, tt *snapshotTest) {
694735
chain.SetHead(tt.setHead)
695736
chain.Stop()
696737

738+
chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
739+
if err != nil {
740+
t.Fatalf("Failed to recreate chain: %v", err)
741+
}
742+
defer chain.Stop()
743+
} else if tt.restartCrash != 0 {
744+
// Firstly, stop the chain properly, with all snapshot journal
745+
// and state committed.
746+
chain.Stop()
747+
748+
// Restart chain, forcibly flush the disk layer journal with new format
749+
newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], engine, gendb, tt.restartCrash, func(i int, b *BlockGen) {})
750+
chain, err = NewBlockChain(db, cacheConfig, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
751+
if err != nil {
752+
t.Fatalf("Failed to recreate chain: %v", err)
753+
}
754+
chain.InsertChain(newBlocks)
755+
chain.Snapshot().Cap(newBlocks[len(newBlocks)-1].Root(), 0)
756+
757+
// Simulate the blockchain crash
758+
// Don't call chain.Stop here, so that no snapshot
759+
// journal and latest state will be committed
760+
761+
// Restart the chain after the crash
697762
chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
698763
if err != nil {
699764
t.Fatalf("Failed to recreate chain: %v", err)

core/state/snapshot/journal.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,9 @@ func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, jou
103103
// Retrieve the diff layer journal. It's possible that the journal does
104104
// not exist, e.g. the disk layer is still generating when Geth
105105
// crashes without persisting the diff journal.
106-
// So if there is no journal, or the journal is not matched with disk
107-
// layer, we just discard all diffs and try to recover them later.
106+
// So if there is no journal, or the journal is invalid (e.g. the journal
107+
// does not match the disk layer, or it is a legacy-format journal,
108+
// etc.), we just discard all diffs and try to recover them later.
108109
journal := rawdb.ReadSnapshotJournal(db)
109110
if len(journal) == 0 {
110111
log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing")
@@ -115,13 +116,16 @@ func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, jou
115116
// Firstly, resolve the first element as the journal version
116117
version, err := r.Uint()
117118
if err != nil {
118-
return nil, journalGenerator{}, err
119+
log.Warn("Failed to resolve the journal version", "error", err)
120+
return base, generator, nil
119121
}
120122
if version != journalVersion {
121-
return nil, journalGenerator{}, fmt.Errorf("journal version mismatch, want %d got %v", journalVersion, version)
123+
log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version)
124+
return base, generator, nil
122125
}
123126
// Secondly, resolve the disk layer root, ensure it's continuous
124-
// with disk layer.
127+
// with disk layer. Note that at this point we know the snapshot journal has
128+
// the correct version, so we expect everything to be resolved properly.
125129
var root common.Hash
126130
if err := r.Decode(&root); err != nil {
127131
return nil, journalGenerator{}, errors.New("missing disk layer root")
@@ -159,7 +163,7 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
159163
var legacy bool
160164
snapshot, generator, err := loadAndParseJournal(diskdb, base)
161165
if err != nil {
162-
log.Debug("Failed to load new-format journal", "error", err)
166+
log.Warn("Failed to load new-format journal", "error", err)
163167
snapshot, generator, err = loadAndParseLegacyJournal(diskdb, base)
164168
legacy = true
165169
}

0 commit comments

Comments
 (0)