diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 86eb440ce676..9adfbf64ff9a 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -214,9 +214,9 @@ var ( Usage: `Blockchain garbage collection mode ("full", "archive")`, Value: "full", } - SnapshotFlag = cli.BoolFlag{ + SnapshotFlag = cli.BoolTFlag{ Name: "snapshot", - Usage: `Enables snapshot-database mode -- experimental work in progress feature`, + Usage: `Enables snapshot-database mode (default = enable)`, } TxLookupLimitFlag = cli.Int64Flag{ Name: "txlookuplimit", @@ -1695,7 +1695,8 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) { if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) { cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100 } - if !ctx.GlobalIsSet(SnapshotFlag.Name) { + if !ctx.GlobalBool(SnapshotFlag.Name) { + cfg.TrieCleanCache += cfg.SnapshotCache cfg.SnapshotCache = 0 // Disabled } if ctx.GlobalIsSet(DocRootFlag.Name) { @@ -1946,7 +1947,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node, readOnly bool) (chain *core.B TrieTimeLimit: eth.DefaultConfig.TrieTimeout, SnapshotLimit: eth.DefaultConfig.SnapshotCache, } - if !ctx.GlobalIsSet(SnapshotFlag.Name) { + if !ctx.GlobalBool(SnapshotFlag.Name) { cache.SnapshotLimit = 0 // Disabled } if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) { diff --git a/core/blockchain.go b/core/blockchain.go index 8c3863957653..bf09157d5ce6 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -203,9 +203,10 @@ type BlockChain struct { processor Processor // Block transaction processor interface vmConfig vm.Config - badBlocks *lru.Cache // Bad block cache - shouldPreserve func(*types.Block) bool // Function used to determine whether should preserve the given block. - terminateInsert func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion. + badBlocks *lru.Cache // Bad block cache + shouldPreserve func(*types.Block) bool // Function used to determine whether should preserve the given block. + terminateInsert func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion. + writeLegacyJournal bool // Testing flag used to flush the snapshot journal in legacy format. } // NewBlockChain returns a fully initialised block chain using information @@ -277,9 +278,29 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par // Make sure the state associated with the block is available head := bc.CurrentBlock() if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil { - log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash()) - if err := bc.SetHead(head.NumberU64()); err != nil { - return nil, err + // Head state is missing. Before recovering the state, find out the + // disk layer point of the snapshot (if it's enabled) and make sure + // the rewound point is lower than the disk layer.
+ var diskRoot common.Hash + if bc.cacheConfig.SnapshotLimit > 0 { + diskRoot = rawdb.ReadSnapshotRoot(bc.db) + } + if diskRoot != (common.Hash{}) { + log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash(), "snaproot", diskRoot) + + snapDisk, err := bc.SetHeadBeyondRoot(head.NumberU64(), diskRoot) + if err != nil { + return nil, err + } + // Chain rewound, persist old snapshot number to indicate recovery procedure + if snapDisk != 0 { + rawdb.WriteSnapshotRecoveryNumber(bc.db, snapDisk) + } + } else { + log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash()) + if err := bc.SetHead(head.NumberU64()); err != nil { + return nil, err + } } } // Ensure that a previous crash in SetHead doesn't leave extra ancients @@ -335,7 +356,18 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } // Load any existing snapshot, regenerating it if loading failed if bc.cacheConfig.SnapshotLimit > 0 { - bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root(), !bc.cacheConfig.SnapshotWait) + // If the chain was rewound past the snapshot persistent layer (causing + // a recovery block number to be persisted to disk), check if we're still + // in recovery mode and in that case, don't invalidate the snapshot on a + // head mismatch. + var recover bool + + head := bc.CurrentBlock() + if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer > head.NumberU64() { + log.Warn("Enabling snapshot recovery", "chainhead", head.NumberU64(), "diskbase", *layer) + recover = true + } + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, head.Root(), !bc.cacheConfig.SnapshotWait, recover) } // Take ownership of this particular state go bc.update() @@ -435,9 +467,25 @@ func (bc *BlockChain) loadLastState() error { // was fast synced or full synced and in which state, the method will try to // delete minimal data from disk whilst retaining chain consistency. func (bc *BlockChain) SetHead(head uint64) error { + _, err := bc.SetHeadBeyondRoot(head, common.Hash{}) + return err +} + +// SetHeadBeyondRoot rewinds the local chain to a new head with the extra condition +// that the rewind must pass the specified state root. This method is meant to be +// used when rewinding with snapshots enabled to ensure that we go back further than +// the persistent disk layer. Depending on whether the node was fast synced or full, and +// in which state, the method will try to delete minimal data from disk whilst +// retaining chain consistency. +// +// The method returns the block number where the requested root cap was found.
+func (bc *BlockChain) SetHeadBeyondRoot(head uint64, root common.Hash) (uint64, error) { bc.chainmu.Lock() defer bc.chainmu.Unlock() + // Track the block number of the requested root hash + var rootNumber uint64 // (no root == always 0) + // Retrieve the last pivot block to short circuit rollbacks beyond it and the // current freezer limit to start nuking if underflown pivot := rawdb.ReadLastPivotNumber(bc.db) @@ -453,8 +501,16 @@ func (bc *BlockChain) SetHead(head uint64) error { log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash()) newHeadBlock = bc.genesisBlock } else { - // Block exists, keep rewinding until we find one with state + // Block exists, keep rewinding until we find one with state, + // and until we exceed the optional threshold root hash + beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true) + for { + // If a root threshold was requested but not yet crossed, check + if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root { + beyondRoot, rootNumber = true, newHeadBlock.NumberU64() + } if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil { log.Info("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) if pivot == nil || newHeadBlock.NumberU64() > *pivot { @@ -465,8 +521,12 @@ func (bc *BlockChain) SetHead(head uint64) error { newHeadBlock = bc.genesisBlock } } - log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) - break + if beyondRoot || newHeadBlock.NumberU64() == 0 { + log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) + break + } + log.Info("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root()) + newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding } } rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash()) @@ -547,7 +607,7 @@ func (bc *BlockChain) SetHead(head uint64) error { bc.txLookupCache.Purge() bc.futureBlocks.Purge() - return bc.loadLastState() + return rootNumber, bc.loadLastState() } // FastSyncCommitHead sets the current head block to the one defined by the hash @@ -590,6 +650,11 @@ func (bc *BlockChain) Snapshot() *snapshot.Tree { return bc.snaps } +// Snapshots returns the blockchain snapshot tree. It is an alias of Snapshot, +// kept to avoid conflicts when cherry-picking upstream changes. +func (bc *BlockChain) Snapshots() *snapshot.Tree { + return bc.Snapshot() +} + // CurrentFastBlock retrieves the current fast-sync head block of the canonical // chain. The block is retrieved from the blockchain's internal cache. func (bc *BlockChain) CurrentFastBlock() *types.Block { @@ -899,8 +964,14 @@ func (bc *BlockChain) Stop() { var snapBase common.Hash if bc.snaps != nil { var err error - if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { - log.Error("Failed to journal state snapshot", "err", err) + if bc.writeLegacyJournal { + if snapBase, err = bc.snaps.LegacyJournal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } + } else { + if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } } } // Ensure the state of a recent block is also stored to disk before exiting.
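The heart of this change is the rewind rule in SetHeadBeyondRoot: a block qualifies as the new head only if its state is present on disk and the walk has already passed the snapshot's persistent disk layer root, so the snapshot journal can later be replayed on top of it. Below is a minimal, self-contained sketch of that rule, assuming a hypothetical block type with string roots rather than the real types.Block and common.Hash APIs; the real method additionally handles the fast-sync pivot and the freezer, which the sketch omits.

package main

import "fmt"

// block is a hypothetical, simplified stand-in for types.Block: just a
// number, a state root and whether that root's state is still on disk.
type block struct {
	number   uint64
	root     string
	hasState bool
}

// rewindTarget mirrors the loop in SetHeadBeyondRoot: walk back from head,
// flip beyondRoot once the requested root is seen, and only accept a block
// as the new head if it has state AND the root threshold has been crossed
// (or no root was requested, or genesis is reached).
func rewindTarget(chain []block, headIdx int, root string) (newHead, rootNumber uint64) {
	beyondRoot := root == "" // no root requested: any block with state qualifies
	for i := headIdx; i >= 0; i-- {
		b := chain[i]
		if root != "" && !beyondRoot && b.root == root {
			beyondRoot, rootNumber = true, b.number
		}
		if !b.hasState {
			continue // state missing, rewind further
		}
		if beyondRoot || b.number == 0 {
			return b.number, rootNumber
		}
		// Block has state but sits above the snapshot disk layer: skip it.
	}
	return 0, rootNumber
}

func main() {
	chain := []block{
		{0, "g", true}, {1, "r1", false}, {2, "r2", true},
		{3, "r3", false}, {4, "r4", true},
	}
	// Disk layer at root "r2": even though block 4 has state, the rewind
	// must pass block 2 so the snapshot can recover from its journal.
	head, rootNum := rewindTarget(chain, 4, "r2")
	fmt.Println(head, rootNum) // 2 2
}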
diff --git a/core/blockchain_repair_test.go b/core/blockchain_repair_test.go index f104a93061aa..478248070344 100644 --- a/core/blockchain_repair_test.go +++ b/core/blockchain_repair_test.go @@ -25,6 +25,7 @@ import ( "io/ioutil" "os" "testing" + "time" "github.com/celo-org/celo-blockchain/common" mockEngine "github.com/celo-org/celo-blockchain/consensus/consensustest" @@ -38,7 +39,10 @@ import ( // committed to disk and then the process crashed. In this case we expect the full // chain to be rolled back to the committed block, but the chain data itself left // in the database for replaying. -func TestShortRepair(t *testing.T) { +func TestShortRepair(t *testing.T) { testShortRepair(t, false) } +func TestShortRepairWithSnapshots(t *testing.T) { testShortRepair(t, true) } + +func testShortRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -68,14 +72,17 @@ func TestShortRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain where the fast sync pivot point was // already committed, after which the process crashed. In this case we expect the full // chain to be rolled back to the committed block, but the chain data itself left in // the database for replaying. -func TestShortFastSyncedRepair(t *testing.T) { +func TestShortFastSyncedRepair(t *testing.T) { testShortFastSyncedRepair(t, false) } +func TestShortFastSyncedRepairWithSnapshots(t *testing.T) { testShortFastSyncedRepair(t, true) } + +func testShortFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -105,14 +112,17 @@ func TestShortFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain where the fast sync pivot point was // not yet committed, but the process crashed. In this case we expect the chain to // detect that it was fast syncing and not delete anything, since we can just pick // up directly where we left off. -func TestShortFastSyncingRepair(t *testing.T) { +func TestShortFastSyncingRepair(t *testing.T) { testShortFastSyncingRepair(t, false) } +func TestShortFastSyncingRepairWithSnapshots(t *testing.T) { testShortFastSyncingRepair(t, true) } + +func testShortFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -142,7 +152,7 @@ func TestShortFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where a @@ -150,7 +160,10 @@ func TestShortFastSyncingRepair(t *testing.T) { // test scenario the side chain is below the committed block. In this case we expect // the canonical chain to be rolled back to the committed block, but the chain data // itself left in the database for replaying. 
-func TestShortOldForkedRepair(t *testing.T) { +func TestShortOldForkedRepair(t *testing.T) { testShortOldForkedRepair(t, false) } +func TestShortOldForkedRepairWithSnapshots(t *testing.T) { testShortOldForkedRepair(t, true) } + +func testShortOldForkedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -182,7 +195,7 @@ func TestShortOldForkedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -191,6 +204,13 @@ func TestShortOldForkedRepair(t *testing.T) { // this case we expect the canonical chain to be rolled back to the committed block, // but the chain data itself left in the database for replaying. func TestShortOldForkedFastSyncedRepair(t *testing.T) { + testShortOldForkedFastSyncedRepair(t, false) +} +func TestShortOldForkedFastSyncedRepairWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncedRepair(t, true) +} + +func testShortOldForkedFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -222,7 +242,7 @@ func TestShortOldForkedFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -231,6 +251,13 @@ func TestShortOldForkedFastSyncedRepair(t *testing.T) { // the chain to detect that it was fast syncing and not delete anything, since we // can just pick up directly where we left off. func TestShortOldForkedFastSyncingRepair(t *testing.T) { + testShortOldForkedFastSyncingRepair(t, false) +} +func TestShortOldForkedFastSyncingRepairWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncingRepair(t, true) +} + +func testShortOldForkedFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -262,7 +289,7 @@ func TestShortOldForkedFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where a @@ -270,7 +297,10 @@ func TestShortOldForkedFastSyncingRepair(t *testing.T) { // test scenario the side chain reaches above the committed block. In this case we // expect the canonical chain to be rolled back to the committed block, but the // chain data itself left in the database for replaying. -func TestShortNewlyForkedRepair(t *testing.T) { +func TestShortNewlyForkedRepair(t *testing.T) { testShortNewlyForkedRepair(t, false) } +func TestShortNewlyForkedRepairWithSnapshots(t *testing.T) { testShortNewlyForkedRepair(t, true) } + +func testShortNewlyForkedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -302,7 +332,7 @@ func TestShortNewlyForkedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -311,6 +341,13 @@ func TestShortNewlyForkedRepair(t *testing.T) { // In this case we expect the canonical chain to be rolled back to the committed // block, but the chain data itself left in the database for replaying. 
func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { + testShortNewlyForkedFastSyncedRepair(t, false) +} +func TestShortNewlyForkedFastSyncedRepairWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncedRepair(t, true) +} + +func testShortNewlyForkedFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -342,7 +379,7 @@ func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -351,6 +388,13 @@ func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { // case we expect the chain to detect that it was fast syncing and not delete // anything, since we can just pick up directly where we left off. func TestShortNewlyForkedFastSyncingRepair(t *testing.T) { + testShortNewlyForkedFastSyncingRepair(t, false) +} +func TestShortNewlyForkedFastSyncingRepairWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncingRepair(t, true) +} + +func testShortNewlyForkedFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -382,14 +426,17 @@ func TestShortNewlyForkedFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where a recent // block - newer than the ancient limit - was already committed to disk and then // the process crashed. In this case we expect the chain to be rolled back to the // committed block, with everything afterwards kept as fast sync data. -func TestLongShallowRepair(t *testing.T) { +func TestLongShallowRepair(t *testing.T) { testLongShallowRepair(t, false) } +func TestLongShallowRepairWithSnapshots(t *testing.T) { testLongShallowRepair(t, true) } + +func testLongShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -424,14 +471,17 @@ func TestLongShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where a recent // block - older than the ancient limit - was already committed to disk and then // the process crashed. In this case we expect the chain to be rolled back to the // committed block, with everything afterwards deleted. -func TestLongDeepRepair(t *testing.T) { +func TestLongDeepRepair(t *testing.T) { testLongDeepRepair(t, false) } +func TestLongDeepRepairWithSnapshots(t *testing.T) { testLongDeepRepair(t, true) } + +func testLongDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -465,7 +515,7 @@ func TestLongDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -473,6 +523,13 @@ func TestLongDeepRepair(t *testing.T) { // which the process crashed. In this case we expect the chain to be rolled back // to the committed block, with everything afterwards kept as fast sync data.
func TestLongFastSyncedShallowRepair(t *testing.T) { + testLongFastSyncedShallowRepair(t, false) +} +func TestLongFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongFastSyncedShallowRepair(t, true) +} + +func testLongFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -507,14 +564,17 @@ func TestLongFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast // sync pivot point - older than the ancient limit - was already committed, after // which the process crashed. In this case we expect the chain to be rolled back // to the committed block, with everything afterwards deleted. -func TestLongFastSyncedDeepRepair(t *testing.T) { +func TestLongFastSyncedDeepRepair(t *testing.T) { testLongFastSyncedDeepRepair(t, false) } +func TestLongFastSyncedDeepRepairWithSnapshots(t *testing.T) { testLongFastSyncedDeepRepair(t, true) } + +func testLongFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -548,7 +608,7 @@ func TestLongFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -557,6 +617,13 @@ func TestLongFastSyncedDeepRepair(t *testing.T) { // syncing and not delete anything, since we can just pick up directly where we // left off. func TestLongFastSyncingShallowRepair(t *testing.T) { + testLongFastSyncingShallowRepair(t, false) +} +func TestLongFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongFastSyncingShallowRepair(t, true) +} + +func testLongFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -591,7 +658,7 @@ func TestLongFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -599,7 +666,10 @@ func TestLongFastSyncingShallowRepair(t *testing.T) { // process crashed. In this case we expect the chain to detect that it was fast // syncing and not delete anything, since we can just pick up directly where we // left off. -func TestLongFastSyncingDeepRepair(t *testing.T) { +func TestLongFastSyncingDeepRepair(t *testing.T) { testLongFastSyncingDeepRepair(t, false) } +func TestLongFastSyncingDeepRepairWithSnapshots(t *testing.T) { testLongFastSyncingDeepRepair(t, true) } + +func testLongFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -634,7 +704,7 @@ func TestLongFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -644,6 +714,13 @@ func TestLongFastSyncingDeepRepair(t *testing.T) { // rolled back to the committed block, with everything afterwards kept as fast // sync data; the side chain completely nuked by the freezer.
func TestLongOldForkedShallowRepair(t *testing.T) { + testLongOldForkedShallowRepair(t, false) +} +func TestLongOldForkedShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedShallowRepair(t, true) +} + +func testLongOldForkedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -679,7 +756,7 @@ func TestLongOldForkedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -688,7 +765,10 @@ func TestLongOldForkedShallowRepair(t *testing.T) { // chain is below the committed block. In this case we expect the canonical chain // to be rolled back to the committed block, with everything afterwards deleted; // the side chain completely nuked by the freezer. -func TestLongOldForkedDeepRepair(t *testing.T) { +func TestLongOldForkedDeepRepair(t *testing.T) { testLongOldForkedDeepRepair(t, false) } +func TestLongOldForkedDeepRepairWithSnapshots(t *testing.T) { testLongOldForkedDeepRepair(t, true) } + +func testLongOldForkedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -723,7 +803,7 @@ func TestLongOldForkedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -733,6 +813,13 @@ func TestLongOldForkedDeepRepair(t *testing.T) { // to be rolled back to the committed block, with everything afterwards kept as // fast sync data; the side chain completely nuked by the freezer. func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { + testLongOldForkedFastSyncedShallowRepair(t, false) +} +func TestLongOldForkedFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedShallowRepair(t, true) +} + +func testLongOldForkedFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -768,7 +855,7 @@ func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -778,6 +865,13 @@ func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { // chain to be rolled back to the committed block, with everything afterwards deleted; // the side chain completely nuked by the freezer.
func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { + testLongOldForkedFastSyncedDeepRepair(t, false) +} +func TestLongOldForkedFastSyncedDeepRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedDeepRepair(t, true) +} + +func testLongOldForkedFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -812,7 +906,7 @@ func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -822,6 +916,13 @@ func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { + testLongOldForkedFastSyncingShallowRepair(t, false) +} +func TestLongOldForkedFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingShallowRepair(t, true) +} + +func testLongOldForkedFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -857,7 +958,7 @@ func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -867,6 +968,13 @@ func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { + testLongOldForkedFastSyncingDeepRepair(t, false) +} +func TestLongOldForkedFastSyncingDeepRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingDeepRepair(t, true) +} + +func testLongOldForkedFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -902,7 +1010,7 @@ func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -912,6 +1020,13 @@ func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { // rolled back to the committed block, with everything afterwards kept as fast // sync data; the side chain completely nuked by the freezer. func TestLongNewerForkedShallowRepair(t *testing.T) { + testLongNewerForkedShallowRepair(t, false) +} +func TestLongNewerForkedShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedShallowRepair(t, true) +} + +func testLongNewerForkedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -947,7 +1062,7 @@ func TestLongNewerForkedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -956,7 +1071,10 @@ func TestLongNewerForkedShallowRepair(t *testing.T) { // chain is above the committed block.
In this case we expect the canonical chain // to be rolled back to the committed block, with everything afterwards deleted; // the side chain completely nuked by the freezer. -func TestLongNewerForkedDeepRepair(t *testing.T) { +func TestLongNewerForkedDeepRepair(t *testing.T) { testLongNewerForkedDeepRepair(t, false) } +func TestLongNewerForkedDeepRepairWithSnapshots(t *testing.T) { testLongNewerForkedDeepRepair(t, true) } + +func testLongNewerForkedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -991,7 +1109,7 @@ func TestLongNewerForkedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1001,6 +1119,13 @@ func TestLongNewerForkedDeepRepair(t *testing.T) { // to be rolled back to the committed block, with everything afterwards kept as fast // sync data; the side chain completely nuked by the freezer. func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { + testLongNewerForkedFastSyncedShallowRepair(t, false) +} +func TestLongNewerForkedFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedShallowRepair(t, true) +} + +func testLongNewerForkedFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1036,7 +1161,7 @@ func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1046,6 +1171,13 @@ func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { // chain to be rolled back to the committed block, with everything afterwards deleted; // the side chain completely nuked by the freezer. func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { + testLongNewerForkedFastSyncedDeepRepair(t, false) +} +func TestLongNewerForkedFastSyncedDeepRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedDeepRepair(t, true) +} + +func testLongNewerForkedFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1080,7 +1212,7 @@ func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1090,6 +1222,13 @@ func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer.
func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { + testLongNewerForkedFastSyncingShallowRepair(t, false) +} +func TestLongNewerForkedFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingShallowRepair(t, true) +} + +func testLongNewerForkedFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1125,7 +1264,7 @@ func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1135,6 +1274,13 @@ func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongNewerForkedFastSyncingDeepRepair(t *testing.T) { + testLongNewerForkedFastSyncingDeepRepair(t, false) +} +func TestLongNewerForkedFastSyncingDeepRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingDeepRepair(t, true) +} + +func testLongNewerForkedFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1170,13 +1316,13 @@ func TestLongNewerForkedFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } -func testRepair(t *testing.T, tt *rewindTest) { +func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { // It's hard to follow the test case, visualize the input //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - //fmt.Println(tt.dump(true)) + // fmt.Println(tt.dump(true)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") @@ -1195,8 +1341,18 @@ func testRepair(t *testing.T, tt *rewindTest) { var ( genesis = new(Genesis).MustCommit(db) engine = mockEngine.NewFaker() + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, // Disable snapshot by default + } ) - chain, err := NewBlockChain(db, nil, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if snapshots { + config.SnapshotLimit = 256 + config.SnapshotWait = true + } + chain, err := NewBlockChain(db, config, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1219,6 +1375,11 @@ func testRepair(t *testing.T, tt *rewindTest) { } if tt.commitBlock > 0 { chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true) + if snapshots { + if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { + t.Fatalf("Failed to flatten snapshots: %v", err) + } + } } if _, err := chain.InsertChain(canonblocks[tt.commitBlock:]); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) diff --git a/core/blockchain_sethead_test.go b/core/blockchain_sethead_test.go index e912941a3b25..911924a5d052 100644 --- a/core/blockchain_sethead_test.go +++ b/core/blockchain_sethead_test.go @@ -25,6 +25,7 @@ import ( "os" "strings" "testing" + "time" "github.com/celo-org/celo-blockchain/common" mockEngine 
"github.com/celo-org/celo-blockchain/consensus/consensustest" @@ -149,7 +150,10 @@ func (tt *rewindTest) Dump(crash bool) string { // chain to be rolled back to the committed block. Everything above the sethead // point should be deleted. In between the committed block and the requested head // the data can remain as "fast sync" data to avoid redownloading it. -func TestShortSetHead(t *testing.T) { +func TestShortSetHead(t *testing.T) { testShortSetHead(t, false) } +func TestShortSetHeadWithSnapshots(t *testing.T) { testShortSetHead(t, true) } + +func testShortSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -180,7 +184,7 @@ func TestShortSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain where the fast sync pivot point was @@ -189,7 +193,10 @@ func TestShortSetHead(t *testing.T) { // Everything above the sethead point should be deleted. In between the committed // block and the requested head the data can remain as "fast sync" data to avoid // redownloading it. -func TestShortFastSyncedSetHead(t *testing.T) { +func TestShortFastSyncedSetHead(t *testing.T) { testShortFastSyncedSetHead(t, false) } +func TestShortFastSyncedSetHeadWithSnapshots(t *testing.T) { testShortFastSyncedSetHead(t, true) } + +func testShortFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -220,7 +227,7 @@ func TestShortFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain where the fast sync pivot point was @@ -228,7 +235,10 @@ func TestShortFastSyncedSetHead(t *testing.T) { // detect that it was fast syncing and delete everything from the new head, since // we can just pick up fast syncing from there. The head full block should be set // to the genesis. -func TestShortFastSyncingSetHead(t *testing.T) { +func TestShortFastSyncingSetHead(t *testing.T) { testShortFastSyncingSetHead(t, false) } +func TestShortFastSyncingSetHeadWithSnapshots(t *testing.T) { testShortFastSyncingSetHead(t, true) } + +func testShortFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -259,7 +269,7 @@ func TestShortFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where a @@ -269,7 +279,10 @@ func TestShortFastSyncingSetHead(t *testing.T) { // above the sethead point should be deleted. In between the committed block and // the requested head the data can remain as "fast sync" data to avoid redownloading // it. The side chain should be left alone as it was shorter. 
-func TestShortOldForkedSetHead(t *testing.T) { +func TestShortOldForkedSetHead(t *testing.T) { testShortOldForkedSetHead(t, false) } +func TestShortOldForkedSetHeadWithSnapshots(t *testing.T) { testShortOldForkedSetHead(t, true) } + +func testShortOldForkedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -302,7 +315,7 @@ func TestShortOldForkedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -313,6 +326,13 @@ func TestShortOldForkedSetHead(t *testing.T) { // committed block and the requested head the data can remain as "fast sync" data // to avoid redownloading it. The side chain should be left alone as it was shorter. func TestShortOldForkedFastSyncedSetHead(t *testing.T) { + testShortOldForkedFastSyncedSetHead(t, false) +} +func TestShortOldForkedFastSyncedSetHeadWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncedSetHead(t, true) +} + +func testShortOldForkedFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -345,7 +365,7 @@ func TestShortOldForkedFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -355,6 +375,13 @@ func TestShortOldForkedFastSyncedSetHead(t *testing.T) { // head, since we can just pick up fast syncing from there. The head full block // should be set to the genesis. func TestShortOldForkedFastSyncingSetHead(t *testing.T) { + testShortOldForkedFastSyncingSetHead(t, false) +} +func TestShortOldForkedFastSyncingSetHeadWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncingSetHead(t, true) +} + +func testShortOldForkedFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -387,7 +414,7 @@ func TestShortOldForkedFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where a @@ -401,7 +428,10 @@ func TestShortOldForkedFastSyncingSetHead(t *testing.T) { // The side chain could be left alone if the fork point was before the new head // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code. -func TestShortNewlyForkedSetHead(t *testing.T) { +func TestShortNewlyForkedSetHead(t *testing.T) { testShortNewlyForkedSetHead(t, false) } +func TestShortNewlyForkedSetHeadWithSnapshots(t *testing.T) { testShortNewlyForkedSetHead(t, true) } + +func testShortNewlyForkedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -434,7 +464,7 @@ func TestShortNewlyForkedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -448,6 +478,13 @@ func TestShortNewlyForkedSetHead(t *testing.T) { // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code.
func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { + testShortNewlyForkedFastSyncedSetHead(t, false) +} +func TestShortNewlyForkedFastSyncedSetHeadWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncedSetHead(t, true) +} + +func testShortNewlyForkedFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -480,7 +517,7 @@ func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -494,6 +531,13 @@ func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code. func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { + testShortNewlyForkedFastSyncingSetHead(t, false) +} +func TestShortNewlyForkedFastSyncingSetHeadWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncingSetHead(t, true) +} + +func testShortNewlyForkedFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -526,7 +570,7 @@ func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where a recent @@ -535,7 +579,10 @@ func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { // to the committed block. Everything above the sethead point should be deleted. // In between the committed block and the requested head the data can remain as // "fast sync" data to avoid redownloading it. -func TestLongShallowSetHead(t *testing.T) { +func TestLongShallowSetHead(t *testing.T) { testLongShallowSetHead(t, false) } +func TestLongShallowSetHeadWithSnapshots(t *testing.T) { testLongShallowSetHead(t, true) } + +func testLongShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -571,7 +618,7 @@ func TestLongShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where a recent @@ -579,7 +626,10 @@ func TestLongShallowSetHead(t *testing.T) { // sethead was called. In this case we expect the full chain to be rolled back // to the committed block. Since the ancient limit was underflown, everything // needs to be deleted onwards to avoid creating a gap. -func TestLongDeepSetHead(t *testing.T) { +func TestLongDeepSetHead(t *testing.T) { testLongDeepSetHead(t, false) } +func TestLongDeepSetHeadWithSnapshots(t *testing.T) { testLongDeepSetHead(t, true) } + +func testLongDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -614,7 +664,7 @@ func TestLongDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -624,6 +674,13 @@ func TestLongDeepSetHead(t *testing.T) { // deleted. In between the committed block and the requested head the data can // remain as "fast sync" data to avoid redownloading it. 
func TestLongFastSyncedShallowSetHead(t *testing.T) { + testLongFastSyncedShallowSetHead(t, false) +} +func TestLongFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncedShallowSetHead(t, true) +} + +func testLongFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -659,7 +716,7 @@ func TestLongFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -667,7 +724,10 @@ func TestLongFastSyncedShallowSetHead(t *testing.T) { // which sethead was called. In this case we expect the full chain to be rolled // back to the committed block. Since the ancient limit was underflown, everything // needs to be deleted onwards to avoid creating a gap. -func TestLongFastSyncedDeepSetHead(t *testing.T) { +func TestLongFastSyncedDeepSetHead(t *testing.T) { testLongFastSyncedDeepSetHead(t, false) } +func TestLongFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { testLongFastSyncedDeepSetHead(t, true) } + +func testLongFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -702,7 +762,7 @@ func TestLongFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -711,6 +771,13 @@ func TestLongFastSyncedDeepSetHead(t *testing.T) { // syncing and delete everything from the new head, since we can just pick up fast // syncing from there. func TestLongFastSyncingShallowSetHead(t *testing.T) { + testLongFastSyncingShallowSetHead(t, false) +} +func TestLongFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncingShallowSetHead(t, true) +} + +func testLongFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -746,7 +813,7 @@ func TestLongFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -755,6 +822,13 @@ func TestLongFastSyncingShallowSetHead(t *testing.T) { // syncing and delete everything from the new head, since we can just pick up fast // syncing from there. func TestLongFastSyncingDeepSetHead(t *testing.T) { + testLongFastSyncingDeepSetHead(t, false) +} +func TestLongFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncingDeepSetHead(t, true) +} + +func testLongFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -789,7 +863,7 @@ func TestLongFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter side @@ -800,6 +874,13 @@ func TestLongFastSyncingDeepSetHead(t *testing.T) { // can remain as "fast sync" data to avoid redownloading it. The side chain is nuked // by the freezer. 
func TestLongOldForkedShallowSetHead(t *testing.T) { + testLongOldForkedShallowSetHead(t, false) +} +func TestLongOldForkedShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedShallowSetHead(t, true) +} + +func testLongOldForkedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -836,7 +917,7 @@ func TestLongOldForkedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter side @@ -845,7 +926,10 @@ func TestLongOldForkedShallowSetHead(t *testing.T) { // chain to be rolled back to the committed block. Since the ancient limit was // underflown, everything needs to be deleted onwards to avoid creating a gap. The // side chain is nuked by the freezer. -func TestLongOldForkedDeepSetHead(t *testing.T) { +func TestLongOldForkedDeepSetHead(t *testing.T) { testLongOldForkedDeepSetHead(t, false) } +func TestLongOldForkedDeepSetHeadWithSnapshots(t *testing.T) { testLongOldForkedDeepSetHead(t, true) } + +func testLongOldForkedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -881,7 +965,7 @@ func TestLongOldForkedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -893,6 +977,13 @@ func TestLongOldForkedDeepSetHead(t *testing.T) { // requested head the data can remain as "fast sync" data to avoid redownloading // it. The side chain is nuked by the freezer. func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { + testLongOldForkedFastSyncedShallowSetHead(t, false) +} +func TestLongOldForkedFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedShallowSetHead(t, true) +} + +func testLongOldForkedFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -929,7 +1020,7 @@ func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -940,6 +1031,13 @@ func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { // underflown, everything needs to be deleted onwards to avoid creating a gap. The // side chain is nuked by the freezer. func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { + testLongOldForkedFastSyncedDeepSetHead(t, false) +} +func TestLongOldForkedFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedDeepSetHead(t, true) +} + +func testLongOldForkedFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -975,7 +1073,7 @@ func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -986,6 +1084,13 @@ func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { // just pick up fast syncing from there. 
The side chain is completely nuked by the // freezer. func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { + testLongOldForkedFastSyncingShallowSetHead(t, false) +} +func TestLongOldForkedFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingShallowSetHead(t, true) +} + +func testLongOldForkedFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -1022,7 +1127,7 @@ func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1033,6 +1138,13 @@ func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { // just pick up fast syncing from there. The side chain is completely nuked by the // freezer. func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { + testLongOldForkedFastSyncingDeepSetHead(t, false) +} +func TestLongOldForkedFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingDeepSetHead(t, true) +} + +func testLongOldForkedFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -1068,7 +1180,7 @@ func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1077,6 +1189,13 @@ func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongShallowSetHead. func TestLongNewerForkedShallowSetHead(t *testing.T) { + testLongNewerForkedShallowSetHead(t, false) +} +func TestLongNewerForkedShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedShallowSetHead(t, true) +} + +func testLongNewerForkedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1113,7 +1232,7 @@ func TestLongNewerForkedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1122,6 +1241,13 @@ func TestLongNewerForkedShallowSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongDeepSetHead. 
func TestLongNewerForkedDeepSetHead(t *testing.T) { + testLongNewerForkedDeepSetHead(t, false) +} +func TestLongNewerForkedDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedDeepSetHead(t, true) +} + +func testLongNewerForkedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1157,7 +1283,7 @@ func TestLongNewerForkedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1166,6 +1292,13 @@ func TestLongNewerForkedDeepSetHead(t *testing.T) { // the side chain is above the committed block. In this case the freezer will delete // the sidechain since it's dangling, reverting to TestLongFastSyncedShallowSetHead. func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { + testLongNewerForkedFastSyncedShallowSetHead(t, false) +} +func TestLongNewerForkedFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedShallowSetHead(t, true) +} + +func testLongNewerForkedFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1202,7 +1335,7 @@ func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1211,6 +1344,13 @@ func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { // the side chain is above the committed block. In this case the freezer will delete // the sidechain since it's dangling, reverting to TestLongFastSyncedDeepSetHead. func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { + testLongNewerForkedFastSyncedDeepSetHead(t, false) +} +func TestLongNewerForkedFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedDeepSetHead(t, true) +} + +func testLongNewerForkedFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1246,7 +1386,7 @@ func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1255,6 +1395,13 @@ func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongFastSyncingShallowSetHead.
func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { + testLongNewerForkedFastSyncingShallowSetHead(t, false) +} +func TestLongNewerForkedFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingShallowSetHead(t, true) +} + +func testLongNewerForkedFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1291,7 +1438,7 @@ func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1300,6 +1447,13 @@ func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongFastSyncingDeepSetHead. func TestLongNewerForkedFastSyncingDeepSetHead(t *testing.T) { + testLongNewerForkedFastSyncingDeepSetHead(t, false) +} +func TestLongNewerForkedFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingDeepSetHead(t, true) +} + +func testLongNewerForkedFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1335,13 +1489,13 @@ func TestLongNewerForkedFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } -func testSetHead(t *testing.T, tt *rewindTest) { +func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { // It's hard to follow the test case, visualize the input - //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - //fmt.Println(tt.dump(false)) + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump(false)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") @@ -1360,8 +1514,18 @@ func testSetHead(t *testing.T, tt *rewindTest) { var ( genesis = new(Genesis).MustCommit(db) engine = mockEngine.NewFaker() + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, // Disable snapshot + } ) - chain, err := NewBlockChain(db, nil, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if snapshots { + config.SnapshotLimit = 256 + config.SnapshotWait = true + } + chain, err := NewBlockChain(db, config, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1384,6 +1548,11 @@ func testSetHead(t *testing.T, tt *rewindTest) { } if tt.commitBlock > 0 { chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true) + if snapshots { + if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { + t.Fatalf("Failed to flatten snapshots: %v", err) + } + } } if _, err := chain.InsertChain(canonblocks[tt.commitBlock:]); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) diff --git a/core/blockchain_snapshot_test.go b/core/blockchain_snapshot_test.go new file mode 100644 index 000000000000..93548f98141e --- /dev/null +++ b/core/blockchain_snapshot_test.go 
@@ -0,0 +1,1024 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +// Tests that abnormal program termination (i.e. crash) and restart can recover +// the snapshot properly if the snapshot is enabled. + +package core + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "strings" + "testing" + "time" + + "github.com/celo-org/celo-blockchain/consensus" + mockEngine "github.com/celo-org/celo-blockchain/consensus/consensustest" + "github.com/celo-org/celo-blockchain/core/rawdb" + "github.com/celo-org/celo-blockchain/core/state/snapshot" + "github.com/celo-org/celo-blockchain/core/types" + "github.com/celo-org/celo-blockchain/core/vm" + "github.com/celo-org/celo-blockchain/ethdb" + "github.com/celo-org/celo-blockchain/params" +) + +// snapshotTestBasic wraps the common testing fields in the snapshot tests. +type snapshotTestBasic struct { + legacy bool // Whether to write the snapshot journal in legacy format + chainBlocks int // Number of blocks to generate for the canonical chain + snapshotBlock uint64 // Block number of the relevant snapshot disk layer + commitBlock uint64 // Block number for which to commit the state to disk + + expCanonicalBlocks int // Number of canonical blocks expected to remain in the database (excl. genesis) + expHeadHeader uint64 // Block number of the expected head header + expHeadFastBlock uint64 // Block number of the expected head fast sync block + expHeadBlock uint64 // Block number of the expected head full block + expSnapshotBottom uint64 // The block height corresponding to the snapshot disk layer + + // Shared fields, set at runtime + datadir string + db ethdb.Database + gendb ethdb.Database + engine consensus.Engine +} + +func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Block) { + // Create a temporary persistent database + datadir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("Failed to create temporary datadir: %v", err) + } + os.RemoveAll(datadir) + + db, err := rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "") + if err != nil { + t.Fatalf("Failed to create persistent database: %v", err) + } + // Initialize a fresh chain + var ( + genesis = new(Genesis).MustCommit(db) + engine = mockEngine.NewFaker() + gendb = rawdb.NewMemoryDatabase() + + // Snapshot is enabled, the first snapshot is created from the Genesis. + // The snapshot memory allowance is 256MB, which means no snapshot flush + // will happen during the block insertion.
+ cacheConfig = defaultCacheConfig + ) + chain, err := NewBlockChain(db, cacheConfig, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to create chain: %v", err) + } + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, gendb, basic.chainBlocks, func(i int, b *BlockGen) {}) + + // Insert the blocks with configured settings. + var breakpoints []uint64 + if basic.commitBlock > basic.snapshotBlock { + breakpoints = append(breakpoints, basic.snapshotBlock, basic.commitBlock) + } else { + breakpoints = append(breakpoints, basic.commitBlock, basic.snapshotBlock) + } + var startPoint uint64 + for _, point := range breakpoints { + if _, err := chain.InsertChain(blocks[startPoint:point]); err != nil { + t.Fatalf("Failed to import canonical chain start: %v", err) + } + startPoint = point + + if basic.commitBlock > 0 && basic.commitBlock == point { + chain.stateCache.TrieDB().Commit(blocks[point-1].Root(), true) + } + if basic.snapshotBlock > 0 && basic.snapshotBlock == point { + if basic.legacy { + // Here we commit the snapshot disk root to simulate + // committing the legacy snapshot. + rawdb.WriteSnapshotRoot(db, blocks[point-1].Root()) + } else { + // Flushing the entire snap tree into the disk, the + // relevant (a) snapshot root and (b) snapshot generator + // will be persisted atomically. + chain.snaps.Cap(blocks[point-1].Root(), 0) + diskRoot, blockRoot := chain.snaps.DiskRoot(), blocks[point-1].Root() + if !bytes.Equal(diskRoot.Bytes(), blockRoot.Bytes()) { + t.Fatalf("Failed to flush disk layer change, want %x, got %x", blockRoot, diskRoot) + } + } + } + } + if _, err := chain.InsertChain(blocks[startPoint:]); err != nil { + t.Fatalf("Failed to import canonical chain tail: %v", err) + } + + // Set runtime fields + basic.datadir = datadir + basic.db = db + basic.gendb = gendb + basic.engine = engine + + // Ugly hack, notify the chain to flush the journal in legacy format + // if it's requested.
+ if basic.legacy { + chain.writeLegacyJournal = true + } + return chain, blocks +} + +func (basic *snapshotTestBasic) verify(t *testing.T, chain *BlockChain, blocks []*types.Block) { + // Iterate over all the remaining blocks and ensure there are no gaps + verifyNoGaps(t, chain, true, blocks) + verifyCutoff(t, chain, true, blocks, basic.expCanonicalBlocks) + + if head := chain.CurrentHeader(); head.Number.Uint64() != basic.expHeadHeader { + t.Errorf("Head header mismatch: have %d, want %d", head.Number, basic.expHeadHeader) + } + if head := chain.CurrentFastBlock(); head.NumberU64() != basic.expHeadFastBlock { + t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadFastBlock) + } + if head := chain.CurrentBlock(); head.NumberU64() != basic.expHeadBlock { + t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadBlock) + } + + // Check the disk layer, ensure it matches + block := chain.GetBlockByNumber(basic.expSnapshotBottom) + if block == nil { + t.Errorf("The corresponding block[%d] of the snapshot disk layer is missing", basic.expSnapshotBottom) + } else if !bytes.Equal(chain.snaps.DiskRoot().Bytes(), block.Root().Bytes()) { + t.Errorf("The snapshot disk layer root is incorrect, want %x, got %x", block.Root(), chain.snaps.DiskRoot()) + } + + // Check the snapshot, ensure it's integrated + if err := snapshot.VerifyState(chain.snaps, block.Root()); err != nil { + t.Errorf("The disk layer is not integrated: %v", err) + } +} + +func (basic *snapshotTestBasic) Dump() string { + buffer := new(strings.Builder) + + fmt.Fprint(buffer, "Chain:\n G") + for i := 0; i < basic.chainBlocks; i++ { + fmt.Fprintf(buffer, "->C%d", i+1) + } + fmt.Fprint(buffer, " (HEAD)\n\n") + + fmt.Fprintf(buffer, "Commit: G") + if basic.commitBlock > 0 { + fmt.Fprintf(buffer, ", C%d", basic.commitBlock) + } + fmt.Fprint(buffer, "\n") + + fmt.Fprintf(buffer, "Snapshot: G") + if basic.snapshotBlock > 0 { + fmt.Fprintf(buffer, ", C%d", basic.snapshotBlock) + } + fmt.Fprint(buffer, "\n") + + //if crash { + // fmt.Fprintf(buffer, "\nCRASH\n\n") + //} else { + // fmt.Fprintf(buffer, "\nSetHead(%d)\n\n", basic.setHead) + //} + fmt.Fprintf(buffer, "------------------------------\n\n") + + fmt.Fprint(buffer, "Expected in leveldb:\n G") + for i := 0; i < basic.expCanonicalBlocks; i++ { + fmt.Fprintf(buffer, "->C%d", i+1) + } + fmt.Fprintf(buffer, "\n\n") + fmt.Fprintf(buffer, "Expected head header : C%d\n", basic.expHeadHeader) + fmt.Fprintf(buffer, "Expected head fast block: C%d\n", basic.expHeadFastBlock) + if basic.expHeadBlock == 0 { + fmt.Fprintf(buffer, "Expected head block : G\n") + } else { + fmt.Fprintf(buffer, "Expected head block : C%d\n", basic.expHeadBlock) + } + if basic.expSnapshotBottom == 0 { + fmt.Fprintf(buffer, "Expected snapshot disk : G\n") + } else { + fmt.Fprintf(buffer, "Expected snapshot disk : C%d\n", basic.expSnapshotBottom) + } + return buffer.String() +} + +func (basic *snapshotTestBasic) teardown() { + basic.db.Close() + basic.gendb.Close() + os.RemoveAll(basic.datadir) +} + +// snapshotTest is a test case type for normal snapshot recovery. +// It can be used for testing a normal Geth restart.
+type snapshotTest struct { + snapshotTestBasic +} + +func (snaptest *snapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Restart the chain normally + chain.Stop() + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// crashSnapshotTest is a test case type for abnormal snapshot recovery. +// It can be used for testing a Geth restart after a crash. +type crashSnapshotTest struct { + snapshotTestBasic +} + +func (snaptest *crashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Pull the plug on the database, simulating a hard crash + db := chain.db + db.Close() + + // Start a new blockchain back up and see where the repair leads us + newdb, err := rawdb.NewLevelDBDatabaseWithFreezer(snaptest.datadir, 0, 0, snaptest.datadir, "") + if err != nil { + t.Fatalf("Failed to reopen persistent database: %v", err) + } + defer newdb.Close() + + // Note that instead of restarting the blockchain just once after the + // crash, we restart it twice here: once after the crash and once + // after a normal stop. This ensures the broken snapshot can be + // detected in both cases. + newchain, err := NewBlockChain(newdb, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newchain.Stop() + + newchain, err = NewBlockChain(newdb, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// gappedSnapshotTest is a test type used to test this scenario: +// - have a complete snapshot +// - restart without enabling the snapshot +// - insert a few blocks +// - restart with the snapshot enabled again +type gappedSnapshotTest struct { + snapshotTestBasic + gapped int // Number of blocks to insert without enabling snapshot +} + +func (snaptest *gappedSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Insert blocks without enabling snapshot if gapping is required.
+ chain.Stop() + gappedBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.gapped, func(i int, b *BlockGen) {}) + + // Insert a few more blocks without enabling snapshot + var cacheConfig = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, + } + newchain, err := NewBlockChain(snaptest.db, cacheConfig, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newchain.InsertChain(gappedBlocks) + newchain.Stop() + + // Restart the chain with the snapshot enabled + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// setHeadSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - set the head to a lower point +// - restart +type setHeadSnapshotTest struct { + snapshotTestBasic + setHead uint64 // Block number to set head back to +} + +func (snaptest *setHeadSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Rewind the chain if setHead operation is required. + chain.SetHead(snaptest.setHead) + chain.Stop() + + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// restartCrashSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - restart chain +// - insert more blocks with the snapshot enabled +// - commit the snapshot +// - crash +// - restart again +type restartCrashSnapshotTest struct { + snapshotTestBasic + newBlocks int +} + +func (snaptest *restartCrashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // First, stop the chain properly, with all snapshot journal + // and state committed. + chain.Stop() + + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}) + newchain.InsertChain(newBlocks) + + // Commit the entire snapshot into the disk if requested. Note only + // (a) snapshot root and (b) snapshot generator will be committed, + // the diff journal is not.
+ newchain.Snapshots().Cap(newBlocks[len(newBlocks)-1].Root(), 0) + + // Simulate the blockchain crash + // Don't call chain.Stop here, so that no snapshot + // journal or latest state will be committed + + // Restart the chain after the crash + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// wipeCrashSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - restart, insert more blocks without enabling the snapshot +// - restart again with the snapshot enabled +// - crash +type wipeCrashSnapshotTest struct { + snapshotTestBasic + newBlocks int +} + +func (snaptest *wipeCrashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // First, stop the chain properly, with all snapshot journal + // and state committed. + chain.Stop() + + config := &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, + } + newchain, err := NewBlockChain(snaptest.db, config, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}) + newchain.InsertChain(newBlocks) + newchain.Stop() + + // Restart the chain, the wiper should start working + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 256, + SnapshotWait: false, // Don't wait for rebuild + } + _, err = NewBlockChain(snaptest.db, config, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + // Simulate the blockchain crash. + + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + snaptest.verify(t, newchain, blocks) +} + +// Tests a Geth restart with a valid snapshot. Before the shutdown, the entire +// snapshot journal will be persisted correctly. In this case no snapshot +// recovery is required. +func TestRestartWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C8 + // Expected snapshot disk : G + test := &snapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 8, + expSnapshotBottom: 0, // Initial disk layer built from genesis + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth restart with a valid but "legacy" snapshot. Before the shutdown, +// the entire snapshot journal will be persisted correctly. In this case no +// snapshot recovery is required.
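One convention all of the crash-flavored tests in this file rely on is worth spelling out: a graceful chain.Stop() journals the snapshot diff layers and commits the latest state, whereas a simulated crash (dropping the chain, or closing the database underneath it) leaves behind only what Cap already persisted, which forces the recovery path inside NewBlockChain on the next start. A schematic contrast, assuming a *BlockChain from this package (the helper names are hypothetical):

// cleanShutdown simulates a graceful stop: the snapshot diff journal and
// the dirty trie state are flushed, so a restart needs no recovery.
func cleanShutdown(chain *BlockChain) {
	chain.Stop()
}

// crash simulates hard termination: nothing beyond the already persisted
// disk layer (snapshot root plus generator marker) survives.
func crash(chain *BlockChain) {
	chain.db.Close() // pull the plug without stopping the chain
}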
+func TestRestartWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C8 + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &snapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 8, + expSnapshotBottom: 0, // Initial disk layer built from genesis + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken snapshot. In this case the +// chain head should be rewound to a point with available state, and the new head +// must also be lower than the disk layer. But there is no committed point, so the +// chain should be rewound to genesis and the disk layer should be left +// for recovery. +func TestNoCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 4, // Last committed disk layer, waiting for recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken snapshot. In this case the +// chain head should be rewound to a point with available state, and the new head +// must also be lower than the disk layer. But there is only a low committed +// point, so the chain should be rewound to the committed point and the disk layer +// should be left for recovery. +func TestLowCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C2 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C2 + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 2, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 2, + expSnapshotBottom: 4, // Last committed disk layer, waiting for recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken snapshot. In this case +// the chain head should be rewound to a point with available state, and the +// new head must also be lower than the disk layer. But there is only a high +// committed point, so the chain should be rewound to genesis and the disk layer +// should be left for recovery.
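The crash tests in this group (above and below) all exercise the same head-selection rule: after a crash, the head is rewound to the highest committed state that does not sit above the snapshot disk layer, falling back to genesis if no such commit exists. Restated as a tiny hypothetical helper (not code from the diff, just a compact summary of the expectations):

// expectedHeadAfterCrash restates the rewind rule: a commit point is only
// usable if it is at or below the snapshot disk layer; otherwise the chain
// must fall all the way back to genesis (block 0).
func expectedHeadAfterCrash(commitBlock, diskLayer uint64) uint64 {
	if commitBlock > 0 && commitBlock <= diskLayer {
		return commitBlock // e.g. commit C2, disk layer C4 -> head C2
	}
	return 0 // e.g. no commit, or commit C6 above disk layer C4 -> head G
}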
+func TestHighCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C6 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 6, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 4, // Last committed disk layer, waiting for recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discarded +// and rebuilt from the new chain head. The new head here refers to the +// genesis because there is no committed point. +func TestNoCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 0, // Rebuilt snapshot from the latest HEAD (genesis) + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discarded +// and rebuilt from the new chain head. The new head here refers to the +// block-2 because it's committed into the disk. +func TestLowCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C2 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C2 + // Expected snapshot disk : C2 + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 2, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 2, + expSnapshotBottom: 2, // Rebuilt snapshot from the latest HEAD + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discarded +// and rebuilt from the new chain head.
+// +// The new head here refers to the genesis, the reason is: +// - the state of block-6 is committed into the disk +// - the legacy disk layer of block-4 is committed into the disk +// - the head is rewound to the genesis in order to find an available +// state lower than the disk layer +func TestHighCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C6 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 6, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 0, // Rebuilt snapshot from the latest HEAD (genesis) + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with snapshot enabled, then restarts without +// enabling the snapshot, and after that re-enables the snapshot again. In this +// case the snapshot should be rebuilt from the latest chain head. +func TestGappedNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C10 + // Expected snapshot disk : C10 + test := &gappedSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD + }, + gapped: 2, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with legacy snapshot enabled, then restarts +// without enabling the snapshot, and after that re-enables the snapshot again. +// In this case the snapshot should be rebuilt from the latest chain head. +func TestGappedLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C10 + // Expected snapshot disk : C10 + t.Skip("Legacy format testing is not supported") + test := &gappedSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD + }, + gapped: 2, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with snapshot enabled and had resetHead applied. +// In this case the head is rewound to the target (with state available). After +// that the chain is restarted and the original disk layer is kept.
+func TestSetHeadWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(4) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4 + // + // Expected head header : C4 + // Expected head fast block: C4 + // Expected head block : C4 + // Expected snapshot disk : G + test := &setHeadSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 4, + expHeadHeader: 4, + expHeadFastBlock: 4, + expHeadBlock: 4, + expSnapshotBottom: 0, // The initial disk layer is built from the genesis + }, + setHead: 4, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with snapshot (legacy-format) enabled and had +// resetHead applied. In this case the head is rewound to the target (with state +// available). After that the chain is restarted and the original disk layer is kept. +func TestSetHeadWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(4) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4 + // + // Expected head header : C4 + // Expected head fast block: C4 + // Expected head block : C4 + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &setHeadSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 4, + expHeadHeader: 4, + expHeadFastBlock: 4, + expHeadBlock: 4, + expSnapshotBottom: 0, // The initial disk layer is built from the genesis + }, + setHead: 4, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with snapshot (legacy-format) enabled and upgraded +// the disk layer journal (journal generator) to the latest format. After that, Geth +// is restarted after a crash. In this case Geth will find the new-format disk layer +// journal but with a legacy-format diff journal (the new format is never committed), +// and the invalid diff journal is expected to be dropped. +func TestRecoverSnapshotFromCrashWithLegacyDiffJournal(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C8 + // Expected snapshot disk : C10 + t.Skip("Legacy format testing is not supported") + test := &restartCrashSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 8, // The persisted state in the first run + expSnapshotBottom: 10, // The persisted disk layer in the second run + }, + newBlocks: 2, + } + test.test(t) + test.teardown() +} + +// Tests a Geth that was running with a complete snapshot and then imports a few +// more new blocks on top without enabling the snapshot. After the restart, +// a crash happens. Check that everything is ok after the restart.
+func TestRecoverSnapshotFromWipingCrash(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C8 + // Expected snapshot disk : C10 + test := &wipeCrashSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, + }, + newBlocks: 2, + } + test.test(t) + test.teardown() +} diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index e5aefbe17dab..eef8acc01852 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -17,6 +17,8 @@ package rawdb import ( + "encoding/binary" + "github.com/celo-org/celo-blockchain/common" "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/log" @@ -118,3 +120,58 @@ func DeleteSnapshotJournal(db ethdb.KeyValueWriter) { log.Crit("Failed to remove snapshot journal", "err", err) } } + +// ReadSnapshotGenerator retrieves the serialized snapshot generator saved at +// the last shutdown. +func ReadSnapshotGenerator(db ethdb.KeyValueReader) []byte { + data, _ := db.Get(snapshotGeneratorKey) + return data +} + +// WriteSnapshotGenerator stores the serialized snapshot generator to save at +// shutdown. +func WriteSnapshotGenerator(db ethdb.KeyValueWriter, generator []byte) { + if err := db.Put(snapshotGeneratorKey, generator); err != nil { + log.Crit("Failed to store snapshot generator", "err", err) + } +} + +// DeleteSnapshotGenerator deletes the serialized snapshot generator saved at +// the last shutdown +func DeleteSnapshotGenerator(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotGeneratorKey); err != nil { + log.Crit("Failed to remove snapshot generator", "err", err) + } +} + +// ReadSnapshotRecoveryNumber retrieves the block number of the last persisted +// snapshot layer. +func ReadSnapshotRecoveryNumber(db ethdb.KeyValueReader) *uint64 { + data, _ := db.Get(snapshotRecoveryKey) + if len(data) == 0 { + return nil + } + if len(data) != 8 { + return nil + } + number := binary.BigEndian.Uint64(data) + return &number +} + +// WriteSnapshotRecoveryNumber stores the block number of the last persisted +// snapshot layer. +func WriteSnapshotRecoveryNumber(db ethdb.KeyValueWriter, number uint64) { + var buf [8]byte + binary.BigEndian.PutUint64(buf[:], number) + if err := db.Put(snapshotRecoveryKey, buf[:]); err != nil { + log.Crit("Failed to store snapshot recovery number", "err", err) + } +} + +// DeleteSnapshotRecoveryNumber deletes the block number of the last persisted +// snapshot layer. +func DeleteSnapshotRecoveryNumber(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotRecoveryKey); err != nil { + log.Crit("Failed to remove snapshot recovery number", "err", err) + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 628e428b5b96..d827220b79ec 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -50,6 +50,12 @@ var ( // snapshotJournalKey tracks the in-memory diff layers across restarts. snapshotJournalKey = []byte("SnapshotJournal") + // snapshotGeneratorKey tracks the snapshot generation marker across restarts. 
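For reference, the recovery marker added in the rawdb accessors above is a plain 8-byte big-endian block number. A quick round-trip through the new helpers, using the in-memory database from this repo (the test function name is hypothetical; the helpers are the ones introduced by this diff):

func TestSnapshotRecoveryNumberRoundTrip(t *testing.T) {
	db := rawdb.NewMemoryDatabase()

	// No marker yet: the reader reports nil rather than zero.
	if n := rawdb.ReadSnapshotRecoveryNumber(db); n != nil {
		t.Fatalf("expected no recovery marker, got %d", *n)
	}
	// Write the marker, read it back, then delete it again.
	rawdb.WriteSnapshotRecoveryNumber(db, 1337)
	if n := rawdb.ReadSnapshotRecoveryNumber(db); n == nil || *n != 1337 {
		t.Fatalf("recovery marker mismatch: %v", n)
	}
	rawdb.DeleteSnapshotRecoveryNumber(db)
	if n := rawdb.ReadSnapshotRecoveryNumber(db); n != nil {
		t.Fatalf("stale recovery marker: %d", *n)
	}
}

The schema keys used by these helpers follow below.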
+ snapshotGeneratorKey = []byte("SnapshotGenerator") + + // snapshotRecoveryKey tracks the snapshot recovery marker across restarts. + snapshotRecoveryKey = []byte("SnapshotRecovery") + // txIndexTailKey tracks the oldest block whose transactions have been indexed. txIndexTailKey = []byte("TransactionIndexTail") diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index dd1741ce6e23..b464c28dca8e 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -240,7 +240,7 @@ func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGenerato } in <- leaf - // Accumulate the generaation statistic if it's required. + // Accumulate the generation statistic if it's required. processed++ if time.Since(logged) > 3*time.Second && stats != nil { if account == (common.Hash{}) { diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 900b34e9d7d6..aed58f17a281 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -44,7 +44,7 @@ var ( // aggregatorItemLimit is an approximate number of items that will end up // in the aggregator layer before it's flushed out to disk. A plain account // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot - // 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at + // 0B (+hash). Slots are mostly set/unset in lockstep, so they average at // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a // smaller number to be on the safe side. aggregatorItemLimit = aggregatorMemoryLimit / 42 @@ -114,9 +114,9 @@ type diffLayer struct { // deleted, all data in other set belongs to the "new" A. destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts accountList []common.Hash // List of accounts for iteration. If it exists, it's sorted, otherwise it's nil - accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) + accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted) storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil - storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval.
one per account (nil means deleted) diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer @@ -191,19 +191,15 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s if blob == nil { panic(fmt.Sprintf("account %#x nil", accountHash)) } + // Determine memory size and track the dirty writes + dl.memory += uint64(common.HashLength + len(blob)) + snapshotDirtyAccountWriteMeter.Mark(int64(len(blob))) } for accountHash, slots := range storage { if slots == nil { panic(fmt.Sprintf("storage %#x nil", accountHash)) } - } - // Determine memory size and track the dirty writes - for _, data := range accounts { - dl.memory += uint64(common.HashLength + len(data)) - snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) - } - // Determine memory size and track the dirty writes - for _, slots := range storage { + // Determine memory size and track the dirty writes for _, data := range slots { dl.memory += uint64(common.HashLength + len(data)) snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) @@ -300,13 +296,17 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { if !hit { hit = dl.diffed.Contains(destructBloomHasher(hash)) } + var origin *diskLayer + if !hit { + origin = dl.origin // extract origin while holding the lock + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory // diff layers, reach straight into the bottom persistent disk layer - if !hit { + if origin != nil { snapshotBloomAccountMissMeter.Mark(1) - return dl.origin.AccountRLP(hash) + return origin.AccountRLP(hash) } // The bloom filter hit, start poking in the internal maps return dl.accountRLP(hash, 0) @@ -362,13 +362,17 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro if !hit { hit = dl.diffed.Contains(destructBloomHasher(accountHash)) } + var origin *diskLayer + if !hit { + origin = dl.origin // extract origin while holding the lock + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory // diff layers, reach straight into the bottom persistent disk layer - if !hit { + if origin != nil { snapshotBloomStorageMissMeter.Mark(1) - return dl.origin.Storage(accountHash, storageHash) + return origin.Storage(accountHash, storageHash) } // The bloom filter hit, start poking in the internal maps return dl.storage(accountHash, storageHash, 0) @@ -482,7 +486,7 @@ func (dl *diffLayer) flatten() snapshot { } } -// AccountList returns a sorted list of all accounts in this difflayer, including +// AccountList returns a sorted list of all accounts in this diffLayer, including // the deleted ones. // // Note, the returned slice is not a copy, so do not modify it. @@ -513,7 +517,7 @@ func (dl *diffLayer) AccountList() []common.Hash { return dl.accountList } -// StorageList returns a sorted list of all storage slot hashes in this difflayer +// StorageList returns a sorted list of all storage slot hashes in this diffLayer // for the given account. If the whole storage is destructed in this layer, then // an additional flag *destructed = true* will be returned, otherwise the flag is // false. Besides, the returned list will include the hash of deleted storage slot. 
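The AccountRLP and Storage rewrites above fix a subtle race: dl.origin was previously dereferenced after dl.lock.RUnlock(), so a concurrent flatten could swap the origin pointer mid-lookup. The fix captures the pointer while the read lock is still held. Here is the pattern in isolation, as a self-contained sketch with deliberately simplified types (not the real snapshot structures, just the locking idiom):

import "sync"

type diskLayer struct{ data map[string][]byte }

func (d *diskLayer) get(key string) []byte { return d.data[key] }

type diffLayer struct {
	lock   sync.RWMutex
	origin *diskLayer // may be swapped by a concurrent flatten
	local  map[string][]byte
}

// get captures origin while the read lock is still held and only uses the
// captured pointer after unlocking. Reading l.origin after RUnlock, as the
// old code did, races with writers that replace it.
func (l *diffLayer) get(key string) []byte {
	l.lock.RLock()
	val, hit := l.local[key]
	var origin *diskLayer
	if !hit {
		origin = l.origin
	}
	l.lock.RUnlock()

	if origin != nil {
		return origin.get(key)
	}
	return val
}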
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 2c4282f2a730..cc48c84747cc 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -314,7 +314,7 @@ func BenchmarkSearchSlot(b *testing.B) { // With accountList and sorting // BenchmarkFlatten-6 50 29890856 ns/op // -// Without sorting and tracking accountlist +// Without sorting and tracking accountList // BenchmarkFlatten-6 300 5511511 ns/op func BenchmarkFlatten(b *testing.B) { fill := func(parent snapshot) *diffLayer { diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index d6195d6e6f6f..833cabee1c03 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -31,7 +31,7 @@ import ( // diskLayer is a low level persistent snapshot built on top of a key-value store. type diskLayer struct { diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot - triedb *trie.Database // Trie node cache for reconstuction purposes + triedb *trie.Database // Trie node cache for reconstruction purposes cache *fastcache.Cache // Cache to avoid hitting the disk for direct access root common.Hash // Root hash of the base snapshot diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go index 79d015b8a544..364bdcae4a9a 100644 --- a/core/state/snapshot/disklayer_test.go +++ b/core/state/snapshot/disklayer_test.go @@ -28,6 +28,7 @@ import ( "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/ethdb/leveldb" "github.com/celo-org/celo-blockchain/ethdb/memorydb" + "github.com/celo-org/celo-blockchain/rlp" ) // reverse reverses the contents of a byte slice. It's used to update random accs @@ -429,6 +430,81 @@ func TestDiskPartialMerge(t *testing.T) { } } +// Tests that when the bottom-most diff layer is merged into the disk +// layer, the corresponding generator is persisted correctly. +func TestDiskGeneratorPersistence(t *testing.T) { + var ( + accOne = randomHash() + accTwo = randomHash() + accOneSlotOne = randomHash() + accOneSlotTwo = randomHash() + + accThree = randomHash() + accThreeSlot = randomHash() + baseRoot = randomHash() + diffRoot = randomHash() + diffTwoRoot = randomHash() + genMarker = append(randomHash().Bytes(), randomHash().Bytes()...) + ) + // Testing scenario 1, the disk layer is still under construction.
+ db := rawdb.NewMemoryDatabase() + + rawdb.WriteAccountSnapshot(db, accOne, accOne[:]) + rawdb.WriteStorageSnapshot(db, accOne, accOneSlotOne, accOneSlotOne[:]) + rawdb.WriteStorageSnapshot(db, accOne, accOneSlotTwo, accOneSlotTwo[:]) + rawdb.WriteSnapshotRoot(db, baseRoot) + + // Create a disk layer based on all above updates + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + baseRoot: &diskLayer{ + diskdb: db, + cache: fastcache.New(500 * 1024), + root: baseRoot, + genMarker: genMarker, + }, + }, + } + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffRoot, baseRoot, nil, map[common.Hash][]byte{ + accTwo: accTwo[:], + }, nil); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + if err := snaps.Cap(diffRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + blob := rawdb.ReadSnapshotGenerator(db) + var generator journalGenerator + if err := rlp.DecodeBytes(blob, &generator); err != nil { + t.Fatalf("Failed to decode snapshot generator: %v", err) + } + if !bytes.Equal(generator.Marker, genMarker) { + t.Fatalf("Generator marker does not match") + } + // Test scenario 2, the disk layer is fully generated + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffTwoRoot, diffRoot, nil, map[common.Hash][]byte{ + accThree: accThree.Bytes(), + }, map[common.Hash]map[common.Hash][]byte{ + accThree: {accThreeSlot: accThreeSlot.Bytes()}, + }); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + diskLayer := snaps.layers[snaps.diskRoot()].(*diskLayer) + diskLayer.genMarker = nil // Construction finished + if err := snaps.Cap(diffTwoRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + blob = rawdb.ReadSnapshotGenerator(db) + if err := rlp.DecodeBytes(blob, &generator); err != nil { + t.Fatalf("Failed to decode snapshot generator: %v", err) + } + if len(generator.Marker) != 0 { + t.Fatalf("Failed to update snapshot generator") + } +} + // Tests that merging something into a disk layer persists it into the database // and invalidates any previously written and cached values, discarding anything // after the in-progress generation marker. diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 6f3e198a1dfd..1f0ed4ec192b 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "encoding/binary" + "fmt" "math/big" "time" @@ -54,9 +55,11 @@ type generatorStats struct { // Log creates a contextual log with the given message and the context pulled // from the internally maintained statistics. -func (gs *generatorStats) Log(msg string, marker []byte) { +func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) { var ctx []interface{} - + if root != (common.Hash{}) { + ctx = append(ctx, []interface{}{"root", root}...) + } // Figure out whether we're after or within an account switch len(marker) { case common.HashLength: @@ -98,36 +101,79 @@ func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache i wiper = wipeSnapshot(diskdb, true) } // Create a new disk layer with an initialized state marker at zero - rawdb.WriteSnapshotRoot(diskdb, root) - + var ( + stats = &generatorStats{wiping: wiper, start: time.Now()} + batch = diskdb.NewBatch() + genMarker = []byte{} // Initialized but empty!
+ ) + rawdb.WriteSnapshotRoot(batch, root) + journalProgress(batch, genMarker, stats) + if err := batch.Write(); err != nil { + log.Crit("Failed to write initialized state marker", "error", err) + } base := &diskLayer{ diskdb: diskdb, triedb: triedb, root: root, cache: fastcache.New(cache * 1024 * 1024), - genMarker: []byte{}, // Initialized but empty! + genMarker: genMarker, genPending: make(chan struct{}), genAbort: make(chan chan *generatorStats), } - go base.generate(&generatorStats{wiping: wiper, start: time.Now()}) + go base.generate(stats) + log.Debug("Start snapshot generation", "root", root) return base } +// journalProgress persists the generator stats into the database to resume later. +func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) { + // Write out the generator marker. Note it's a standalone disk layer generator + // which is not mixed with the journal. It's ok if the generator is persisted + // while the journal is not. + entry := journalGenerator{ + Done: marker == nil, + Marker: marker, + } + if stats != nil { + entry.Wiping = (stats.wiping != nil) + entry.Accounts = stats.accounts + entry.Slots = stats.slots + entry.Storage = uint64(stats.storage) + } + blob, err := rlp.EncodeToBytes(entry) + if err != nil { + panic(err) // Cannot happen, here to catch dev errors + } + var logstr string + switch { + case marker == nil: + logstr = "done" + case bytes.Equal(marker, []byte{}): + logstr = "empty" + case len(marker) == common.HashLength: + logstr = fmt.Sprintf("%#x", marker) + default: + logstr = fmt.Sprintf("%#x:%#x", marker[:common.HashLength], marker[common.HashLength:]) + } + log.Debug("Journalled generator progress", "progress", logstr) + rawdb.WriteSnapshotGenerator(db, blob) +} + // generate is a background thread that iterates over the state and storage tries, // constructing the state snapshot. All the arguments are purely for statistics -gethering and logging, since the method surfs the blocks as they arrive, often +gathering and logging, since the method surfs the blocks as they arrive, often being restarted.
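The generation marker journalled above has exactly four shapes, which the logging switch in journalProgress enumerates. As a standalone reference (a sketch assuming only the repo's common package and the standard fmt package):

// markerState classifies a generator marker the way the journalProgress
// logging switch does: nil means generation is done, an empty non-nil slice
// means it has not started, one hash length points into the account trie,
// and anything longer is an account hash followed by a storage-slot prefix.
func markerState(marker []byte) string {
	switch {
	case marker == nil:
		return "done"
	case len(marker) == 0:
		return "empty (not started)"
	case len(marker) == common.HashLength:
		return fmt.Sprintf("account %#x", marker)
	default:
		return fmt.Sprintf("account %#x, slot prefix %#x",
			marker[:common.HashLength], marker[common.HashLength:])
	}
}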
func (dl *diskLayer) generate(stats *generatorStats) { // If a database wipe is in operation, wait until it's done if stats.wiping != nil { - stats.Log("Wiper running, state snapshotting paused", dl.genMarker) + stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker) select { // If wiper is done, resume normal mode of operation case <-stats.wiping: stats.wiping = nil stats.start = time.Now() - // If generator was aboted during wipe, return + // If generator was aborted during wipe, return case abort := <-dl.genAbort: abort <- stats return @@ -137,13 +183,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { accTrie, err := trie.NewSecure(dl.root, dl.triedb) if err != nil { // The account trie is missing (GC), surf the chain until one becomes available - stats.Log("Trie missing, state snapshotting paused", dl.genMarker) + stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) abort := <-dl.genAbort abort <- stats return } - stats.Log("Resuming state snapshot generation", dl.genMarker) + stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) var accMarker []byte if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that @@ -184,15 +230,19 @@ func (dl *diskLayer) generate(stats *generatorStats) { if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { // Only write and set the marker if we actually did something useful if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker := accountHash[:] + journalProgress(batch, marker, stats) + batch.Write() batch.Reset() dl.lock.Lock() - dl.genMarker = accountHash[:] + dl.genMarker = marker dl.lock.Unlock() } if abort != nil { - stats.Log("Aborting state snapshot generation", accountHash[:]) + stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) abort <- stats return } @@ -201,7 +251,10 @@ func (dl *diskLayer) generate(stats *generatorStats) { if acc.Root != emptyRoot { storeTrie, err := trie.NewSecure(acc.Root, dl.triedb) if err != nil { - log.Crit("Storage trie inaccessible for snapshot generation", "err", err) + log.Error("Generator failed to access storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err) + abort := <-dl.genAbort + abort <- stats + return } var storeMarker []byte if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { @@ -222,32 +275,54 @@ func (dl *diskLayer) generate(stats *generatorStats) { if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { // Only write and set the marker if we actually did something useful if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker := append(accountHash[:], storeIt.Key...) + journalProgress(batch, marker, stats) + batch.Write() batch.Reset() dl.lock.Lock() - dl.genMarker = append(accountHash[:], storeIt.Key...) 
+ dl.genMarker = marker dl.lock.Unlock() } if abort != nil { - stats.Log("Aborting state snapshot generation", append(accountHash[:], storeIt.Key...)) + stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...)) abort <- stats return } + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, append(accountHash[:], storeIt.Key...)) + logged = time.Now() + } } } + if err := storeIt.Err; err != nil { + log.Error("Generator failed to iterate storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err) + abort := <-dl.genAbort + abort <- stats + return + } } if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", accIt.Key) + stats.Log("Generating state snapshot", dl.root, accIt.Key) logged = time.Now() } // Some account processed, unmark the marker accMarker = nil } - // Snapshot fully generated, set the marker to nil - if batch.ValueSize() > 0 { - batch.Write() + if err := accIt.Err; err != nil { + log.Error("Generator failed to iterate account trie", "root", dl.root, "err", err) + abort := <-dl.genAbort + abort <- stats + return } + // Snapshot fully generated, set the marker to nil. + // Note even if there is nothing to commit, persist the + // generator anyway to mark the snapshot as complete. + journalProgress(batch, nil, stats) + batch.Write() + log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go new file mode 100644 index 000000000000..648325f00798 --- /dev/null +++ b/core/state/snapshot/generate_test.go @@ -0,0 +1,190 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package snapshot + +import ( + "math/big" + "testing" + "time" + + "github.com/celo-org/celo-blockchain/common" + "github.com/celo-org/celo-blockchain/ethdb/memorydb" + "github.com/celo-org/celo-blockchain/rlp" + "github.com/celo-org/celo-blockchain/trie" +) + +// Tests that snapshot generation errors out correctly in case of a missing trie +// node in the account trie. +func TestGenerateCorruptAccountTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // without any storage slots to keep the test smaller.
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + tr, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-1"), val) // 0xc7a30f39aff471c95d8a837497ad0e49b65be475cc0953540f80cfcdbdcd9074 + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-3"), val) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 + tr.Commit(nil) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 + + // Delete an account trie leaf and ensure the generator chokes + triedb.Commit(common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), false) + diskdb.Delete(common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt account trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + +// Tests that snapshot generation errors out correctly in case of a missing root +// trie node for a storage trie. It's similar to internal corruption but it is +// handled differently inside the generator. +func TestGenerateMissingStorageTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also have the same 3-slot storage trie attached.
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + + // We can only corrupt the disk database, so flush the tries out + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), + ) + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), + ) + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false) + + // Delete a storage trie root and ensure the generator chokes + diskdb.Delete(common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + +// Tests that snapshot generation errors out correctly in case of a missing trie +// node in a storage trie. +func TestGenerateCorruptStorageTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also have the same 3-slot storage trie attached.
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + + // We can only corrupt the disk database, so flush the tries out + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), + ) + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), + ) + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false) + + // Delete a storage trie leaf and ensure the generator chokes + diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index d0576cb7fb3d..b30acfd4bd33 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -133,7 +133,7 @@ func (it *diffAccountIterator) Hash() common.Hash { // Account returns the RLP encoded slim account the iterator is currently at. // This method may _fail_, if the underlying layer has been flattened between -// the call to Next and Acccount. That type of error will set it.Err. +// the call to Next and Account. That type of error will set it.Err. // This method assumes that flattening does not delete elements from // the accountdata mapping (writing nil into it is fine though), and will panic // if elements have been deleted. 
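The three generator tests above rely on the same teardown pattern that generate() itself implements: generation never surfaces an error to the caller, so the only way to stop a generator blocked on a missing trie node is the genAbort handshake, where the aborter sends a reply channel and receives the progress stats back. A minimal stand-alone sketch of that handshake, with generatorStats as a simplified stand-in for the real type:

package main

import (
	"fmt"
	"time"
)

// generatorStats is a simplified stand-in for the real progress-tracking type.
type generatorStats struct{ accounts uint64 }

func main() {
	// genAbort carries a reply channel, mirroring diskLayer.genAbort above.
	genAbort := make(chan chan *generatorStats)

	go func() {
		stats := &generatorStats{}
		for {
			select {
			case abort := <-genAbort:
				abort <- stats // hand the progress back to the aborter, then exit
				return
			default:
				stats.accounts++ // stand-in for snapshotting one account
				time.Sleep(time.Millisecond)
			}
		}
	}()
	time.Sleep(25 * time.Millisecond)

	// Signal abortion to the generator and wait for it to tear down,
	// exactly as the tests do with snap.genAbort.
	stop := make(chan *generatorStats)
	genAbort <- stop
	fmt.Println("aborted after", (<-stop).accounts, "accounts")
}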
@@ -243,7 +243,7 @@ type diffStorageIterator struct { } // StorageIterator creates a storage iterator over a single diff layer. -// Execept the storage iterator is returned, there is an additional flag +// Except the storage iterator is returned, there is an additional flag // "destructed" returned. If it's true then it means the whole storage is // destructed in this layer(maybe recreated too), don't bother deeper layer // for storage retrieval. diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go index 6e51aed1413f..4af1dccd58a8 100644 --- a/core/state/snapshot/iterator_binary.go +++ b/core/state/snapshot/iterator_binary.go @@ -37,7 +37,7 @@ type binaryIterator struct { } // initBinaryAccountIterator creates a simplistic iterator to step over all the -// accounts in a slow, but eaily verifiable way. Note this function is used for +// accounts in a slow, but easily verifiable way. Note this function is used for // initialization, use `newBinaryAccountIterator` as the API. func (dl *diffLayer) initBinaryAccountIterator() Iterator { parent, ok := dl.parent.(*diffLayer) @@ -62,7 +62,7 @@ func (dl *diffLayer) initBinaryAccountIterator() Iterator { } // initBinaryStorageIterator creates a simplistic iterator to step over all the -// storage slots in a slow, but eaily verifiable way. Note this function is used +// storage slots in a slow, but easily verifiable way. Note this function is used // for initialization, use `newBinaryStorageIterator` as the API. func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator { parent, ok := dl.parent.(*diffLayer) @@ -199,14 +199,14 @@ func (it *binaryIterator) Release() { } // newBinaryAccountIterator creates a simplistic account iterator to step over -// all the accounts in a slow, but eaily verifiable way. +// all the accounts in a slow, but easily verifiable way. func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { iter := dl.initBinaryAccountIterator() return iter.(AccountIterator) } // newBinaryStorageIterator creates a simplistic account iterator to step over -// all the storage slots in a slow, but eaily verifiable way. +// all the storage slots in a slow, but easily verifiable way. func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator { iter := dl.initBinaryStorageIterator(account) return iter.(StorageIterator) diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index 9ef1ee3887f2..ad6410997228 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -75,7 +75,7 @@ type fastIterator struct { fail error } -// newFastIterator creates a new hierarhical account or storage iterator with one +// newFastIterator creates a new hierarchical account or storage iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. func newFastIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) { @@ -335,14 +335,14 @@ func (fi *fastIterator) Debug() { fmt.Println() } -// newFastAccountIterator creates a new hierarhical account iterator with one +// newFastAccountIterator creates a new hierarchical account iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. 
func newFastAccountIterator(tree *Tree, root common.Hash, seek common.Hash) (AccountIterator, error) { return newFastIterator(tree, root, common.Hash{}, seek, true) } -// newFastStorageIterator creates a new hierarhical storage iterator with one +// newFastStorageIterator creates a new hierarchical storage iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. func newFastStorageIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 6bddc4b90a45..332fccd556e3 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -33,6 +33,8 @@ import ( "github.com/celo-org/celo-blockchain/trie" ) +const journalVersion uint64 = 0 + // journalGenerator is a disk layer entry containing the generator progress marker. type journalGenerator struct { Wiping bool // Whether the database was in progress of being wiped @@ -61,8 +63,91 @@ type journalStorage struct { Vals [][]byte } +// loadAndParseLegacyJournal tries to parse the snapshot journal in legacy format. +func loadAndParseLegacyJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) { + // Retrieve the journal; for a legacy journal it must exist, since even + // for a 0-layer snapshot it stores whether we've already generated the + // snapshot or are in progress only. + journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + return nil, journalGenerator{}, errors.New("missing or corrupted snapshot journal") + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // Read the snapshot generation progress for the disk layer + var generator journalGenerator + if err := r.Decode(&generator); err != nil { + return nil, journalGenerator{}, fmt.Errorf("failed to load snapshot progress marker: %v", err) + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, generator, err + } + return snapshot, generator, nil +} + +// loadAndParseJournal tries to parse the snapshot journal in the latest format. +func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) { + // Retrieve the disk layer generator. It must exist no matter whether the + // snapshot is fully generated or not; otherwise the entire disk + // layer is invalid. + generatorBlob := rawdb.ReadSnapshotGenerator(db) + if len(generatorBlob) == 0 { + return nil, journalGenerator{}, errors.New("missing snapshot generator") + } + var generator journalGenerator + if err := rlp.DecodeBytes(generatorBlob, &generator); err != nil { + return nil, journalGenerator{}, fmt.Errorf("failed to decode snapshot generator: %v", err) + } + // Retrieve the diff layer journal. It's possible that the journal is + // non-existent, e.g. the disk layer was generating while Geth crashed + // without persisting the diff journal. + // So if there is no journal, or the journal is invalid (e.g. the journal + // doesn't match the disk layer, or it's a legacy-format journal), we just + // discard all diffs and try to recover them later.
+ journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing") + return base, generator, nil + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // First, resolve the first element as the journal version + version, err := r.Uint() + if err != nil { + log.Warn("Failed to resolve the journal version", "error", err) + return base, generator, nil + } + if version != journalVersion { + log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) + return base, generator, nil + } + // Second, resolve the disk layer root and ensure it's continuous with the + // disk layer. Note that at this point we know the journal has the correct + // version, so we expect everything to resolve properly. + var root common.Hash + if err := r.Decode(&root); err != nil { + return nil, journalGenerator{}, errors.New("missing disk layer root") + } + // If the diff journal doesn't match the disk layer, discard it. This can + // happen when Geth crashes without persisting the latest diff journal. + if !bytes.Equal(root.Bytes(), base.root.Bytes()) { + log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "unmatched") + return base, generator, nil + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, journalGenerator{}, err + } + log.Debug("Loaded snapshot journal", "diskroot", base.root, "diffhead", snapshot.Root()) + return snapshot, generator, nil +} + // loadSnapshot loads a pre-existing state snapshot backed by a key-value store. -func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) { +func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, error) { // Retrieve the block number and hash of the snapshot, failing if no snapshot // is present in the database (or crashed mid-update). baseRoot := rawdb.ReadSnapshotRoot(diskdb) @@ -75,28 +160,36 @@ cache: fastcache.New(cache * 1024 * 1024), root: baseRoot, } - // Retrieve the journal, it must exist since even for 0 layer it stores whether - // we've already generated the snapshot or are in progress only - journal := rawdb.ReadSnapshotJournal(diskdb) - if len(journal) == 0 { - return nil, errors.New("missing or corrupted snapshot journal") - } - r := rlp.NewStream(bytes.NewReader(journal), 0) - - // Read the snapshot generation progress for the disk layer - var generator journalGenerator - if err := r.Decode(&generator); err != nil { - return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err) + var legacy bool + snapshot, generator, err := loadAndParseJournal(diskdb, base) + if err != nil { + log.Warn("Failed to load new-format journal", "error", err) + snapshot, generator, err = loadAndParseLegacyJournal(diskdb, base) + legacy = true } - // Load all the snapshot diffs from the journal - snapshot, err := loadDiffLayer(base, r) if err != nil { return nil, err } - // Entire snapshot journal loaded, sanity check the head and return - // Journal doesn't exist, don't worry if it's not supposed to + // Entire snapshot journal loaded, sanity check the head. If the loaded + // snapshot doesn't match the current state root, print a warning log, + // or discard the entire snapshot if it's a legacy one.
+ // + // Possible scenario: Geth crashed without persisting the journal and was + // then restarted; the head is rewound to a point with available state (trie), + // which is below the snapshot. In this case the snapshot can be recovered + // by re-executing blocks, but right now it's unavailable. if head := snapshot.Root(); head != root { - return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + // If it's a legacy snapshot, or a new-format snapshot that's not in + // recovery mode, return the error here to force a rebuild of the + // entire snapshot. + if legacy || !recovery { + return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + } + // We're in snapshot recovery: the assumption is that the disk layer + // is always higher than the chain head. The snapshot can eventually + // be recovered once the chain head goes beyond the disk layer. + log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root) } // Everything loaded correctly, resume any suspended operations if !generator.Done { @@ -183,8 +276,8 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) } -// Journal writes the persistent layer generator stats into a buffer to be stored -// in the database as the snapshot journal. +// Journal terminates any in-progress snapshot generation, also implicitly pushing +// the progress into the database. func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // If the snapshot is currently being generated, abort it var stats *generatorStats @@ -193,7 +286,86 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { dl.genAbort <- abort if stats = <-abort; stats != nil { - stats.Log("Journalling in-progress snapshot", dl.genMarker) + stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker) + } + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return common.Hash{}, ErrSnapshotStale + } + // Ensure the generator stats are written even if none were run this cycle + journalProgress(dl.diskdb, dl.genMarker, stats) + + log.Debug("Journalled disk layer", "root", dl.root) + return dl.root, nil +} + +// Journal writes the memory layer contents into a buffer to be stored in the +// database as the snapshot journal.
+func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { + // Journal the parent first + base, err := dl.parent.Journal(buffer) + if err != nil { + return common.Hash{}, err + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.Stale() { + return common.Hash{}, ErrSnapshotStale + } + // Everything below was journalled, persist this layer too + if err := rlp.Encode(buffer, dl.root); err != nil { + return common.Hash{}, err + } + destructs := make([]journalDestruct, 0, len(dl.destructSet)) + for hash := range dl.destructSet { + destructs = append(destructs, journalDestruct{Hash: hash}) + } + if err := rlp.Encode(buffer, destructs); err != nil { + return common.Hash{}, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(buffer, accounts); err != nil { + return common.Hash{}, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(buffer, storage); err != nil { + return common.Hash{}, err + } + log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) + return base, nil +} + +// LegacyJournal writes the persistent layer generator stats into a buffer +// to be stored in the database as the snapshot journal. +// +// Note it's the legacy version which is only used in testing right now. +func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) { + // If the snapshot is currently being generated, abort it + var stats *generatorStats + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + + if stats = <-abort; stats != nil { + stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker) } } // Ensure the layer didn't get stale @@ -214,6 +386,7 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { entry.Slots = stats.slots entry.Storage = uint64(stats.storage) } + log.Debug("Legacy journalled disk layer", "root", dl.root) if err := rlp.Encode(buffer, entry); err != nil { return common.Hash{}, err } @@ -222,9 +395,11 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // Journal writes the memory layer contents into a buffer to be stored in the // database as the snapshot journal. -func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { +// +// Note it's the legacy version which is only used in testing right now. 
+func (dl *diffLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) { // Journal the parent first - base, err := dl.parent.Journal(buffer) + base, err := dl.parent.LegacyJournal(buffer) if err != nil { return common.Hash{}, err } @@ -266,5 +441,6 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { if err := rlp.Encode(buffer, storage); err != nil { return common.Hash{}, err } + log.Debug("Legacy journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) return base, nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index f575a092dcc7..c46d8ed8ffcb 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -29,6 +29,7 @@ import ( "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/log" "github.com/celo-org/celo-blockchain/metrics" + "github.com/celo-org/celo-blockchain/rlp" "github.com/celo-org/celo-blockchain/trie" ) @@ -86,6 +87,10 @@ var ( // range of accounts covered. ErrNotCoveredYet = errors.New("not covered yet") + // ErrNotConstructed is returned if the caller wants to iterate the snapshot + // while generation is not finished yet. + ErrNotConstructed = errors.New("snapshot is not constructed") + // errSnapshotCycle is returned if a snapshot is attempted to be inserted // that forms a cycle in the snapshot tree. errSnapshotCycle = errors.New("snapshot cycle") @@ -132,6 +137,10 @@ type snapshot interface { // flattening everything down (bad for reorgs). Journal(buffer *bytes.Buffer) (common.Hash, error) + // LegacyJournal is basically identical to Journal. It's the legacy version for + // flushing the legacy journal. Now the only purpose of this function is testing. + LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) + // Stale return whether this layer has become stale (was flattened across) or // if it's still live. Stale() bool @@ -164,10 +173,12 @@ type Tree struct { // store (with a number of memory layers from a journal), ensuring that the head // of the snapshot matches the expected one. // -// If the snapshot is missing or inconsistent, the entirety is deleted and will -// be reconstructed from scratch based on the tries in the key-value store, on a -// background thread. +// If the snapshot is missing or the disk layer is broken, the entire snapshot +// is deleted and will be reconstructed from scratch based on the tries in the +// key-value store, on a background thread. If the memory layers in the journal +// are not continuous with the disk layer, or the journal is missing, all diffs +// will be discarded iff we're in "recovery" mode; otherwise a rebuild is mandatory. +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree { // Create a new, empty snapshot tree snap := &Tree{ diskdb: diskdb, @@ -179,7 +190,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm defer snap.waitBuild() } // Attempt to load a previously persisted snapshot and rebuild one if failed - head, err := loadSnapshot(diskdb, triedb, cache, root) + head, err := loadSnapshot(diskdb, triedb, cache, root, recovery) if err != nil { log.Warn("Failed to load snapshot, regenerating", "err", err) snap.Rebuild(root) @@ -194,7 +205,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm } // waitBuild blocks until the snapshot finishes rebuilding.
This method is meant -// to be used by tests to ensure we're testing what we believe we are. +// to be used by tests to ensure we're testing what we believe we are. func (t *Tree) waitBuild() { // Find the rebuild termination channel var done chan struct{} @@ -236,11 +247,11 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs m return errSnapshotCycle } // Generate a new snapshot on top of the parent - parent := t.Snapshot(parentRoot).(snapshot) + parent := t.Snapshot(parentRoot) if parent == nil { return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) } - snap := parent.Update(blockRoot, destructs, accounts, storage) + snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage) // Save the new snapshot for later t.lock.Lock() @@ -253,6 +264,12 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs m // Cap traverses downwards the snapshot tree from a head block hash until the // number of allowed layers are crossed. All layers beyond the permitted number // are flattened downwards. +// +// Note, the final diff layer count in general will be one more than the amount +// requested. This happens because the bottom-most diff layer is the accumulator +// which may or may not overflow and cascade to disk. Since this last layer's +// survival is only known *after* capping, we need to omit it from the count if +// we want to ensure that *at least* the requested number of diff layers remain. func (t *Tree) Cap(root common.Hash, layers int) error { // Retrieve the head snapshot to cap from snap := t.Snapshot(root) @@ -263,6 +280,13 @@ func (t *Tree) Cap(root common.Hash, layers int) error { if !ok { return fmt.Errorf("snapshot [%#x] is disk layer", root) } + // If the generator is still running, use a more aggressive cap + diff.origin.lock.RLock() + if diff.origin.genMarker != nil && layers > 8 { + layers = 8 + } + diff.origin.lock.RUnlock() + // Run the internal capping and discard all stale layers t.lock.Lock() defer t.lock.Unlock() @@ -270,10 +294,7 @@ func (t *Tree) Cap(root common.Hash, layers int) error { // Flattening the bottom-most diff layer requires special casing since there's // no child to rewire to the grandparent. In that case we can fake a temporary // child for the capping and then remove it. 
- var persisted *diskLayer - - switch layers { - case 0: + if layers == 0 { // If full commit was requested, flatten the diffs and merge onto disk diff.lock.RLock() base := diffToDisk(diff.flatten().(*diffLayer)) diff.lock.RUnlock() // Replace the entire snapshot tree with the flat base t.layers = map[common.Hash]snapshot{base.root: base} return nil - - case 1: - // If full flattening was requested, flatten the diffs but only merge if the - // memory limit was reached - var ( - bottom *diffLayer - base *diskLayer - ) - diff.lock.RLock() - bottom = diff.flatten().(*diffLayer) - if bottom.memory >= aggregatorMemoryLimit { - base = diffToDisk(bottom) - } - diff.lock.RUnlock() - - // If all diff layers were removed, replace the entire snapshot tree - if base != nil { - t.layers = map[common.Hash]snapshot{base.root: base} - return nil - } - // Merge the new aggregated layer into the snapshot tree, clean stales below - t.layers[bottom.root] = bottom - - default: - // Many layers requested to be retained, cap normally - persisted = t.cap(diff, layers) } + persisted := t.cap(diff, layers) + // Remove any layer that is stale or links into a stale layer children := make(map[common.Hash][]common.Hash) for root, snap := range t.layers { @@ -350,10 +347,16 @@ func (t *Tree) Cap(root common.Hash, layers int) error { // crossed. All diffs beyond the permitted number are flattened downwards. If the // layer limit is reached, memory cap is also enforced (but not before). // -// The method returns the new disk layer if diffs were persistend into it. +// The method returns the new disk layer if diffs were persisted into it. +// +// Note, the final diff layer count in general will be one more than the amount +// requested. This happens because the bottom-most diff layer is the accumulator +// which may or may not overflow and cascade to disk. Since this last layer's +// survival is only known *after* capping, we need to omit it from the count if +// we want to ensure that *at least* the requested number of diff layers remain. func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { // Dive until we run out of layers or reach the persistent database - for ; layers > 2; layers-- { + for i := 0; i < layers-1; i++ { // If we still have diff layers below, continue down if parent, ok := diff.parent.(*diffLayer); ok { diff = parent @@ -404,6 +407,9 @@ func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { // diffToDisk merges a bottom-most diff into the persistent disk layer underneath // it. The method will panic if called onto a non-bottom-most diff layer. +// +// The disk layer persistence should be operated in an atomic way. All updates should +// be discarded if the whole transition is not finished. func diffToDisk(bottom *diffLayer) *diskLayer { var ( base = bottom.parent.(*diskLayer) @@ -416,8 +422,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { base.genAbort <- abort stats = <-abort } - // Start by temporarily deleting the current snapshot block marker. This - // ensures that in the case of a crash, the entire snapshot is invalidated. + // Put the deletion in the batch writer, flush all updates in the final step.
rawdb.DeleteSnapshotRoot(batch) // Mark the original base as stale as we're going to create a new wrapper @@ -443,8 +448,17 @@ func diffToDisk(bottom *diffLayer) *diskLayer { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) base.cache.Del(key[1:]) - snapshotFlushStorageItemMeter.Mark(1) + + // Ensure we don't delete too much data blindly (contract can be + // huge). It's ok to flush, the root will go missing in case of a + // crash and we'll detect and regenerate the snapshot. + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write storage deletions", "err", err) + } + batch.Reset() + } } } it.Release() @@ -460,14 +474,18 @@ func diffToDisk(bottom *diffLayer) *diskLayer { base.cache.Set(hash[:], data) snapshotCleanAccountWriteMeter.Mark(int64(len(data))) + snapshotFlushAccountItemMeter.Mark(1) + snapshotFlushAccountSizeMeter.Mark(int64(len(data))) + + // Ensure we don't write too much data blindly. It's ok to flush, the + // root will go missing in case of a crash and we'll detect and regen + // the snapshot. if batch.ValueSize() > ethdb.IdealBatchSize { if err := batch.Write(); err != nil { log.Crit("Failed to write account snapshot", "err", err) } batch.Reset() } - snapshotFlushAccountItemMeter.Mark(1) - snapshotFlushAccountSizeMeter.Mark(int64(len(data))) } // Push all the storage slots into the database for accountHash, storage := range bottom.storageData { @@ -494,18 +512,19 @@ snapshotFlushStorageItemMeter.Mark(1) snapshotFlushStorageSizeMeter.Mark(int64(len(data))) } - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Crit("Failed to write storage snapshot", "err", err) - } - batch.Reset() - } } // Update the snapshot block marker and write any remainder data rawdb.WriteSnapshotRoot(batch, bottom.root) + + // Write out the generator progress marker and report + journalProgress(batch, base.genMarker, stats) + + // Flush all the updates in a single db operation. Ensure the + // disk layer transition is atomic. if err := batch.Write(); err != nil { log.Crit("Failed to write leftover snapshot", "err", err) } + log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil) res := &diskLayer{ root: bottom.root, cache: base.cache, @@ -543,7 +562,21 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) { t.lock.Lock() defer t.lock.Unlock() + // First write out the journal metadata journal := new(bytes.Buffer) + if err := rlp.Encode(journal, journalVersion); err != nil { + return common.Hash{}, err + } + diskroot := t.diskRoot() + if diskroot == (common.Hash{}) { + return common.Hash{}, errors.New("invalid disk root") + } + // Then write out the disk layer root, ensuring the + // diff journal is continuous with the disk layer. + if err := rlp.Encode(journal, diskroot); err != nil { + return common.Hash{}, err + } + // Finally, write out the journal of each layer in reverse order. base, err := snap.(snapshot).Journal(journal) if err != nil { return common.Hash{}, err @@ -553,6 +586,29 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) { return base, nil } +// LegacyJournal is basically identical to Journal. It's the legacy +// version for flushing the legacy journal. Now the only purpose of this +// function is testing.
+func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) { + // Retrieve the head snapshot to journal from + snap := t.Snapshot(root) + if snap == nil { + return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root) + } + // Run the journaling + t.lock.Lock() + defer t.lock.Unlock() + + journal := new(bytes.Buffer) + base, err := snap.(snapshot).LegacyJournal(journal) + if err != nil { + return common.Hash{}, err + } + // Store the journal into the database and return + rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes()) + return base, nil +} + // Rebuild wipes all available snapshot data from the persistent database and // discard all caches and diff layers. Afterwards, it starts a new snapshot // generator with the given root hash. @@ -560,6 +616,10 @@ func (t *Tree) Rebuild(root common.Hash) { t.lock.Lock() defer t.lock.Unlock() + // First, delete any recovery flag from the database, because now we are + // building a brand new snapshot. + rawdb.DeleteSnapshotRecoveryNumber(t.diskdb) + // Track whether there's a wipe currently running and keep it alive if so var wiper chan struct{} @@ -591,7 +651,7 @@ func (t *Tree) Rebuild(root common.Hash) { panic(fmt.Sprintf("unknown layer type: %T", layer)) } } - // Start generating a new snapshot from scratch on a backgroung thread. The + // Start generating a new snapshot from scratch on a background thread. The // generator will run a wiper first if there's not one running right now. log.Info("Rebuilding state snapshot") t.layers = map[common.Hash]snapshot{ @@ -602,11 +662,79 @@ // AccountIterator creates a new account iterator for the specified root hash and // seeks to a starting account hash. func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { + ok, err := t.generating() + if err != nil { + return nil, err + } + if ok { + return nil, ErrNotConstructed + } return newFastAccountIterator(t, root, seek) } // StorageIterator creates a new storage iterator for the specified root hash and // account. The iterator will be move to the specific start position. func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { + ok, err := t.generating() + if err != nil { + return nil, err + } + if ok { + return nil, ErrNotConstructed + } return newFastStorageIterator(t, root, account, seek) } + +// disklayer is an internal helper function to return the disk layer. +// The lock of snapTree is assumed to be held already. +func (t *Tree) disklayer() *diskLayer { + var snap snapshot + for _, s := range t.layers { + snap = s + break + } + if snap == nil { + return nil + } + switch layer := snap.(type) { + case *diskLayer: + return layer + case *diffLayer: + return layer.origin + default: + panic(fmt.Sprintf("%T: undefined layer", snap)) + } +} + +// diskRoot is an internal helper function to return the disk layer root. +// The lock of snapTree is assumed to be held already. +func (t *Tree) diskRoot() common.Hash { + disklayer := t.disklayer() + if disklayer == nil { + return common.Hash{} + } + return disklayer.Root() +} + +// generating is an internal helper function which reports whether the snapshot +// is still under construction.
+func (t *Tree) generating() (bool, error) { + t.lock.Lock() + defer t.lock.Unlock() + + layer := t.disklayer() + if layer == nil { + return false, errors.New("disk layer is missing") + } + layer.lock.RLock() + defer layer.lock.RUnlock() + return layer.genMarker != nil, nil +} + +// DiskRoot is an external helper function to return the disk layer root. +func (t *Tree) DiskRoot() common.Hash { + t.lock.Lock() + defer t.lock.Unlock() + + return t.diskRoot() +} diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 9bdc4933b574..88a0e0cd4e44 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -161,57 +161,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { defer func(memcap uint64) { aggregatorMemoryLimit = memcap }(aggregatorMemoryLimit) aggregatorMemoryLimit = 0 - if err := snaps.Cap(common.HexToHash("0x03"), 2); err != nil { - t.Fatalf("failed to merge diff layer onto disk: %v", err) - } - // Since the base layer was modified, ensure that data retrievald on the external reference fail - if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { - t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) - } - if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { - t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) - } - if n := len(snaps.layers); n != 2 { - t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2) - fmt.Println(snaps.layers) - } -} - -// Tests that if a diff layer becomes stale, no active external references will -// be returned with junk data. This version of the test flattens every diff layer -// to check internal corner case around the bottom-most memory accumulator.
-func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { - // Create an empty base layer and a snapshot tree out of it - base := &diskLayer{ - diskdb: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), - } - snaps := &Tree{ - layers: map[common.Hash]snapshot{ - base.root: base, - }, - } - // Commit two diffs on top and retrieve a reference to the bottommost - accounts := map[common.Hash][]byte{ - common.HexToHash("0xa1"): randomAccount(), - } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { - t.Fatalf("failed to create a diff layer: %v", err) - } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil { - t.Fatalf("failed to create a diff layer: %v", err) - } - if n := len(snaps.layers); n != 3 { - t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3) - } - ref := snaps.Snapshot(common.HexToHash("0x02")) - - // Flatten the diff layer into the bottom accumulator if err := snaps.Cap(common.HexToHash("0x03"), 1); err != nil { - t.Fatalf("failed to flatten diff layer into accumulator: %v", err) + t.Fatalf("failed to merge accumulator onto disk: %v", err) } - // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail + // Since the base layer was modified, ensure that data retrieval on the external reference fails if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) } @@ -266,7 +219,7 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { t.Errorf("layers modified, got %d exp %d", got, exp) } // Flatten the diff layer into the bottom accumulator - if err := snaps.Cap(common.HexToHash("0x04"), 2); err != nil { + if err := snaps.Cap(common.HexToHash("0x04"), 1); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) } // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail diff --git a/core/state/statedb.go b/core/state/statedb.go index d2835a9c4032..40805d2e823b 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -691,6 +691,31 @@ func (s *StateDB) Copy() *StateDB { for hash, preimage := range s.preimages { state.preimages[hash] = preimage } + if s.snaps != nil { + // In order for the miner to be able to use and make additions + // to the snapshot tree, we need to copy that as well.
+ // Otherwise, any block mined by ourselves will cause gaps in the tree, + // and force the miner to operate trie-backed only. + state.snaps = s.snaps + state.snap = s.snap + // deep copy needed + state.snapDestructs = make(map[common.Hash]struct{}) + for k, v := range s.snapDestructs { + state.snapDestructs[k] = v + } + state.snapAccounts = make(map[common.Hash][]byte) + for k, v := range s.snapAccounts { + state.snapAccounts[k] = v + } + state.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + for k, v := range s.snapStorage { + temp := make(map[common.Hash][]byte) + for kk, vv := range v { + temp[kk] = vv + } + state.snapStorage[k] = temp + } + } return state } @@ -861,8 +886,12 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } - if err := s.snaps.Cap(root, 127); err != nil { // Persistent layer is 128th, the last available trie - log.Warn("Failed to cap snapshot tree", "root", root, "layers", 127, "err", err) + // Keep 128 diff layers in memory; the persistent layer is the 129th. + // - head layer is paired with HEAD state + // - head-1 layer is paired with HEAD-1 state + // - head-127 layer (bottom-most diff layer) is paired with HEAD-127 state + if err := s.snaps.Cap(root, 128); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) } } s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil diff --git a/eth/config.go b/eth/config.go index aa48cd04968e..601c64fc6386 100644 --- a/eth/config.go +++ b/eth/config.go @@ -35,11 +35,11 @@ var DefaultConfig = Config{ LightPeers: 100, LightServ: 0, UltraLightFraction: 75, - DatabaseCache: 768, - TrieCleanCache: 256, + DatabaseCache: 512, + TrieCleanCache: 154, TrieDirtyCache: 256, TrieTimeout: 60 * time.Minute, - SnapshotCache: 256, + SnapshotCache: 102, Miner: miner.Config{ GasFloor: 8000000, GasCeil: 8000000, diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index c3834c4b543d..e92bac2d2f92 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -428,7 +428,7 @@ func (b *batch) Put(key, value []byte) error { // Delete inserts the a key removal into the batch for later committing. func (b *batch) Delete(key []byte) error { b.b.Delete(key) - b.size++ + b.size += len(key) return nil } diff --git a/ethdb/memorydb/memorydb.go b/ethdb/memorydb/memorydb.go index 8e9cf3c70b58..338dd1aeb579 100644 --- a/ethdb/memorydb/memorydb.go +++ b/ethdb/memorydb/memorydb.go @@ -211,7 +211,7 @@ func (b *batch) Put(key, value []byte) error { // Delete inserts the a key removal into the batch for later committing.
func (b *batch) Delete(key []byte) error { b.writes = append(b.writes, keyvalue{common.CopyBytes(key), nil, true}) - b.size += 1 + b.size += len(key) return nil } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 1c04719a367d..4abb8dc72e47 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -220,7 +220,7 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter boo var snaps *snapshot.Tree if snapshotter { - snaps = snapshot.New(db, sdb.TrieDB(), 1, root, false) + snaps = snapshot.New(db, sdb.TrieDB(), 1, root, false, false) } statedb, _ = state.New(root, sdb, snaps) return snaps, statedb diff --git a/trie/database.go b/trie/database.go index fae5cf491e55..321c62cc35d8 100644 --- a/trie/database.go +++ b/trie/database.go @@ -747,7 +747,7 @@ func (db *Database) Commit(node common.Hash, report bool) error { batch.Replay(uncacher) batch.Reset() - // Reset the storage counters and bumpd metrics + // Reset the storage counters and bumped metrics db.preimages = make(map[common.Hash][]byte) db.preimagesSize = 0
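For reference, the journal layout that Tree.Journal writes and loadAndParseJournal expects above is: an RLP-encoded version number, then the disk layer root, then one RLP entry per diff layer (journalled parent-first). A minimal sketch of that encode/decode round trip using the same rlp stream helpers the patch calls; the diff-layer entries are elided and diskroot is a made-up value:

package main

import (
	"bytes"
	"fmt"

	"github.com/celo-org/celo-blockchain/common"
	"github.com/celo-org/celo-blockchain/rlp"
)

const journalVersion uint64 = 0

func main() {
	// Encode: the version first, then the disk layer root (diff layers elided).
	diskroot := common.HexToHash("0x01") // made-up disk layer root
	journal := new(bytes.Buffer)
	if err := rlp.Encode(journal, journalVersion); err != nil {
		panic(err)
	}
	if err := rlp.Encode(journal, diskroot); err != nil {
		panic(err)
	}

	// Decode: mirror loadAndParseJournal's sanity checks.
	r := rlp.NewStream(bytes.NewReader(journal.Bytes()), 0)
	version, err := r.Uint()
	if err != nil || version != journalVersion {
		fmt.Println("discarding diffs: unreadable or wrong journal version")
		return
	}
	var root common.Hash
	if err := r.Decode(&root); err != nil {
		fmt.Println("missing disk layer root")
		return
	}
	// Only accept the diff layers if the journal is continuous with disk.
	fmt.Println("journal continuous with disk layer:", root == diskroot)
}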