Skip to content

Commit f5f544f

Browse files
karalabeMariusVanDerWijdenholiman
authored andcommitted
eth: support bubbling up bad blocks from sync to the engine API (ethereum#25190)
* eth: support bubbling up bad blocks from sync to the engine API * eth/catalyst: fix typo Co-authored-by: Marius van der Wijden <m.vanderwijden@live.de> * eth/catalyst: fix typo Co-authored-by: Marius van der Wijden <m.vanderwijden@live.de> * Update eth/catalyst/api.go * eth/catalyst: when forgetting bad hashes, also forget descendants * eth/catalyst: minor bad block tweaks for resilience Co-authored-by: Marius van der Wijden <m.vanderwijden@live.de> Co-authored-by: Martin Holst Swende <martin@swende.se>
1 parent 0f9fff3 commit f5f544f

File tree

5 files changed

+154
-17
lines changed

5 files changed

+154
-17
lines changed

core/beacon/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ type payloadAttributesMarshaling struct {
4242

4343
//go:generate go run github.com/fjl/gencodec -type ExecutableDataV1 -field-override executableDataMarshaling -out gen_ed.go
4444

45-
// ExecutableDataV1 structure described at https://github.com/ethereum/execution-apis/src/engine/specification.md
45+
// ExecutableDataV1 structure described at https://github.com/ethereum/execution-apis/tree/main/src/engine/specification.md
4646
type ExecutableDataV1 struct {
4747
ParentHash common.Hash `json:"parentHash" gencodec:"required"`
4848
FeeRecipient common.Address `json:"feeRecipient" gencodec:"required"`

eth/catalyst/api.go

Lines changed: 127 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,47 @@ func Register(stack *node.Node, backend *eth.Ethereum) error {
5050
return nil
5151
}
5252

53+
const (
54+
// invalidBlockHitEviction is the number of times an invalid block can be
55+
// referenced in forkchoice update or new payload before it is attempted
56+
// to be reprocessed again.
57+
invalidBlockHitEviction = 128
58+
59+
// invalidTipsetsCap is the max number of recent block hashes tracked that
60+
// have lead to some bad ancestor block. It's just an OOM protection.
61+
invalidTipsetsCap = 512
62+
)
63+
5364
type ConsensusAPI struct {
54-
eth *eth.Ethereum
65+
eth *eth.Ethereum
66+
5567
remoteBlocks *headerQueue // Cache of remote payloads received
5668
localBlocks *payloadQueue // Cache of local payloads generated
57-
// Lock for the forkChoiceUpdated method
58-
forkChoiceLock sync.Mutex
69+
70+
// The forkchoice update and new payload method require us to return the
71+
// latest valid hash in an invalid chain. To support that return, we need
72+
// to track historical bad blocks as well as bad tipsets in case a chain
73+
// is constantly built on it.
74+
//
75+
// There are a few important caveats in this mechanism:
76+
// - The bad block tracking is ephemeral, in-memory only. We must never
77+
// persist any bad block information to disk as a bug in Geth could end
78+
// up blocking a valid chain, even if a later Geth update would accept
79+
// it.
80+
// - Bad blocks will get forgotten after a certain threshold of import
81+
// attempts and will be retried. The rationale is that if the network
82+
// really-really-really tries to feed us a block, we should give it a
83+
// new chance, perhaps us being racey instead of the block being legit
84+
// bad (this happened in Geth at a point with import vs. pending race).
85+
// - Tracking all the blocks built on top of the bad one could be a bit
86+
// problematic, so we will only track the head chain segment of a bad
87+
// chain to allow discarding progressing bad chains and side chains,
88+
// without tracking too much bad data.
89+
invalidBlocksHits map[common.Hash]int // Emhemeral cache to track invalid blocks and their hit count
90+
invalidTipsets map[common.Hash]*types.Header // Ephemeral cache to track invalid tipsets and their bad ancestor
91+
invalidLock sync.Mutex // Protects the invalid maps from concurrent access
92+
93+
forkChoiceLock sync.Mutex // Lock for the forkChoiceUpdated method
5994
}
6095

6196
// NewConsensusAPI creates a new consensus api for the given backend.
@@ -64,11 +99,16 @@ func NewConsensusAPI(eth *eth.Ethereum) *ConsensusAPI {
6499
if eth.BlockChain().Config().TerminalTotalDifficulty == nil {
65100
log.Warn("Engine API started but chain not configured for merge yet")
66101
}
67-
return &ConsensusAPI{
68-
eth: eth,
69-
remoteBlocks: newHeaderQueue(),
70-
localBlocks: newPayloadQueue(),
102+
api := &ConsensusAPI{
103+
eth: eth,
104+
remoteBlocks: newHeaderQueue(),
105+
localBlocks: newPayloadQueue(),
106+
invalidBlocksHits: make(map[common.Hash]int),
107+
invalidTipsets: make(map[common.Hash]*types.Header),
71108
}
109+
eth.Downloader().SetBadBlockCallback(api.setInvalidAncestor)
110+
111+
return api
72112
}
73113

74114
// ForkchoiceUpdatedV1 has several responsibilities:
@@ -96,6 +136,10 @@ func (api *ConsensusAPI) ForkchoiceUpdatedV1(update beacon.ForkchoiceStateV1, pa
96136
// reason.
97137
block := api.eth.BlockChain().GetBlockByHash(update.HeadBlockHash)
98138
if block == nil {
139+
// If this block was previously invalidated, keep rejecting it here too
140+
if res := api.checkInvalidAncestor(update.HeadBlockHash, update.HeadBlockHash); res != nil {
141+
return beacon.ForkChoiceResponse{PayloadStatus: *res, PayloadID: nil}, nil
142+
}
99143
// If the head hash is unknown (was not given to us in a newPayload request),
100144
// we cannot resolve the header, so not much to do. This could be extended in
101145
// the future to resolve from the `eth` network, but it's an unexpected case
@@ -266,6 +310,10 @@ func (api *ConsensusAPI) NewPayloadV1(params beacon.ExecutableDataV1) (beacon.Pa
266310
hash := block.Hash()
267311
return beacon.PayloadStatusV1{Status: beacon.VALID, LatestValidHash: &hash}, nil
268312
}
313+
// If this block was rejected previously, keep rejecting it
314+
if res := api.checkInvalidAncestor(block.Hash(), block.Hash()); res != nil {
315+
return *res, nil
316+
}
269317
// If the parent is missing, we - in theory - could trigger a sync, but that
270318
// would also entail a reorg. That is problematic if multiple sibling blocks
271319
// are being fed to us, and even more so, if some semi-distant uncle shortens
@@ -293,7 +341,7 @@ func (api *ConsensusAPI) NewPayloadV1(params beacon.ExecutableDataV1) (beacon.Pa
293341
}
294342
if block.Time() <= parent.Time() {
295343
log.Warn("Invalid timestamp", "parent", block.Time(), "block", block.Time())
296-
return api.invalid(errors.New("invalid timestamp"), parent), nil
344+
return api.invalid(errors.New("invalid timestamp"), parent.Header()), nil
297345
}
298346
// Another cornercase: if the node is in snap sync mode, but the CL client
299347
// tries to make it import a block. That should be denied as pushing something
@@ -310,7 +358,13 @@ func (api *ConsensusAPI) NewPayloadV1(params beacon.ExecutableDataV1) (beacon.Pa
310358
log.Trace("Inserting block without sethead", "hash", block.Hash(), "number", block.Number)
311359
if err := api.eth.BlockChain().InsertBlockWithoutSetHead(block); err != nil {
312360
log.Warn("NewPayloadV1: inserting block failed", "error", err)
313-
return api.invalid(err, parent), nil
361+
362+
api.invalidLock.Lock()
363+
api.invalidBlocksHits[block.Hash()] = 1
364+
api.invalidTipsets[block.Hash()] = block.Header()
365+
api.invalidLock.Unlock()
366+
367+
return api.invalid(err, parent.Header()), nil
314368
}
315369
// We've accepted a valid payload from the beacon client. Mark the local
316370
// chain transitions to notify other subsystems (e.g. downloader) of the
@@ -339,8 +393,13 @@ func computePayloadId(headBlockHash common.Hash, params *beacon.PayloadAttribute
339393
// delayPayloadImport stashes the given block away for import at a later time,
340394
// either via a forkchoice update or a sync extension. This method is meant to
341395
// be called by the newpayload command when the block seems to be ok, but some
342-
// prerequisite prevents it from being processed (e.g. no parent, or nap sync).
396+
// prerequisite prevents it from being processed (e.g. no parent, or snap sync).
343397
func (api *ConsensusAPI) delayPayloadImport(block *types.Block) (beacon.PayloadStatusV1, error) {
398+
// Sanity check that this block's parent is not on a previously invalidated
399+
// chain. If it is, mark the block as invalid too.
400+
if res := api.checkInvalidAncestor(block.ParentHash(), block.Hash()); res != nil {
401+
return *res, nil
402+
}
344403
// Stash the block away for a potential forced forkchoice update to it
345404
// at a later time.
346405
api.remoteBlocks.put(block.Hash(), block.Header())
@@ -360,14 +419,70 @@ func (api *ConsensusAPI) delayPayloadImport(block *types.Block) (beacon.PayloadS
360419
return beacon.PayloadStatusV1{Status: beacon.ACCEPTED}, nil
361420
}
362421

422+
// setInvalidAncestor is a callback for the downloader to notify us if a bad block
423+
// is encountered during the async sync.
424+
func (api *ConsensusAPI) setInvalidAncestor(invalid *types.Header, origin *types.Header) {
425+
api.invalidLock.Lock()
426+
defer api.invalidLock.Unlock()
427+
428+
api.invalidTipsets[origin.Hash()] = invalid
429+
api.invalidBlocksHits[invalid.Hash()]++
430+
}
431+
432+
// checkInvalidAncestor checks whether the specified chain end links to a known
433+
// bad ancestor. If yes, it constructs the payload failure response to return.
434+
func (api *ConsensusAPI) checkInvalidAncestor(check common.Hash, head common.Hash) *beacon.PayloadStatusV1 {
435+
api.invalidLock.Lock()
436+
defer api.invalidLock.Unlock()
437+
438+
// If the hash to check is unknown, return valid
439+
invalid, ok := api.invalidTipsets[check]
440+
if !ok {
441+
return nil
442+
}
443+
// If the bad hash was hit too many times, evict it and try to reprocess in
444+
// the hopes that we have a data race that we can exit out of.
445+
badHash := invalid.Hash()
446+
447+
api.invalidBlocksHits[badHash]++
448+
if api.invalidBlocksHits[badHash] >= invalidBlockHitEviction {
449+
log.Warn("Too many bad block import attempt, trying", "number", invalid.Number, "hash", badHash)
450+
delete(api.invalidBlocksHits, badHash)
451+
452+
for descendant, badHeader := range api.invalidTipsets {
453+
if badHeader.Hash() == badHash {
454+
delete(api.invalidTipsets, descendant)
455+
}
456+
}
457+
return nil
458+
}
459+
// Not too many failures yet, mark the head of the invalid chain as invalid
460+
if check != head {
461+
log.Warn("Marked new chain head as invalid", "hash", head, "badnumber", invalid.Number, "badhash", badHash)
462+
for len(api.invalidTipsets) >= invalidTipsetsCap {
463+
for key := range api.invalidTipsets {
464+
delete(api.invalidTipsets, key)
465+
break
466+
}
467+
}
468+
api.invalidTipsets[head] = invalid
469+
}
470+
failure := "links to previously rejected block"
471+
return &beacon.PayloadStatusV1{
472+
Status: beacon.INVALID,
473+
LatestValidHash: &invalid.ParentHash,
474+
ValidationError: &failure,
475+
}
476+
}
477+
363478
// invalid returns a response "INVALID" with the latest valid hash supplied by latest or to the current head
364479
// if no latestValid block was provided.
365-
func (api *ConsensusAPI) invalid(err error, latestValid *types.Block) beacon.PayloadStatusV1 {
480+
func (api *ConsensusAPI) invalid(err error, latestValid *types.Header) beacon.PayloadStatusV1 {
366481
currentHash := api.eth.BlockChain().CurrentBlock().Hash()
367482
if latestValid != nil {
368483
// Set latest valid hash to 0x0 if parent is PoW block
369484
currentHash = common.Hash{}
370-
if latestValid.Difficulty().BitLen() == 0 {
485+
if latestValid.Difficulty.BitLen() == 0 {
371486
// Otherwise set latest valid hash to parent hash
372487
currentHash = latestValid.Hash()
373488
}

eth/catalyst/api_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -773,16 +773,16 @@ func TestTrickRemoteBlockCache(t *testing.T) {
773773
if err != nil {
774774
panic(err)
775775
}
776-
if status.Status == beacon.INVALID {
777-
panic("success")
776+
if status.Status == beacon.VALID {
777+
t.Error("invalid status: VALID on an invalid chain")
778778
}
779779
// Now reorg to the head of the invalid chain
780780
resp, err := apiB.ForkchoiceUpdatedV1(beacon.ForkchoiceStateV1{HeadBlockHash: payload.BlockHash, SafeBlockHash: payload.BlockHash, FinalizedBlockHash: payload.ParentHash}, nil)
781781
if err != nil {
782782
t.Fatal(err)
783783
}
784784
if resp.PayloadStatus.Status == beacon.VALID {
785-
t.Errorf("invalid status: expected INVALID got: %v", resp.PayloadStatus.Status)
785+
t.Error("invalid status: VALID on an invalid chain")
786786
}
787787
time.Sleep(100 * time.Millisecond)
788788
}

eth/downloader/beaconsync.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,13 @@ func (b *beaconBackfiller) setMode(mode SyncMode) {
137137
b.resume()
138138
}
139139

140+
// SetBadBlockCallback sets the callback to run when a bad block is hit by the
141+
// block processor. This method is not thread safe and should be set only once
142+
// on startup before system events are fired.
143+
func (d *Downloader) SetBadBlockCallback(onBadBlock badBlockFn) {
144+
d.badBlock = onBadBlock
145+
}
146+
140147
// BeaconSync is the post-merge version of the chain synchronization, where the
141148
// chain is not downloaded from genesis onward, rather from trusted head announces
142149
// backwards.

eth/downloader/downloader.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ var (
8585
// peerDropFn is a callback type for dropping a peer detected as malicious.
8686
type peerDropFn func(id string)
8787

88+
// badBlockFn is a callback for the async beacon sync to notify the caller that
89+
// the origin header requested to sync to, produced a chain with a bad block.
90+
type badBlockFn func(invalid *types.Header, origin *types.Header)
91+
8892
// headerTask is a set of downloaded headers to queue along with their precomputed
8993
// hashes to avoid constant rehashing.
9094
type headerTask struct {
@@ -113,6 +117,7 @@ type Downloader struct {
113117

114118
// Callbacks
115119
dropPeer peerDropFn // Drops a peer for misbehaving
120+
badBlock badBlockFn // Reports a block as rejected by the chain
116121

117122
// Status
118123
// SYSCOIN
@@ -1538,7 +1543,7 @@ func (d *Downloader) importBlockResults(results []*fetchResult) error {
15381543
return errCancelContentProcessing
15391544
default:
15401545
}
1541-
// Retrieve the a batch of results to import
1546+
// Retrieve a batch of results to import
15421547
first, last := results[0].Header, results[len(results)-1].Header
15431548
log.Debug("Inserting downloaded chain", "items", len(results),
15441549
"firstnum", first.Number, "firsthash", first.Hash(),
@@ -1554,6 +1559,16 @@ func (d *Downloader) importBlockResults(results []*fetchResult) error {
15541559
if index, err := d.blockchain.InsertChain(blocks); err != nil {
15551560
if index < len(results) {
15561561
log.Debug("Downloaded item processing failed", "number", results[index].Header.Number, "hash", results[index].Header.Hash(), "err", err)
1562+
1563+
// In post-merge, notify the engine API of encountered bad chains
1564+
if d.badBlock != nil {
1565+
head, _, err := d.skeleton.Bounds()
1566+
if err != nil {
1567+
log.Error("Failed to retrieve beacon bounds for bad block reporting", "err", err)
1568+
} else {
1569+
d.badBlock(blocks[index].Header(), head)
1570+
}
1571+
}
15571572
} else {
15581573
// The InsertChain method in blockchain.go will sometimes return an out-of-bounds index,
15591574
// when it needs to preprocess blocks to import a sidechain.

0 commit comments

Comments
 (0)