@@ -19,6 +19,7 @@ package downloader
import (
	"encoding/json"
	"errors"
+	"fmt"
	"math/rand"
	"sort"
	"time"
@@ -148,11 +149,15 @@ type backfiller interface {
	// based on the skeleton chain as it might be invalid. The backfiller should
	// gracefully handle multiple consecutive suspends without a resume, even
	// on initial startup.
-	suspend()
+	//
+	// The method should return the last block header that has been successfully
+	// backfilled, or nil if the backfiller was not resumed.
+	suspend() *types.Header

	// resume requests the backfiller to start running fill or snap sync based on
	// the skeleton chain as it has successfully been linked. Appending new heads
	// to the end of the chain will not result in suspend/resume cycles.
+	// leaking too much sync logic out to the filler.
	resume()
}
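For context, a minimal sketch of what the extended contract asks of an implementation; the noopFiller type below is purely illustrative and not part of this change (the real backfiller lives elsewhere in the downloader):

package downloader

import "github.com/ethereum/go-ethereum/core/types"

// noopFiller is an illustrative backfiller stub: it remembers the last header
// it (hypothetically) backfilled and hands that back on suspend.
type noopFiller struct {
	filled *types.Header // last successfully backfilled header, nil until resumed
}

// suspend reports the most recently backfilled header, or nil if resume was
// never called (e.g. a suspend on initial startup).
func (f *noopFiller) suspend() *types.Header { return f.filled }

// resume would start fill or snap sync in a real implementation and keep
// f.filled updated as blocks are imported; the stub does nothing.
func (f *noopFiller) resume() {}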
@@ -358,8 +363,17 @@ func (s *skeleton) sync(head *types.Header) (*types.Header, error) {
	if linked {
		s.filler.resume()
	}
-	defer s.filler.suspend()
-
+	defer func() {
+		if filled := s.filler.suspend(); filled != nil {
+			// If something was filled, try to delete stale sync helpers. If
+			// unsuccessful, warn the user, but not much else we can do (it's
+			// a programming error, just let users report an issue and don't
+			// choke in the meantime).
+			if err := s.cleanStales(filled); err != nil {
+				log.Error("Failed to clean stale beacon headers", "err", err)
+			}
+		}
+	}()
	// Create a set of unique channels for this sync cycle. We need these to be
	// ephemeral so a data race doesn't accidentally deliver something stale on
	// a persistent channel across syncs (yup, this happened)
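The "ephemeral channels" note above refers to a pattern whose code sits outside this hunk; a minimal, hypothetical illustration of the idea is that every sync cycle builds its own channels, so a goroutine left over from a previous cycle can never deliver into the current one:

// Illustrative only: runCycle is not part of this change. A fresh, buffered
// channel is created per invocation, so a stale send from an earlier cycle
// has nowhere to land in the next one.
func runCycle(work func(done chan<- error)) error {
	done := make(chan error, 1) // scoped to this cycle, never reused
	go work(done)
	return <-done
}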
@@ -582,8 +596,16 @@ func (s *skeleton) processNewHead(head *types.Header, force bool) bool {

	lastchain := s.progress.Subchains[0]
	if lastchain.Tail >= number {
+		// If the chain is down to a single beacon header, and it is re-announced
+		// once more, ignore it instead of tearing down sync for a noop.
+		if lastchain.Head == lastchain.Tail {
+			if current := rawdb.ReadSkeletonHeader(s.db, number); current.Hash() == head.Hash() {
+				return false
+			}
+		}
+		// Not a noop / double head announce, abort with a reorg
		if force {
-			log.Warn("Beacon chain reorged", "tail", lastchain.Tail, "newHead", number)
+			log.Warn("Beacon chain reorged", "tail", lastchain.Tail, "head", lastchain.Head, "newHead", number)
		}
		return true
	}
@@ -943,12 +965,44 @@ func (s *skeleton) processResponse(res *headerResponse) (linked bool, merged bool) {
		// If the beacon chain was linked to the local chain, completely swap out
		// all internal progress and abort header synchronization.
		if linked {
-			// Note, linking into the local chain should also mean that there are
-			// no leftover subchains, but just in case there's some junk due to
-			// strange conditions or bugs, clean up all internal state.
-			if len(s.progress.Subchains) > 1 {
-				log.Error("Cleaning up leftovers after beacon link")
-				s.progress.Subchains = s.progress.Subchains[:1]
+			// Linking into the local chain should also mean that there are no
+			// leftover subchains, but in the case of importing the blocks via
+			// the engine API, we will not push the subchains forward. This will
+			// lead to a gap between an old sync cycle and a future one.
+			if subchains := len(s.progress.Subchains); subchains > 1 {
+				switch {
+				// If there are only 2 subchains - the current one and an older
+				// one - and the old one consists of a single block, then it's
+				// the expected new sync cycle after some propagated blocks. Log
+				// it for debugging purposes, explicitly clean and don't escalate.
+				case subchains == 2 && s.progress.Subchains[1].Head == s.progress.Subchains[1].Tail:
+					log.Debug("Cleaning previous beacon sync state", "head", s.progress.Subchains[1].Head)
+					rawdb.DeleteSkeletonHeader(batch, s.progress.Subchains[1].Head)
+					s.progress.Subchains = s.progress.Subchains[:1]
+
+				// If we have more than one header or more than one leftover chain,
+				// the syncer's internal state is corrupted. Do try to fix it, but
+				// be very vocal about the fault.
+				default:
+					var context []interface{}
+
+					for i := range s.progress.Subchains[1:] {
+						context = append(context, fmt.Sprintf("stale_head_%d", i+1))
+						context = append(context, s.progress.Subchains[i+1].Head)
+						context = append(context, fmt.Sprintf("stale_tail_%d", i+1))
+						context = append(context, s.progress.Subchains[i+1].Tail)
+						context = append(context, fmt.Sprintf("stale_next_%d", i+1))
+						context = append(context, s.progress.Subchains[i+1].Next)
+					}
+					log.Error("Cleaning spurious beacon sync leftovers", context...)
+					s.progress.Subchains = s.progress.Subchains[:1]
+
+					// Note, here we didn't actually delete the headers at all,
+					// just the metadata. We could implement a cleanup mechanism,
+					// but further modifying corrupted state is kind of asking
+					// for it. Unless there's a good enough reason to risk it,
+					// better to live with the small database junk.
+				}
			}
			break
		}
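To make the branch condition above concrete: a leftover from a previous cycle counts as benign only when exactly one extra subchain remains and it has collapsed to a single header. Expressed as a hypothetical helper (not part of this change, assuming the package's existing subchain type with Head and Tail block numbers):

// benignLeftover is an illustrative helper only: a previous sync cycle's
// leftovers are expected when there is exactly one extra subchain and it
// spans a single header; anything else indicates corrupted internal state.
func benignLeftover(chains []*subchain) bool {
	return len(chains) == 2 && chains[1].Head == chains[1].Tail
}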
@@ -1023,6 +1077,74 @@ func (s *skeleton) processResponse(res *headerResponse) (linked bool, merged bool) {
	return linked, merged
}

+// cleanStales removes previously synced beacon headers that have become stale
+// due to the downloader backfilling past the tracked tail.
+func (s *skeleton) cleanStales(filled *types.Header) error {
+	number := filled.Number.Uint64()
+	log.Trace("Cleaning stale beacon headers", "filled", number, "hash", filled.Hash())
+
+	// If the filled header is below the linked subchain, something's
+	// corrupted internally. Report an error and refuse to do anything.
+	if number < s.progress.Subchains[0].Tail {
+		return fmt.Errorf("filled header below beacon header tail: %d < %d", number, s.progress.Subchains[0].Tail)
+	}
+	// Subchain seems trimmable, push the tail forward up to the last
+	// filled header and delete everything before it - if available. In
+	// case we filled past the head, recreate the subchain with a new
+	// head to keep it consistent with the data on disk.
+	var (
+		start = s.progress.Subchains[0].Tail // start deleting from the first known header
+		end   = number                       // delete until the requested threshold
+	)
+	s.progress.Subchains[0].Tail = number
+	s.progress.Subchains[0].Next = filled.ParentHash
+
+	if s.progress.Subchains[0].Head < number {
+		// If more headers were filled than available, push the entire
+		// subchain forward to keep tracking the node's block imports
+		end = s.progress.Subchains[0].Head + 1 // delete the entire original range, including the head
+		s.progress.Subchains[0].Head = number  // assign a new head (tail is already assigned to this)
+	}
+	// Execute the trimming and the potential rewiring of the progress
+	batch := s.db.NewBatch()
+
+	if end != number {
+		// The entire original skeleton chain was deleted and a new one
+		// defined. Make sure the new single-header chain gets pushed to
+		// disk to keep internal state consistent.
+		rawdb.WriteSkeletonHeader(batch, filled)
+	}
+	s.saveSyncStatus(batch)
+	for n := start; n < end; n++ {
+		// If the batch grew too big, flush it and continue with a new batch.
+		// The catch is that the sync metadata needs to reflect the actually
+		// flushed state, so temporarily change the subchain progress and
+		// revert after the flush.
+		if batch.ValueSize() >= ethdb.IdealBatchSize {
+			tmpTail := s.progress.Subchains[0].Tail
+			tmpNext := s.progress.Subchains[0].Next
+
+			s.progress.Subchains[0].Tail = n
+			s.progress.Subchains[0].Next = rawdb.ReadSkeletonHeader(s.db, n).ParentHash
+			s.saveSyncStatus(batch)
+
+			if err := batch.Write(); err != nil {
+				log.Crit("Failed to write beacon trim data", "err", err)
+			}
+			batch.Reset()
+
+			s.progress.Subchains[0].Tail = tmpTail
+			s.progress.Subchains[0].Next = tmpNext
+			s.saveSyncStatus(batch)
+		}
+		rawdb.DeleteSkeletonHeader(batch, n)
+	}
+	if err := batch.Write(); err != nil {
+		log.Crit("Failed to write beacon trim data", "err", err)
+	}
+	return nil
+}
+
// Bounds retrieves the current head and tail tracked by the skeleton syncer.
// This method is used by the backfiller, whose life cycle is controlled by the
// skeleton syncer.
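The flush logic inside cleanStales is an instance of a more general pattern: when deleting a long key range in batches, persist a progress marker that matches exactly what each intermediate flush puts on disk, so an interruption can never leave the metadata out of step with the deletions. A simplified sketch of that pattern follows; deleteRange and its callbacks are illustrative and not part of this change:

package downloader

import (
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
)

// deleteRange removes the entries for block numbers [start, end) via del,
// flushing whenever the batch grows past ethdb.IdealBatchSize. Before every
// flush it asks save to record progress consistent with what is about to hit
// disk, mirroring the temporary tail/next rewrite done by cleanStales above.
func deleteRange(db ethdb.Database, start, end uint64, del func(ethdb.KeyValueWriter, uint64), save func(ethdb.KeyValueWriter, uint64)) {
	batch := db.NewBatch()
	for n := start; n < end; n++ {
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			save(batch, n) // progress marker matching the deletions in this batch
			if err := batch.Write(); err != nil {
				log.Crit("Failed to flush deletion batch", "err", err)
			}
			batch.Reset()
		}
		del(batch, n)
	}
	save(batch, end) // final progress marker, written together with the tail of the range
	if err := batch.Write(); err != nil {
		log.Crit("Failed to flush deletion batch", "err", err)
	}
}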