Skip to content

Commit 4768ed4

Browse files
Remove bootstrapping retry config (#2301)
1 parent 392b313 commit 4768ed4

File tree

13 files changed

+139
-166
lines changed

13 files changed

+139
-166
lines changed

chains/manager.go

+26 -32
Original file line number | Diff line number | Diff line change
@@ -174,34 +174,32 @@ type ManagerConfig struct {
174174
StakingBLSKey *bls.SecretKey
175175
TracingEnabled bool
176176
// Must not be used unless [TracingEnabled] is true as this may be nil.
177-
Tracer trace.Tracer
178-
Log logging.Logger
179-
LogFactory logging.Factory
180-
VMManager vms.Manager // Manage mappings from vm ID --> vm
181-
BlockAcceptorGroup snow.AcceptorGroup
182-
TxAcceptorGroup snow.AcceptorGroup
183-
VertexAcceptorGroup snow.AcceptorGroup
184-
DB database.Database
185-
MsgCreator message.OutboundMsgBuilder // message creator, shared with network
186-
Router router.Router // Routes incoming messages to the appropriate chain
187-
Net network.Network // Sends consensus messages to other validators
188-
Validators validators.Manager // Validators validating on this chain
189-
NodeID ids.NodeID // The ID of this node
190-
NetworkID uint32 // ID of the network this node is connected to
191-
PartialSyncPrimaryNetwork bool
192-
Server server.Server // Handles HTTP API calls
193-
Keystore keystore.Keystore
194-
AtomicMemory *atomic.Memory
195-
AVAXAssetID ids.ID
196-
XChainID ids.ID // ID of the X-Chain,
197-
CChainID ids.ID // ID of the C-Chain,
198-
CriticalChains set.Set[ids.ID] // Chains that can't exit gracefully
199-
TimeoutManager timeout.Manager // Manages request timeouts when sending messages to other validators
200-
Health health.Registerer
201-
RetryBootstrap bool // Should Bootstrap be retried
202-
RetryBootstrapWarnFrequency int // Max number of times to retry bootstrap before warning the node operator
203-
SubnetConfigs map[ids.ID]subnets.Config // ID -> SubnetConfig
204-
ChainConfigs map[string]ChainConfig // alias -> ChainConfig
177+
Tracer trace.Tracer
178+
Log logging.Logger
179+
LogFactory logging.Factory
180+
VMManager vms.Manager // Manage mappings from vm ID --> vm
181+
BlockAcceptorGroup snow.AcceptorGroup
182+
TxAcceptorGroup snow.AcceptorGroup
183+
VertexAcceptorGroup snow.AcceptorGroup
184+
DB database.Database
185+
MsgCreator message.OutboundMsgBuilder // message creator, shared with network
186+
Router router.Router // Routes incoming messages to the appropriate chain
187+
Net network.Network // Sends consensus messages to other validators
188+
Validators validators.Manager // Validators validating on this chain
189+
NodeID ids.NodeID // The ID of this node
190+
NetworkID uint32 // ID of the network this node is connected to
191+
PartialSyncPrimaryNetwork bool
192+
Server server.Server // Handles HTTP API calls
193+
Keystore keystore.Keystore
194+
AtomicMemory *atomic.Memory
195+
AVAXAssetID ids.ID
196+
XChainID ids.ID // ID of the X-Chain,
197+
CChainID ids.ID // ID of the C-Chain,
198+
CriticalChains set.Set[ids.ID] // Chains that can't exit gracefully
199+
TimeoutManager timeout.Manager // Manages request timeouts when sending messages to other validators
200+
Health health.Registerer
201+
SubnetConfigs map[ids.ID]subnets.Config // ID -> SubnetConfig
202+
ChainConfigs map[string]ChainConfig // alias -> ChainConfig
205203
// ShutdownNodeFunc allows the chain manager to issue a request to shut down the node
206204
ShutdownNodeFunc func(exitCode int)
207205
MeterVMEnabled bool // Should each VM be wrapped with a MeterVM
@@ -889,8 +887,6 @@ func (m *manager) createAvalancheChain(
889887
Sender: snowmanMessageSender,
890888
BootstrapTracker: sb,
891889
Timer: h,
892-
RetryBootstrap: m.RetryBootstrap,
893-
RetryBootstrapWarnFrequency: m.RetryBootstrapWarnFrequency,
894890
AncestorsMaxContainersReceived: m.BootstrapAncestorsMaxContainersReceived,
895891
SharedCfg: &common.SharedConfig{},
896892
},
@@ -1235,8 +1231,6 @@ func (m *manager) createSnowmanChain(
12351231
Sender: messageSender,
12361232
BootstrapTracker: sb,
12371233
Timer: h,
1238-
RetryBootstrap: m.RetryBootstrap,
1239-
RetryBootstrapWarnFrequency: m.RetryBootstrapWarnFrequency,
12401234
AncestorsMaxContainersReceived: m.BootstrapAncestorsMaxContainersReceived,
12411235
SharedCfg: &common.SharedConfig{},
12421236
}

config/config.go

-2
Original file line number | Diff line number | Diff line change
@@ -521,8 +521,6 @@ func getStateSyncConfig(v *viper.Viper) (node.StateSyncConfig, error) {
521521

522522
func getBootstrapConfig(v *viper.Viper, networkID uint32) (node.BootstrapConfig, error) {
523523
config := node.BootstrapConfig{
524-
RetryBootstrap: v.GetBool(RetryBootstrapKey),
525-
RetryBootstrapWarnFrequency: v.GetInt(RetryBootstrapWarnFrequencyKey),
526524
BootstrapBeaconConnectionTimeout: v.GetDuration(BootstrapBeaconConnectionTimeoutKey),
527525
BootstrapMaxTimeGetAncestors: v.GetDuration(BootstrapMaxTimeGetAncestorsKey),
528526
BootstrapAncestorsMaxContainersSent: int(v.GetUint(BootstrapAncestorsMaxContainersSentKey)),

config/flags.go

-2
Original file line number | Diff line number | Diff line change
@@ -292,8 +292,6 @@ func addNodeFlags(fs *pflag.FlagSet) {
292292
// TODO: combine "BootstrapIPsKey" and "BootstrapIDsKey" into one flag
293293
fs.String(BootstrapIPsKey, "", "Comma separated list of bootstrap peer ips to connect to. Example: 127.0.0.1:9630,127.0.0.1:9631")
294294
fs.String(BootstrapIDsKey, "", "Comma separated list of bootstrap peer ids to connect to. Example: NodeID-JR4dVmy6ffUGAKCBDkyCbeZbyHQBeDsET,NodeID-8CrVPQZ4VSqgL8zTdvL14G8HqAfrBr4z")
295-
fs.Bool(RetryBootstrapKey, true, "Specifies whether bootstrap should be retried")
296-
fs.Int(RetryBootstrapWarnFrequencyKey, 50, "Specifies how many times bootstrap should be retried before warning the operator")
297295
fs.Duration(BootstrapBeaconConnectionTimeoutKey, time.Minute, "Timeout before emitting a warn log when connecting to bootstrapping beacons")
298296
fs.Duration(BootstrapMaxTimeGetAncestorsKey, 50*time.Millisecond, "Max Time to spend fetching a container and its ancestors when responding to a GetAncestors")
299297
fs.Uint(BootstrapAncestorsMaxContainersSentKey, 2000, "Max number of containers in an Ancestors message sent by this node")

config/keys.go

-2
Original file line number | Diff line number | Diff line change
@@ -163,8 +163,6 @@ const (
163163
RouterHealthMaxOutstandingRequestsKey = "router-health-max-outstanding-requests"
164164
HealthCheckFreqKey = "health-check-frequency"
165165
HealthCheckAveragerHalflifeKey = "health-check-averager-halflife"
166-
RetryBootstrapKey = "bootstrap-retry-enabled"
167-
RetryBootstrapWarnFrequencyKey = "bootstrap-retry-warn-frequency"
168166
PluginDirKey = "plugin-dir"
169167
BootstrapBeaconConnectionTimeoutKey = "bootstrap-beacon-connection-timeout"
170168
BootstrapMaxTimeGetAncestorsKey = "bootstrap-max-time-get-ancestors"

node/config.go

-6
Original file line number | Diff line number | Diff line change
@@ -107,12 +107,6 @@ type StateSyncConfig struct {
107107
}
108108

109109
type BootstrapConfig struct {
110-
// Should Bootstrap be retried
111-
RetryBootstrap bool `json:"retryBootstrap"`
112-
113-
// Max number of times to retry bootstrap before warning the node operator
114-
RetryBootstrapWarnFrequency int `json:"retryBootstrapWarnFrequency"`
115-
116110
// Timeout before emitting a warn log when connecting to bootstrapping beacons
117111
BootstrapBeaconConnectionTimeout time.Duration `json:"bootstrapBeaconConnectionTimeout"`
118112

node/node.go

-2
Original file line number | Diff line number | Diff line change
@@ -1009,8 +1009,6 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error {
10091009
CriticalChains: criticalChains,
10101010
TimeoutManager: n.timeoutManager,
10111011
Health: n.health,
1012-
RetryBootstrap: n.Config.RetryBootstrap,
1013-
RetryBootstrapWarnFrequency: n.Config.RetryBootstrapWarnFrequency,
10141012
ShutdownNodeFunc: n.Shutdown,
10151013
MeterVMEnabled: n.Config.MeterVMEnabled,
10161014
Metrics: n.MetricsGatherer,

snow/engine/common/bootstrapper.go

+5 -23
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ type Bootstrapper interface {
3131
AcceptedHandler
3232
Haltable
3333
Startup(context.Context) error
34-
Restart(ctx context.Context, reset bool) error
34+
Restart(ctx context.Context) error
3535
}
3636

3737
// It collects mechanisms common to both snowman and avalanche bootstrappers
@@ -41,9 +41,6 @@ type bootstrapper struct {
4141

4242
minority smbootstrapper.Poll
4343
majority smbootstrapper.Poll
44-
45-
// number of times the bootstrap has been attempted
46-
bootstrapAttempts int
4744
}
4845

4946
func NewCommonBootstrapper(config Config) Bootstrapper {
@@ -146,7 +143,6 @@ func (b *bootstrapper) Startup(ctx context.Context) error {
146143
MaxOutstandingBroadcastRequests,
147144
)
148145

149-
b.bootstrapAttempts++
150146
if accepted, finalized := b.majority.Result(ctx); finalized {
151147
b.Ctx.Log.Info("bootstrapping skipped",
152148
zap.String("reason", "no provided bootstraps"),
@@ -158,22 +154,9 @@ func (b *bootstrapper) Startup(ctx context.Context) error {
158154
return b.sendMessagesOrFinish(ctx)
159155
}
160156

161-
func (b *bootstrapper) Restart(ctx context.Context, reset bool) error {
162-
// resets the attempts when we're pulling blocks/vertices we don't want to
163-
// fail the bootstrap at that stage
164-
if reset {
165-
b.Ctx.Log.Debug("Checking for new frontiers")
166-
167-
b.Config.SharedCfg.Restarted = true
168-
b.bootstrapAttempts = 0
169-
}
170-
171-
if b.bootstrapAttempts > 0 && b.bootstrapAttempts%b.RetryBootstrapWarnFrequency == 0 {
172-
b.Ctx.Log.Debug("check internet connection",
173-
zap.Int("numBootstrapAttempts", b.bootstrapAttempts),
174-
)
175-
}
176-
157+
func (b *bootstrapper) Restart(ctx context.Context) error {
158+
b.Ctx.Log.Debug("Checking for new frontiers")
159+
b.Config.SharedCfg.Restarted = true
177160
return b.Startup(ctx)
178161
}
179162

@@ -207,9 +190,8 @@ func (b *bootstrapper) sendMessagesOrFinish(ctx context.Context) error {
207190
b.Ctx.Log.Debug("restarting bootstrap",
208191
zap.String("reason", "no blocks accepted"),
209192
zap.Int("numBeacons", b.Beacons.Count(b.Ctx.SubnetID)),
210-
zap.Int("numBootstrapAttempts", b.bootstrapAttempts),
211193
)
212-
return b.Restart(ctx, false /*=reset*/)
194+
return b.Startup(ctx)
213195
}
214196

215197
if !b.Config.SharedCfg.Restarted {

snow/engine/common/config.go

-6
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,6 @@ type Config struct {
2323
BootstrapTracker BootstrapTracker
2424
Timer Timer
2525

26-
// Should Bootstrap be retried
27-
RetryBootstrap bool
28-
29-
// Max number of times to retry bootstrap before warning the node operator
30-
RetryBootstrapWarnFrequency int
31-
3226
// This node will only consider the first [AncestorsMaxContainersReceived]
3327
// containers in an ancestors message it receives.
3428
AncestorsMaxContainersReceived int

snow/engine/snowman/bootstrap/bootstrapper.go

+17 -3
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,20 @@ var (
3434
errUnexpectedTimeout = errors.New("unexpected timeout fired")
3535
)
3636

37+
// bootstrapper repeatedly performs the bootstrapping protocol.
38+
//
39+
// 1. Wait until a sufficient amount of stake is connected.
40+
// 2. Sample a small number of nodes to get the last accepted block ID.
41+
// 3. Verify against the full network that the last accepted block ID received
42+
// in step 2 is an accepted block.
43+
// 4. Sync the full ancestry of the last accepted block.
44+
// 5. Execute all the fetched blocks that haven't already been executed.
45+
// 6. Restart the bootstrapping protocol until the number of blocks being
46+
// accepted during a bootstrapping round stops decreasing.
47+
//
48+
// Note: Because of step 6, the bootstrapping protocol will generally be
49+
// performed multiple times.
50+
//
3751
// Invariant: The VM is not guaranteed to be initialized until Start has been
3852
// called, so it must be guaranteed the VM is not used until after Start.
3953
type bootstrapper struct {
@@ -288,7 +302,7 @@ func (b *bootstrapper) Timeout(ctx context.Context) error {
288302
b.awaitingTimeout = false
289303

290304
if !b.Config.BootstrapTracker.IsBootstrapped() {
291-
return b.Restart(ctx, true)
305+
return b.Restart(ctx)
292306
}
293307
b.fetchETA.Set(0)
294308
return b.OnFinished(ctx, b.Config.SharedCfg.RequestID)
@@ -592,8 +606,8 @@ func (b *bootstrapper) checkFinish(ctx context.Context) error {
592606
// Note that executedBlocks < c*previouslyExecuted ( 0 <= c < 1 ) is enforced
593607
// so that the bootstrapping process will terminate even as new blocks are
594608
// being issued.
595-
if b.Config.RetryBootstrap && executedBlocks > 0 && executedBlocks < previouslyExecuted/2 {
596-
return b.Restart(ctx, true)
609+
if executedBlocks > 0 && executedBlocks < previouslyExecuted/2 {
610+
return b.Restart(ctx)
597611
}
598612

599613
// If there is an additional callback, notify them that this chain has been

snow/engine/snowman/bootstrap/bootstrapper_test.go

+28 -20
Original file line number | Diff line number | Diff line change
@@ -356,8 +356,6 @@ func TestBootstrapperUnknownByzantineResponse(t *testing.T) {
356356

357357
require.NoError(bs.Start(context.Background(), 0))
358358

359-
acceptedIDs := []ids.ID{blkID2}
360-
361359
parsedBlk1 := false
362360
vm.GetBlockF = func(_ context.Context, blkID ids.ID) (snowman.Block, error) {
363361
switch blkID {
@@ -390,31 +388,29 @@ func TestBootstrapperUnknownByzantineResponse(t *testing.T) {
390388
return nil, errUnknownBlock
391389
}
392390

393-
requestID := new(uint32)
391+
var requestID uint32
394392
sender.SendGetAncestorsF = func(_ context.Context, vdr ids.NodeID, reqID uint32, blkID ids.ID) {
395393
require.Equal(peerID, vdr)
396394
require.Equal(blkID1, blkID)
397-
*requestID = reqID
395+
requestID = reqID
398396
}
399397

400398
vm.CantSetState = false
401-
require.NoError(bs.ForceAccepted(context.Background(), acceptedIDs)) // should request blk1
402-
403-
oldReqID := *requestID
404-
require.NoError(bs.Ancestors(context.Background(), peerID, *requestID+1, [][]byte{blkBytes1})) // respond with wrong request ID
405-
require.Equal(oldReqID, *requestID)
399+
require.NoError(bs.ForceAccepted(context.Background(), []ids.ID{blkID2})) // should request blk1
406400

407-
require.NoError(bs.Ancestors(context.Background(), ids.BuildTestNodeID([]byte{1, 2, 3}), *requestID, [][]byte{blkBytes1})) // respond from wrong peer
408-
require.Equal(oldReqID, *requestID)
401+
oldReqID := requestID
402+
require.NoError(bs.Ancestors(context.Background(), peerID, requestID, [][]byte{blkBytes0})) // respond with wrong block
403+
require.NotEqual(oldReqID, requestID)
409404

410-
require.NoError(bs.Ancestors(context.Background(), peerID, *requestID, [][]byte{blkBytes0})) // respond with wrong block
411-
require.NotEqual(oldReqID, *requestID)
405+
require.NoError(bs.Ancestors(context.Background(), peerID, requestID, [][]byte{blkBytes1}))
412406

413-
require.NoError(bs.Ancestors(context.Background(), peerID, *requestID, [][]byte{blkBytes1}))
414-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
407+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
415408
require.Equal(choices.Accepted, blk0.Status())
416409
require.Equal(choices.Accepted, blk1.Status())
417410
require.Equal(choices.Accepted, blk2.Status())
411+
412+
require.NoError(bs.ForceAccepted(context.Background(), []ids.ID{blkID2}))
413+
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
418414
}
419415

420416
// There are multiple needed blocks and Ancestors returns one at a time
@@ -554,10 +550,13 @@ func TestBootstrapperPartialFetch(t *testing.T) {
554550
require.NoError(bs.Ancestors(context.Background(), peerID, *requestID, [][]byte{blkBytes1})) // respond with blk1
555551
require.Equal(blkID1, requested)
556552

557-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
553+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
558554
require.Equal(choices.Accepted, blk0.Status())
559555
require.Equal(choices.Accepted, blk1.Status())
560556
require.Equal(choices.Accepted, blk2.Status())
557+
558+
require.NoError(bs.ForceAccepted(context.Background(), acceptedIDs))
559+
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
561560
}
562561

563562
// There are multiple needed blocks and some validators do not have all the blocks
@@ -714,7 +713,7 @@ func TestBootstrapperEmptyResponse(t *testing.T) {
714713

715714
require.NoError(bs.Ancestors(context.Background(), requestedVdr, requestID, [][]byte{blkBytes1})) // respond with blk1
716715

717-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
716+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
718717
require.Equal(choices.Accepted, blk0.Status())
719718
require.Equal(choices.Accepted, blk1.Status())
720719
require.Equal(choices.Accepted, blk2.Status())
@@ -856,10 +855,13 @@ func TestBootstrapperAncestors(t *testing.T) {
856855
require.NoError(bs.Ancestors(context.Background(), peerID, *requestID, [][]byte{blkBytes2, blkBytes1})) // respond with blk2 and blk1
857856
require.Equal(blkID2, requested)
858857

859-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
858+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
860859
require.Equal(choices.Accepted, blk0.Status())
861860
require.Equal(choices.Accepted, blk1.Status())
862861
require.Equal(choices.Accepted, blk2.Status())
862+
863+
require.NoError(bs.ForceAccepted(context.Background(), acceptedIDs))
864+
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
863865
}
864866

865867
func TestBootstrapperFinalized(t *testing.T) {
@@ -976,10 +978,13 @@ func TestBootstrapperFinalized(t *testing.T) {
976978

977979
require.NoError(bs.Ancestors(context.Background(), peerID, reqIDBlk2, [][]byte{blkBytes2, blkBytes1}))
978980

979-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
981+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
980982
require.Equal(choices.Accepted, blk0.Status())
981983
require.Equal(choices.Accepted, blk1.Status())
982984
require.Equal(choices.Accepted, blk2.Status())
985+
986+
require.NoError(bs.ForceAccepted(context.Background(), []ids.ID{blkID2}))
987+
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
983988
}
984989

985990
func TestRestartBootstrapping(t *testing.T) {
@@ -1156,12 +1161,15 @@ func TestRestartBootstrapping(t *testing.T) {
11561161

11571162
require.NoError(bs.Ancestors(context.Background(), peerID, blk4RequestID, [][]byte{blkBytes4}))
11581163

1159-
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
1164+
require.Equal(snow.Bootstrapping, config.Ctx.State.Get().State)
11601165
require.Equal(choices.Accepted, blk0.Status())
11611166
require.Equal(choices.Accepted, blk1.Status())
11621167
require.Equal(choices.Accepted, blk2.Status())
11631168
require.Equal(choices.Accepted, blk3.Status())
11641169
require.Equal(choices.Accepted, blk4.Status())
1170+
1171+
require.NoError(bs.ForceAccepted(context.Background(), []ids.ID{blkID4}))
1172+
require.Equal(snow.NormalOp, config.Ctx.State.Get().State)
11651173
}
11661174

11671175
func TestBootstrapOldBlockAfterStateSync(t *testing.T) {

0 commit comments

Comments
 (0)