Skip to content

Commit db9e96a

Browse files
Unblock misconfigured subnets (#2679)
1 parent e2ef48f commit db9e96a

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

snow/engine/snowman/transitive.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ func (t *Transitive) Gossip(ctx context.Context) error {
171171
// nodes with a large amount of stake weight.
172172
vdrID, ok := t.ConnectedValidators.SampleValidator()
173173
if !ok {
174-
t.Ctx.Log.Error("skipping block gossip",
174+
t.Ctx.Log.Warn("skipping block gossip",
175175
zap.String("reason", "no connected validators"),
176176
)
177177
return nil
@@ -201,6 +201,11 @@ func (t *Transitive) Gossip(ctx context.Context) error {
201201
zap.String("reason", "blocks currently processing"),
202202
zap.Int("numProcessing", numProcessing),
203203
)
204+
205+
// repoll is called here to unblock the engine if it previously errored
206+
// when attempting to issue a query. This can happen if a subnet was
207+
// temporarily misconfigured and there were no validators.
208+
t.repoll(ctx)
204209
}
205210

206211
// TODO: Remove periodic push gossip after v1.11.x is activated
@@ -932,7 +937,7 @@ func (t *Transitive) sendQuery(
932937

933938
vdrIDs, err := t.Validators.Sample(t.Ctx.SubnetID, t.Params.K)
934939
if err != nil {
935-
t.Ctx.Log.Error("dropped query for block",
940+
t.Ctx.Log.Warn("dropped query for block",
936941
zap.String("reason", "insufficient number of validators"),
937942
zap.Stringer("blkID", blkID),
938943
zap.Int("size", t.Params.K),

snow/engine/snowman/transitive_test.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2921,3 +2921,117 @@ func TestEngineApplyAcceptedFrontierInQueryFailed(t *testing.T) {
29212921

29222922
require.Equal(choices.Accepted, blk.Status())
29232923
}
2924+
2925+
func TestEngineRepollsMisconfiguredSubnet(t *testing.T) {
2926+
require := require.New(t)
2927+
2928+
engCfg := DefaultConfig(t)
2929+
engCfg.Params = snowball.Parameters{
2930+
K: 1,
2931+
AlphaPreference: 1,
2932+
AlphaConfidence: 1,
2933+
BetaVirtuous: 1,
2934+
BetaRogue: 1,
2935+
ConcurrentRepolls: 1,
2936+
OptimalProcessing: 1,
2937+
MaxOutstandingItems: 1,
2938+
MaxItemProcessingTime: 1,
2939+
}
2940+
2941+
// Set up the engine with no validators. When a block is issued, the poll
2942+
// should fail to be created because there is nobody to poll.
2943+
vals := validators.NewManager()
2944+
engCfg.Validators = vals
2945+
2946+
sender := &common.SenderTest{T: t}
2947+
engCfg.Sender = sender
2948+
2949+
sender.Default(true)
2950+
2951+
vm := &block.TestVM{}
2952+
vm.T = t
2953+
engCfg.VM = vm
2954+
2955+
vm.Default(true)
2956+
vm.CantSetState = false
2957+
vm.CantSetPreference = false
2958+
2959+
gBlk := &snowman.TestBlock{TestDecidable: choices.TestDecidable{
2960+
IDV: ids.GenerateTestID(),
2961+
StatusV: choices.Accepted,
2962+
}}
2963+
2964+
vm.LastAcceptedF = func(context.Context) (ids.ID, error) {
2965+
return gBlk.ID(), nil
2966+
}
2967+
vm.GetBlockF = func(_ context.Context, id ids.ID) (snowman.Block, error) {
2968+
require.Equal(gBlk.ID(), id)
2969+
return gBlk, nil
2970+
}
2971+
2972+
te, err := newTransitive(engCfg)
2973+
require.NoError(err)
2974+
require.NoError(te.Start(context.Background(), 0))
2975+
2976+
vm.LastAcceptedF = nil
2977+
2978+
blk := &snowman.TestBlock{
2979+
TestDecidable: choices.TestDecidable{
2980+
IDV: ids.GenerateTestID(),
2981+
StatusV: choices.Processing,
2982+
},
2983+
ParentV: gBlk.IDV,
2984+
HeightV: 1,
2985+
BytesV: []byte{1},
2986+
}
2987+
2988+
// Issue the block. This shouldn't call the sender, because creating the
2989+
// poll should fail.
2990+
require.NoError(te.issue(
2991+
context.Background(),
2992+
te.Ctx.NodeID,
2993+
blk,
2994+
true,
2995+
te.metrics.issued.WithLabelValues(unknownSource),
2996+
))
2997+
2998+
// The block should have successfully been added into consensus.
2999+
require.Equal(1, te.Consensus.NumProcessing())
3000+
3001+
// Fix the subnet configuration by adding a validator.
3002+
vdr := ids.GenerateTestNodeID()
3003+
require.NoError(vals.AddStaker(engCfg.Ctx.SubnetID, vdr, nil, ids.Empty, 1))
3004+
3005+
var (
3006+
queryRequestID uint32
3007+
queried bool
3008+
)
3009+
sender.SendPullQueryF = func(_ context.Context, inVdrs set.Set[ids.NodeID], requestID uint32, blkID ids.ID, requestedHeight uint64) {
3010+
queryRequestID = requestID
3011+
require.Contains(inVdrs, vdr)
3012+
require.Equal(blk.ID(), blkID)
3013+
require.Equal(uint64(1), requestedHeight)
3014+
queried = true
3015+
}
3016+
3017+
// Because there is now a validator that can be queried, gossip should
3018+
// trigger creation of the poll.
3019+
require.NoError(te.Gossip(context.Background()))
3020+
require.True(queried)
3021+
3022+
vm.GetBlockF = func(_ context.Context, id ids.ID) (snowman.Block, error) {
3023+
switch id {
3024+
case gBlk.ID():
3025+
return gBlk, nil
3026+
case blk.ID():
3027+
return blk, nil
3028+
}
3029+
require.FailNow(errUnknownBlock.Error())
3030+
return nil, errUnknownBlock
3031+
}
3032+
3033+
// Voting for the block that was issued during the period when the validator
3034+
// set was misconfigured should result in it being accepted successfully.
3035+
require.NoError(te.Chits(context.Background(), vdr, queryRequestID, blk.ID(), blk.ID(), blk.ID()))
3036+
require.Equal(choices.Accepted, blk.Status())
3037+
}

0 commit comments

Comments
 (0)