-
Notifications
You must be signed in to change notification settings - Fork 807
Unblock misconfigured subnets #2679
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
c23262b
5c215d6
2904c51
d2ae28a
0174d28
4eb403c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,7 +171,7 @@ func (t *Transitive) Gossip(ctx context.Context) error { | |
// nodes with a large amount of stake weight. | ||
vdrID, ok := t.ConnectedValidators.SampleValidator() | ||
if !ok { | ||
t.Ctx.Log.Error("skipping block gossip", | ||
t.Ctx.Log.Warn("skipping block gossip", | ||
zap.String("reason", "no connected validators"), | ||
) | ||
return nil | ||
|
@@ -201,6 +201,11 @@ func (t *Transitive) Gossip(ctx context.Context) error { | |
zap.String("reason", "blocks currently processing"), | ||
zap.Int("numProcessing", numProcessing), | ||
) | ||
|
||
// repoll is called here to unblock the engine if it previously errored | ||
// when attempting to issue a query. This can happen if a subnet was | ||
// temporarily misconfigured and there were no validators. | ||
t.repoll(ctx) | ||
} | ||
|
||
// TODO: Remove periodic push gossip after v1.11.x is activated | ||
|
@@ -932,7 +937,7 @@ func (t *Transitive) sendQuery( | |
|
||
vdrIDs, err := t.Validators.Sample(t.Ctx.SubnetID, t.Params.K) | ||
if err != nil { | ||
t.Ctx.Log.Error("dropped query for block", | ||
t.Ctx.Log.Warn("dropped query for block", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are no longer |
||
zap.String("reason", "insufficient number of validators"), | ||
zap.Stringer("blkID", blkID), | ||
zap.Int("size", t.Params.K), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2921,3 +2921,108 @@ func TestEngineApplyAcceptedFrontierInQueryFailed(t *testing.T) { | |
|
||
require.Equal(choices.Accepted, blk.Status()) | ||
} | ||
|
||
func TestEngineRepollsMisconfiguredSubnet(t *testing.T) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (No action required) I was able to grok this test, but have some suggestions for increasing maintainability:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added comments. I'm hoping to do a large refactor of both this code and the tests... When we do that hopefully all of this will be able to be cleaned up. |
||
require := require.New(t) | ||
|
||
engCfg := DefaultConfig(t) | ||
engCfg.Params = snowball.Parameters{ | ||
K: 1, | ||
AlphaPreference: 1, | ||
AlphaConfidence: 1, | ||
BetaVirtuous: 1, | ||
BetaRogue: 1, | ||
ConcurrentRepolls: 1, | ||
OptimalProcessing: 1, | ||
MaxOutstandingItems: 1, | ||
MaxItemProcessingTime: 1, | ||
} | ||
|
||
vals := validators.NewManager() | ||
engCfg.Validators = vals | ||
|
||
sender := &common.SenderTest{T: t} | ||
engCfg.Sender = sender | ||
|
||
sender.Default(true) | ||
|
||
vm := &block.TestVM{} | ||
vm.T = t | ||
engCfg.VM = vm | ||
|
||
vm.Default(true) | ||
vm.CantSetState = false | ||
vm.CantSetPreference = false | ||
|
||
gBlk := &snowman.TestBlock{TestDecidable: choices.TestDecidable{ | ||
IDV: ids.GenerateTestID(), | ||
StatusV: choices.Accepted, | ||
}} | ||
|
||
vm.LastAcceptedF = func(context.Context) (ids.ID, error) { | ||
return gBlk.ID(), nil | ||
} | ||
vm.GetBlockF = func(_ context.Context, id ids.ID) (snowman.Block, error) { | ||
require.Equal(gBlk.ID(), id) | ||
return gBlk, nil | ||
} | ||
|
||
te, err := newTransitive(engCfg) | ||
require.NoError(err) | ||
require.NoError(te.Start(context.Background(), 0)) | ||
|
||
vm.LastAcceptedF = nil | ||
|
||
blk := &snowman.TestBlock{ | ||
TestDecidable: choices.TestDecidable{ | ||
IDV: ids.GenerateTestID(), | ||
StatusV: choices.Processing, | ||
}, | ||
ParentV: gBlk.IDV, | ||
HeightV: 1, | ||
BytesV: []byte{1}, | ||
} | ||
|
||
require.NoError(te.issue( | ||
context.Background(), | ||
te.Ctx.NodeID, | ||
blk, | ||
true, | ||
te.metrics.issued.WithLabelValues(unknownSource), | ||
)) | ||
|
||
require.Equal(choices.Processing, blk.Status()) | ||
|
||
vdr := ids.GenerateTestNodeID() | ||
require.NoError(vals.AddStaker(engCfg.Ctx.SubnetID, vdr, nil, ids.Empty, 1)) | ||
|
||
var ( | ||
queryRequestID uint32 | ||
queried bool | ||
) | ||
sender.SendPullQueryF = func(_ context.Context, inVdrs set.Set[ids.NodeID], requestID uint32, blkID ids.ID, requestedHeight uint64) { | ||
queryRequestID = requestID | ||
require.Contains(inVdrs, vdr) | ||
require.Equal(blk.ID(), blkID) | ||
require.Equal(uint64(1), requestedHeight) | ||
queried = true | ||
} | ||
|
||
require.NoError(te.Gossip(context.Background())) | ||
require.True(queried) | ||
|
||
vm.GetBlockF = func(_ context.Context, id ids.ID) (snowman.Block, error) { | ||
switch id { | ||
case gBlk.ID(): | ||
return gBlk, nil | ||
case blk.ID(): | ||
return blk, nil | ||
} | ||
require.FailNow(errUnknownBlock.Error()) | ||
return nil, errUnknownBlock | ||
} | ||
|
||
require.NoError(te.Chits(context.Background(), vdr, queryRequestID, blk.ID(), blk.ID(), blk.ID())) | ||
|
||
require.Equal(choices.Accepted, blk.Status()) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are no longer
Error
s as the node can recover.