Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dynamic Protocol State] TODOs and refactoring, part 2 #5080

88 changes: 86 additions & 2 deletions model/flow/protocol_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,52 @@ func TestNewRichProtocolStateEntry(t *testing.T) {
assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants")
})

// TODO: include test for epoch setup phase where no prior epoch exist (i.e. first epoch setup phase after spork)
t.Run("setup-after-spork", func(t *testing.T) {
stateEntry := unittest.ProtocolStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichProtocolStateEntry) {
// no previous epoch since we are in the first epoch
entry.PreviousEpochSetup = nil
entry.PreviousEpochCommit = nil
entry.PreviousEpoch = nil

// next epoch is setup but not committed
entry.NextEpochCommit = nil
entry.NextEpoch.CommitID = flow.ZeroID
})

Copy link
Member

@AlexHentschel AlexHentschel Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the following lines are sanity checks for the previously-constructed stateEntry, correct?

assert.Nil(t, stateEntry.PreviousEpoch)
assert.Nil(t, stateEntry.PreviousEpochSetup)
assert.Nil(t, stateEntry.PreviousEpochCommit)

Would suggest to include a comment:

Suggested change
// sanity check that previous epoch is not populated in `stateEntry`

👉 Implemented in my PR #5116

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this is santity checks to ensure we are testing the correct thing

assert.Nil(t, stateEntry.PreviousEpoch)
assert.Nil(t, stateEntry.PreviousEpochSetup)
assert.Nil(t, stateEntry.PreviousEpochCommit)

richEntry, err := flow.NewRichProtocolStateEntry(
stateEntry.ProtocolStateEntry,
stateEntry.PreviousEpochSetup,
stateEntry.PreviousEpochCommit,
stateEntry.CurrentEpochSetup,
stateEntry.CurrentEpochCommit,
stateEntry.NextEpochSetup,
nil,
)
assert.NoError(t, err)
expectedIdentities, err := flow.BuildIdentityTable(
stateEntry.CurrentEpochSetup.Participants,
stateEntry.CurrentEpoch.ActiveIdentities,
stateEntry.NextEpochSetup.Participants,
stateEntry.NextEpoch.ActiveIdentities,
flow.EpochParticipationStatusJoining,
)
assert.NoError(t, err)
assert.Equal(t, expectedIdentities, richEntry.CurrentEpochIdentityTable, "should be equal to current epoch setup participants + next epoch setup participants")
assert.Nil(t, richEntry.NextEpochCommit)
expectedIdentities, err = flow.BuildIdentityTable(
stateEntry.NextEpochSetup.Participants,
stateEntry.NextEpoch.ActiveIdentities,
stateEntry.CurrentEpochSetup.Participants,
stateEntry.CurrentEpoch.ActiveIdentities,
flow.EpochParticipationStatusLeaving,
)
assert.NoError(t, err)
assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants")
})

// Common situation during the epoch commit phase for epoch N+1
// * we are currently in Epoch N
Expand Down Expand Up @@ -165,8 +210,47 @@ func TestNewRichProtocolStateEntry(t *testing.T) {
assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants")
})

// TODO: include test for epoch commit phase where no prior epoch exist (i.e. first epoch commit phase after spork)
t.Run("commit-after-spork", func(t *testing.T) {
stateEntry := unittest.ProtocolStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichProtocolStateEntry) {
// no previous epoch since we are in the first epoch
entry.PreviousEpochSetup = nil
entry.PreviousEpochCommit = nil
entry.PreviousEpoch = nil
})

Copy link
Member

@AlexHentschel AlexHentschel Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// sanity check that previous epoch is not populated in `stateEntry`

👉 Implemented in my PR #5116

assert.Nil(t, stateEntry.PreviousEpoch)
assert.Nil(t, stateEntry.PreviousEpochSetup)
assert.Nil(t, stateEntry.PreviousEpochCommit)

richEntry, err := flow.NewRichProtocolStateEntry(
stateEntry.ProtocolStateEntry,
stateEntry.PreviousEpochSetup,
stateEntry.PreviousEpochCommit,
stateEntry.CurrentEpochSetup,
stateEntry.CurrentEpochCommit,
stateEntry.NextEpochSetup,
stateEntry.NextEpochCommit,
)
assert.NoError(t, err)
expectedIdentities, err := flow.BuildIdentityTable(
stateEntry.CurrentEpochSetup.Participants,
stateEntry.CurrentEpoch.ActiveIdentities,
stateEntry.NextEpochSetup.Participants,
stateEntry.NextEpoch.ActiveIdentities,
flow.EpochParticipationStatusJoining,
)
assert.NoError(t, err)
assert.Equal(t, expectedIdentities, richEntry.CurrentEpochIdentityTable, "should be equal to current epoch setup participants + next epoch setup participants")
expectedIdentities, err = flow.BuildIdentityTable(
stateEntry.NextEpochSetup.Participants,
stateEntry.NextEpoch.ActiveIdentities,
stateEntry.CurrentEpochSetup.Participants,
stateEntry.CurrentEpoch.ActiveIdentities,
flow.EpochParticipationStatusLeaving,
)
assert.NoError(t, err)
assert.Equal(t, expectedIdentities, richEntry.NextEpochIdentityTable, "should be equal to next epoch setup participants + current epoch setup participants")
})
}

// TestProtocolStateEntry_Copy tests if the copy method returns a deep copy of the entry.
Expand Down
1 change: 1 addition & 0 deletions module/metrics/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const (
ResourceMyReceipt = "my_receipt"
ResourceCollection = "collection"
ResourceProtocolState = "protocol_state"
ResourceProtocolStateByBlockID = "protocol_state_by_block_id"
ResourceApproval = "approval"
ResourceSeal = "seal"
ResourcePendingIncorporatedSeal = "pending_incorporated_seal"
Expand Down
57 changes: 27 additions & 30 deletions storage/badger/protocol_state.go
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that there are still some open and outdated TODOs here regarding documentation. Furthermore, the methods' goDoc differers quite a bit between the interface storage.ProtocolState and this implementation. Lastly, we are still using the term "Identity Table" despite the protocol state now being a lot more general.

I put up PR #5116 targeting your branch, which consolidates the goDoc, address the remaining open TODOs, and removes the outdated TODOs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with not populating the cache which holds the RichProtocolStateEntrys on store. This is because (i) we don't have the RichProtocolStateEntry on store readily available and (ii) new RichProtocolStateEntry are really rare throughout an epoch, so the total cost of populating the cache becomes negligible over several views.

Side comment (outside the scope of this PR):

  • I think we could have the State Machine's Build method generate the RichProtocolStateEntry right away. I think it already has the needed Epoch Setup and Epoch Commit events, since it starts with a RichProtocolStateEntry for the parent state and consumes Epoch Setup and Epoch Commit events.
  • I think we might want to implement this, if we want to store more readily changing information in the protocol state, like the latest sealed block.

Though, I think for the scope of this PR, it would be beneficial to populate the byBlockIdCache on store, because here, we add a new entry every block! And we probably query for every block. So argument (ii) does not really apply here. Furthermore, argument (i) also does not apply, because we already have the Protocol State's ID on store, so we could populate the cache without much additional effort.

I implemented this in my PR #5116.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currenlty, we are using the same cache size for both caches

withLimit[flow.Identifier, *flow.RichProtocolStateEntry](cacheSize),

withLimit[flow.Identifier, flow.Identifier](cacheSize),

I don't think that is a good idea for the following reason:

  • byBlockIdCache will contain an entry for every block. We want to be able to cover a broad interval of views without cache misses, so I like the default setting of allowing up to 1000 entries.
  • However, cache only holds the distinct Protocol States. Minimally, we have something like 3 entries per epoch (one on epoch Switchover, one on receiving the Epoch Setup and one when seeing the Epoch Commit event). Lets be generous and assume we have 20 different Protocol States per epoch. Beyond that, we are certainly leaving the domain of normal operations that we optimize for. That would mean we are holding the protocol states for 1 year in the cache. That doesn't seem useful to me.

I would suggest to have a dedicated size parameter for each cache.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ import (
// operations and to speed up access to frequently used identity tables.
// TODO: update naming to IdentityTable
type ProtocolState struct {
db *badger.DB
cache *Cache[flow.Identifier, *flow.RichProtocolStateEntry]
db *badger.DB
cache *Cache[flow.Identifier, *flow.RichProtocolStateEntry] // protocol_state_id -> protocol state
byBlockIdCache *Cache[flow.Identifier, *flow.RichProtocolStateEntry] // block id -> protocol_state_id
}

var _ storage.ProtocolState = (*ProtocolState)(nil)
Expand All @@ -48,13 +49,30 @@ func NewProtocolState(collector module.CacheMetrics,
return result, nil
}
}
cache := newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolState,
withLimit[flow.Identifier, *flow.RichProtocolStateEntry](cacheSize),
withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]),
withRetrieve(retrieve))

retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) {
return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) {
var protocolStateID flow.Identifier
err := operation.LookupProtocolState(blockID, &protocolStateID)(tx)
if err != nil {
return nil, fmt.Errorf("could not lookup identity table ID for block (%x): %w", blockID[:], err)
}
return cache.Get(protocolStateID)(tx)
Copy link
Member

@jordanschalm jordanschalm Nov 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a minor preference for moving this second cache Get call outside of the byBlockID cache retrieval function (and into ByBlockID). That way each cache remains conceptually only a wrapper around one database call, rather than containing glue logic linking different database calls together. It's also easier to add things like a public ProtocolStateIDByBlockID method (similar to how we have the BlockIDByHeight method that just reads the secondary index for finalized blocks).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean changing signature of secondary cache to Cache[flow.Identifier, flow.Identifier] which will map block_id -> protocol_state_id?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changing signature of secondary cache to Cache[flow.Identifier, flow.Identifier] which will map block_id -> protocol_state_id

Yeah, exactly

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
}
byBlockIdCache := newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolStateByBlockID,
withLimit[flow.Identifier, *flow.RichProtocolStateEntry](cacheSize),
withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]),
withRetrieve(retrieveByBlockID))

return &ProtocolState{
db: db,
cache: newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolState,
withLimit[flow.Identifier, *flow.RichProtocolStateEntry](cacheSize),
withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]),
withRetrieve(retrieve)),
db: db,
cache: cache,
byBlockIdCache: byBlockIdCache,
}
}

Expand Down Expand Up @@ -94,7 +112,7 @@ func (s *ProtocolState) Index(blockID flow.Identifier, protocolStateID flow.Iden
func (s *ProtocolState) ByID(id flow.Identifier) (*flow.RichProtocolStateEntry, error) {
tx := s.db.NewTransaction(false)
defer tx.Discard()
return s.byID(id)(tx)
return s.cache.Get(id)(tx)
}

// ByBlockID retrieves the identity table by the respective block ID.
Expand All @@ -104,28 +122,7 @@ func (s *ProtocolState) ByID(id flow.Identifier) (*flow.RichProtocolStateEntry,
func (s *ProtocolState) ByBlockID(blockID flow.Identifier) (*flow.RichProtocolStateEntry, error) {
tx := s.db.NewTransaction(false)
defer tx.Discard()
return s.byBlockID(blockID)(tx)
}

// byID retrieves the identity table by its ID. Error returns:
// - storage.ErrNotFound if no identity table with the given ID exists
func (s *ProtocolState) byID(protocolStateID flow.Identifier) func(*badger.Txn) (*flow.RichProtocolStateEntry, error) {
return s.cache.Get(protocolStateID)
}

// byBlockID retrieves the identity table by the respective block ID.
// TODO: clarify whether the blockID is the block that defines this identity table or the _child_ block where the identity table is applied. CAUTION: surface for bugs!
// Error returns:
// - storage.ErrNotFound if no identity table for the given blockID exists
func (s *ProtocolState) byBlockID(blockID flow.Identifier) func(*badger.Txn) (*flow.RichProtocolStateEntry, error) {
return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) {
var protocolStateID flow.Identifier
err := operation.LookupProtocolState(blockID, &protocolStateID)(tx)
if err != nil {
return nil, fmt.Errorf("could not lookup identity table ID for block (%x): %w", blockID[:], err)
}
return s.byID(protocolStateID)(tx)
}
return s.byBlockIdCache.Get(blockID)(tx)
}

// newRichProtocolStateEntry constructs a rich protocol state entry from a protocol state entry.
Expand Down
Loading