@@ -30,11 +30,11 @@ use std::{
3030} ;
3131use thiserror:: Error ;
3232
33- #[ derive( PartialEq , Clone , Debug ) ]
33+ #[ derive( PartialEq , Clone , Debug , AbiExample ) ]
3434pub enum SwitchForkDecision {
3535 SwitchProof ( Hash ) ,
36- NoSwitch ,
37- FailedSwitchThreshold ,
36+ SameFork ,
37+ FailedSwitchThreshold ( u64 , u64 ) ,
3838}
3939
4040impl SwitchForkDecision {
@@ -45,8 +45,8 @@ impl SwitchForkDecision {
4545 authorized_voter_pubkey : & Pubkey ,
4646 ) -> Option < Instruction > {
4747 match self {
48- SwitchForkDecision :: FailedSwitchThreshold => None ,
49- SwitchForkDecision :: NoSwitch => Some ( vote_instruction:: vote (
48+ SwitchForkDecision :: FailedSwitchThreshold ( _ , _ ) => None ,
49+ SwitchForkDecision :: SameFork => Some ( vote_instruction:: vote (
5050 vote_account_pubkey,
5151 authorized_voter_pubkey,
5252 vote,
@@ -101,6 +101,8 @@ pub struct Tower {
101101 // This could be emptied after some time; but left intact indefinitely for easier
102102 // implementation
103103 stray_restored_slot : Option < Slot > ,
104+ #[ serde( skip) ]
105+ pub last_switch_threshold_check : Option < ( Slot , SwitchForkDecision ) > ,
104106}
105107
106108impl Default for Tower {
@@ -115,6 +117,7 @@ impl Default for Tower {
115117 path : PathBuf :: default ( ) ,
116118 tmp_path : PathBuf :: default ( ) ,
117119 stray_restored_slot : Option :: default ( ) ,
120+ last_switch_threshold_check : Option :: default ( ) ,
118121 } ;
119122 // VoteState::root_slot is ensured to be Some in Tower
120123 tower. lockouts . root_slot = Some ( Slot :: default ( ) ) ;
@@ -440,6 +443,10 @@ impl Tower {
440443 self . lockouts . root_slot . unwrap ( )
441444 }
442445
446+ pub fn node_pubkey ( & self ) -> Pubkey {
447+ self . node_pubkey
448+ }
449+
443450 // a slot is recent if it's newer than the last vote we have
444451 pub fn is_recent ( & self , slot : Slot ) -> bool {
445452 if let Some ( last_voted_slot) = self . lockouts . last_voted_slot ( ) {
@@ -520,9 +527,16 @@ impl Tower {
520527 // all of them.
521528 panic ! ( "no ancestors found with slot: {}" , last_voted_slot) ;
522529 } else {
523- // bank_forks doesn't have corresponding data for the stray restored last vote,
524- // meaning some inconsistency between saved tower and ledger.
525- // (newer snapshot, or only a saved tower is moved over to new setup?)
530+ // compare slots not to error! just because of newer snapshots
531+ if self . last_switch_threshold_check . is_none ( ) && switch_slot < last_voted_slot {
532+ error ! (
533+ "{} bank_forks doesn't have corresponding data for the stray restored \
534+ last vote({}), meaning some inconsistency between saved tower and ledger.",
535+ self . node_pubkey,
536+ last_voted_slot
537+ ) ;
538+ }
539+ // (system crash? process crash? newer snapshot, or only a saved tower is moved over to new setup?)
526540 & empty_ancestors
527541 }
528542 } ) ;
@@ -532,12 +546,22 @@ impl Tower {
532546 if switch_slot == last_voted_slot || switch_slot_ancestors. contains ( & last_voted_slot) {
533547 // If the `switch_slot is a descendant of the last vote,
534548 // no switching proof is necessary
535- return SwitchForkDecision :: NoSwitch ;
549+ return SwitchForkDecision :: SameFork ;
536550 }
537551
538- // Should never consider switching to an ancestor
539- // of your last vote
540- assert ! ( !last_vote_ancestors. contains( & switch_slot) ) ;
552+ // Generally, should never consider switching to an ancestor of your last vote
553+ if last_vote_ancestors. contains ( & switch_slot) {
554+ let restored_stray_exception = self . is_stray_last_vote ( ) && switch_slot < last_voted_slot;
555+ if !restored_stray_exception {
556+ panic ! ( "Don't switch to slot ({}), which is ancestors({:?}) of last vote: {}" , switch_slot, last_vote_ancestors, last_voted_slot) ;
557+ } else {
558+ // If last vote is stray, it's possible to reach here, because we don't have complete view of voting history.
559+ // In that case, pretend we're always failing to switch, regarless switch_slot is actually on same frok or not.
560+ // this is for safety; otherwise we accidentally might doubl#e-vte on ancestors.
561+ // siwtch_slot must be `< last_voted_slot` because switch_slot must be one of last_vote_ancestors in this if clause.
562+ return SwitchForkDecision :: FailedSwitchThreshold ( 0 , 0 ) ;
563+ }
564+ }
541565
542566 // By this point, we know the `switch_slot` is on a different fork
543567 // (is neither an ancestor nor descendant of `last_vote`), so a
@@ -622,10 +646,10 @@ impl Tower {
622646 if ( locked_out_stake as f64 / total_stake as f64 ) > SWITCH_FORK_THRESHOLD {
623647 SwitchForkDecision :: SwitchProof ( switch_proof)
624648 } else {
625- SwitchForkDecision :: FailedSwitchThreshold
649+ SwitchForkDecision :: FailedSwitchThreshold ( locked_out_stake , total_stake )
626650 }
627651 } )
628- . unwrap_or ( SwitchForkDecision :: NoSwitch )
652+ . unwrap_or ( SwitchForkDecision :: SameFork )
629653 }
630654
631655 pub fn check_vote_stake_threshold (
@@ -931,9 +955,9 @@ impl Tower {
931955 self . lockouts = vote_state;
932956 self . do_initialize_lockouts ( root, |v| v. slot > root) ;
933957 trace ! (
934- "{} lockouts initialized to {:? }" ,
958+ "Lockouts in tower for {} is initialized using bank { }" ,
935959 self . node_pubkey,
936- self . lockouts
960+ bank . slot ( ) ,
937961 ) ;
938962 assert_eq ! (
939963 self . lockouts. node_pubkey, self . node_pubkey,
@@ -985,6 +1009,11 @@ impl Tower {
9851009 bincode:: serialize_into ( & mut file, & saved_tower) ?;
9861010 // file.sync_all() hurts performance; pipeline sync-ing and submitting votes to the cluster!
9871011 }
1012+ trace ! (
1013+ "{} persisted votes: {:?}" ,
1014+ node_keypair. pubkey( ) ,
1015+ self . voted_slots( )
1016+ ) ;
9881017 fs:: rename ( & new_filename, & filename) ?;
9891018 // self.path.parent().sync_all() hurts performance same as the above sync
9901019
@@ -1046,6 +1075,16 @@ pub enum TowerError {
10461075 FatallyInconsistent ( & ' static str ) ,
10471076}
10481077
1078+ impl TowerError {
1079+ pub fn is_file_missing ( & self ) -> bool {
1080+ if let TowerError :: IOError ( io_err) = & self {
1081+ io_err. kind ( ) == std:: io:: ErrorKind :: NotFound
1082+ } else {
1083+ false
1084+ }
1085+ }
1086+ }
1087+
10491088#[ frozen_abi( digest = "Gaxfwvx5MArn52mKZQgzHmDCyn5YfCuTHvp5Et3rFfpp" ) ]
10501089#[ derive( Default , Clone , Serialize , Deserialize , Debug , PartialEq , AbiExample ) ]
10511090pub struct SavedTower {
@@ -1268,7 +1307,7 @@ pub mod test {
12681307 & ancestors,
12691308 & descendants,
12701309 & self . progress ,
1271- & tower,
1310+ tower,
12721311 ) ;
12731312
12741313 // Make sure this slot isn't locked out or failing threshold
@@ -1465,11 +1504,11 @@ pub mod test {
14651504 #[ test]
14661505 fn test_to_vote_instruction ( ) {
14671506 let vote = Vote :: default ( ) ;
1468- let mut decision = SwitchForkDecision :: FailedSwitchThreshold ;
1507+ let mut decision = SwitchForkDecision :: FailedSwitchThreshold ( 0 , 0 ) ;
14691508 assert ! ( decision
14701509 . to_vote_instruction( vote. clone( ) , & Pubkey :: default ( ) , & Pubkey :: default ( ) )
14711510 . is_none( ) ) ;
1472- decision = SwitchForkDecision :: NoSwitch ;
1511+ decision = SwitchForkDecision :: SameFork ;
14731512 assert_eq ! (
14741513 decision. to_vote_instruction( vote. clone( ) , & Pubkey :: default ( ) , & Pubkey :: default ( ) ) ,
14751514 Some ( vote_instruction:: vote(
@@ -1572,7 +1611,7 @@ pub mod test {
15721611 total_stake,
15731612 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
15741613 ) ,
1575- SwitchForkDecision :: NoSwitch
1614+ SwitchForkDecision :: SameFork
15761615 ) ;
15771616
15781617 // Trying to switch to another fork at 110 should fail
@@ -1585,7 +1624,7 @@ pub mod test {
15851624 total_stake,
15861625 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
15871626 ) ,
1588- SwitchForkDecision :: FailedSwitchThreshold
1627+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
15891628 ) ;
15901629
15911630 // Adding another validator lockout on a descendant of last vote should
@@ -1600,7 +1639,7 @@ pub mod test {
16001639 total_stake,
16011640 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
16021641 ) ,
1603- SwitchForkDecision :: FailedSwitchThreshold
1642+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
16041643 ) ;
16051644
16061645 // Adding another validator lockout on an ancestor of last vote should
@@ -1615,7 +1654,7 @@ pub mod test {
16151654 total_stake,
16161655 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
16171656 ) ,
1618- SwitchForkDecision :: FailedSwitchThreshold
1657+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
16191658 ) ;
16201659
16211660 // Adding another validator lockout on a different fork, but the lockout
@@ -1630,7 +1669,7 @@ pub mod test {
16301669 total_stake,
16311670 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
16321671 ) ,
1633- SwitchForkDecision :: FailedSwitchThreshold
1672+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
16341673 ) ;
16351674
16361675 // Adding another validator lockout on a different fork, and the lockout
@@ -1647,7 +1686,7 @@ pub mod test {
16471686 total_stake,
16481687 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
16491688 ) ,
1650- SwitchForkDecision :: FailedSwitchThreshold
1689+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
16511690 ) ;
16521691
16531692 // Adding another validator lockout on a different fork, and the lockout
@@ -1698,7 +1737,7 @@ pub mod test {
16981737 total_stake,
16991738 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
17001739 ) ,
1701- SwitchForkDecision :: FailedSwitchThreshold
1740+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
17021741 ) ;
17031742 }
17041743
@@ -2366,7 +2405,7 @@ pub mod test {
23662405 total_stake,
23672406 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
23682407 ) ,
2369- SwitchForkDecision :: NoSwitch
2408+ SwitchForkDecision :: SameFork
23702409 ) ;
23712410
23722411 // Trying to switch to another fork at 110 should fail
@@ -2379,7 +2418,7 @@ pub mod test {
23792418 total_stake,
23802419 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
23812420 ) ,
2382- SwitchForkDecision :: FailedSwitchThreshold
2421+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
23832422 ) ;
23842423
23852424 vote_simulator. simulate_lockout_interval ( 111 , ( 10 , 49 ) , & other_vote_account) ;
@@ -2457,7 +2496,7 @@ pub mod test {
24572496 total_stake,
24582497 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
24592498 ) ,
2460- SwitchForkDecision :: FailedSwitchThreshold
2499+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
24612500 ) ;
24622501
24632502 // Add lockout_interval which should be excluded
@@ -2471,7 +2510,7 @@ pub mod test {
24712510 total_stake,
24722511 bank0. epoch_vote_accounts( 0 ) . unwrap( ) ,
24732512 ) ,
2474- SwitchForkDecision :: FailedSwitchThreshold
2513+ SwitchForkDecision :: FailedSwitchThreshold ( 0 , 20000 )
24752514 ) ;
24762515
24772516 // Add lockout_interval which should not be excluded
0 commit comments