137 changes: 108 additions & 29 deletions core/src/consensus.rs
@@ -30,11 +30,11 @@ use std::{
};
use thiserror::Error;

#[derive(PartialEq, Clone, Debug)]
#[derive(PartialEq, Clone, Debug, AbiExample)]
pub enum SwitchForkDecision {
SwitchProof(Hash),
NoSwitch,
FailedSwitchThreshold,
SameFork,
FailedSwitchThreshold(u64, u64),
Contributor Author: Let's start passing around locked_out_stake and total_stake for logging. I found this very useful for debugging.

}

impl SwitchForkDecision {
@@ -45,8 +45,11 @@ impl SwitchForkDecision {
authorized_voter_pubkey: &Pubkey,
) -> Option<Instruction> {
match self {
SwitchForkDecision::FailedSwitchThreshold => None,
SwitchForkDecision::NoSwitch => Some(vote_instruction::vote(
SwitchForkDecision::FailedSwitchThreshold(_, total_stake) => {
assert_ne!(*total_stake, 0);
Contributor Author: Place some sanity check as a bonus.

Contributor Author: Also note that this assumes there is no way to control total_stake remotely, so there should be no risk of DoS.

None
}
SwitchForkDecision::SameFork => Some(vote_instruction::vote(
vote_account_pubkey,
authorized_voter_pubkey,
vote,
@@ -61,6 +64,10 @@ impl SwitchForkDecision {
}
}
}

pub fn can_vote(&self) -> bool {
!matches!(self, SwitchForkDecision::FailedSwitchThreshold(_, _))
}
}

pub const VOTE_THRESHOLD_DEPTH: usize = 8;
@@ -101,6 +108,8 @@ pub struct Tower {
// This could be emptied after some time; but left intact indefinitely for easier
// implementation
stray_restored_slot: Option<Slot>,
#[serde(skip)]
pub last_switch_threshold_check: Option<(Slot, SwitchForkDecision)>,
}

impl Default for Tower {
@@ -115,6 +124,7 @@ impl Default for Tower {
path: PathBuf::default(),
tmp_path: PathBuf::default(),
stray_restored_slot: Option::default(),
last_switch_threshold_check: Option::default(),
};
// VoteState::root_slot is ensured to be Some in Tower
tower.lockouts.root_slot = Some(Slot::default());
@@ -493,7 +503,7 @@ impl Tower {
false
}

pub(crate) fn check_switch_threshold(
fn make_check_switch_threshold_decision(
&self,
switch_slot: u64,
ancestors: &HashMap<Slot, HashSet<u64>>,
@@ -520,9 +530,34 @@ impl Tower {
// all of them.
panic!("no ancestors found with slot: {}", last_voted_slot);
} else {
// bank_forks doesn't have corresponding data for the stray restored last vote,
// meaning some inconsistency between saved tower and ledger.
// (newer snapshot, or only a saved tower is moved over to new setup?)
// This condition shouldn't occur under normal validator operation, indicating
// something unusual happened.
Comment on lines +533 to +534
Contributor Author: For the record, here is the well-written proof of this:

pleb:
This is my line of reasoning/understanding of why that log shouldn't ever trigger under normal operation.

https://github.com/solana-labs/solana/pull/12875/files#diff-0b654209e209924bfc70e8dd4c84d0979e36b6ca76e85a5899ba4572521a8170R550, so here: last_voted_slot is a stray with no ancestors.

So assume last_voted_slot is a stray with no ancestors. Assume that when the vote for last_voted_slot was made, the snapshot was S_old. So when we restart from a snapshot S:

  1. The S we start from is > last_voted_slot. But then the warning added in that PR will not trigger, because switch_slot > S > last_voted_slot.
  2. If the snapshot S <= last_voted_slot, then
    a) if S >= S_old (i.e. S is the latest snapshot across all restarts), then you're guaranteed to have ancestors (unless something went wrong), because last_voted_slot is descended from S and the ledger should contain all the descendants of S.
    b) if S < S_old, then do_adjust_lockouts_after_replay() should panic because it can't find any anchored slots.

So in all cases under normal operation, that log shouldn't trigger, is that right?

And this proof is correct as far as I have peer-reviewed it.
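A small worked example, with made-up slot numbers, walking the case analysis above; none of these values or variable names come from the PR:

```rust
// Illustrative only: S_old is the snapshot in effect when the vote for
// last_voted_slot was made; S is the snapshot we restart from.
fn main() {
    let last_voted_slot: u64 = 500;
    let s_old: u64 = 300;

    // Case 1: S > last_voted_slot. Any switch_slot descends from S, so
    // switch_slot > S > last_voted_slot and the `switch_slot < last_voted_slot`
    // guard keeps the warning silent.
    let (s, switch_slot) = (600u64, 650u64);
    assert!(s > last_voted_slot && switch_slot > last_voted_slot);

    // Case 2a: S <= last_voted_slot and S >= S_old. The ledger holds all
    // descendants of S, so last_voted_slot has ancestors and the
    // empty-ancestors fallback (and hence the warning) is never reached.
    let s = 400u64;
    assert!(s <= last_voted_slot && s >= s_old);

    // Case 2b: S < S_old. do_adjust_lockouts_after_replay() should panic
    // earlier because it can't find any anchored slots, so this branch is
    // never reached during replay.
    let s = 200u64;
    assert!(s < s_old);
}
```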

// Possible causes include: OS/HW crash, validator process crash, only saved tower
// is moved over to a new setup, etc...

// However, returning empty ancestors as a fallback here shouldn't result in
// slashing by itself (Note that we couldn't fully preclude any kind of slashing if
// the failure was OS or HW level).

// Firstly, lockout is ensured elsewhere.

// Also, there is no risk of optimistic conf. violation. Although empty ancestors
// could result in an incorrect (= larger than actual) locked_out_stake and a
// false-positive SwitchProof later in this function, there should be no such
// heavier fork candidate in the first place if the last vote (or any of its
// unavailable ancestors) was already optimistically confirmed.
// The only exception is if another validator is already violating it...
if self.is_first_switch_check() && switch_slot < last_voted_slot {
// `switch < last` is needed so that we don't warn! this message just because a
// newer snapshot is used on validator restart
let message = format!(
"bank_forks doesn't have corresponding data for the stray restored \
last vote({}), meaning some inconsistency between saved tower and ledger.",
last_voted_slot
);
warn!("{}", message);
datapoint_warn!("tower_warn", ("warn", message, String));
}
&empty_ancestors
}
});
@@ -532,7 +567,7 @@
if switch_slot == last_voted_slot || switch_slot_ancestors.contains(&last_voted_slot) {
// If the `switch_slot is a descendant of the last vote,
// no switching proof is necessary
return SwitchForkDecision::NoSwitch;
return SwitchForkDecision::SameFork;
}

// Should never consider switching to an ancestor
@@ -598,7 +633,7 @@
}

// Only count lockouts on slots that are:
// 1) Not ancestors of `last_vote`
// 1) Not ancestors of `last_vote`, meaning they are on a different fork
// 2) Not from before the current root as we can't determine if
// anything before the root was an ancestor of `last_vote` or not
if !last_vote_ancestors.contains(lockout_interval_start)
@@ -622,10 +657,43 @@
if (locked_out_stake as f64 / total_stake as f64) > SWITCH_FORK_THRESHOLD {
SwitchForkDecision::SwitchProof(switch_proof)
} else {
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(locked_out_stake, total_stake)
}
})
.unwrap_or(SwitchForkDecision::NoSwitch)
.unwrap_or(SwitchForkDecision::SameFork)
}

pub(crate) fn check_switch_threshold(
&mut self,
switch_slot: u64,
ancestors: &HashMap<Slot, HashSet<u64>>,
descendants: &HashMap<Slot, HashSet<u64>>,
progress: &ProgressMap,
total_stake: u64,
epoch_vote_accounts: &HashMap<Pubkey, (u64, Account)>,
) -> SwitchForkDecision {
let decision = self.make_check_switch_threshold_decision(
switch_slot,
ancestors,
descendants,
progress,
total_stake,
epoch_vote_accounts,
);
let new_check = Some((switch_slot, decision.clone()));
if new_check != self.last_switch_threshold_check {
trace!(
Contributor Author: I just wanted to trace! this. But because check_switch_threshold is called very frequently with the same arguments and the same check result, I had to implement this very simple cache of the previous new_check.
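A minimal, self-contained sketch of the "log only when the check result changes" pattern described above; SwitchCheck, CheckCache, and log_if_changed are hypothetical names for illustration, not part of this PR:

```rust
// Stand-in for (Slot, SwitchForkDecision); only PartialEq + Debug are needed.
#[derive(Clone, PartialEq, Debug)]
struct SwitchCheck {
    slot: u64,
    decision: &'static str,
}

#[derive(Default)]
struct CheckCache {
    last: Option<SwitchCheck>,
}

impl CheckCache {
    // Emit a line only when the (slot, decision) pair differs from the cached one.
    fn log_if_changed(&mut self, check: SwitchCheck) {
        if self.last.as_ref() != Some(&check) {
            println!("new switch threshold check: slot {}: {:?}", check.slot, check.decision);
            self.last = Some(check);
        }
    }
}

fn main() {
    let mut cache = CheckCache::default();
    cache.log_if_changed(SwitchCheck { slot: 100, decision: "SameFork" }); // logs
    cache.log_if_changed(SwitchCheck { slot: 100, decision: "SameFork" }); // silent: unchanged
    cache.log_if_changed(SwitchCheck { slot: 101, decision: "FailedSwitchThreshold" }); // logs again
}
```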

"new switch threshold check: slot {}: {:?}",
switch_slot,
decision,
);
self.last_switch_threshold_check = new_check;
}
decision
}

fn is_first_switch_check(&self) -> bool {
self.last_switch_threshold_check.is_none()
}

pub fn check_vote_stake_threshold(
@@ -932,9 +1000,9 @@ impl Tower {
self.lockouts = vote_state;
self.do_initialize_lockouts(root, |v| v.slot > root);
trace!(
"{} lockouts initialized to {:?}",
"Lockouts in tower for {} is initialized using bank {}",
self.node_pubkey,
self.lockouts
bank.slot(),
);
assert_eq!(
self.lockouts.node_pubkey, self.node_pubkey,
@@ -986,6 +1054,7 @@ impl Tower {
bincode::serialize_into(&mut file, &saved_tower)?;
// file.sync_all() hurts performance; pipeline sync-ing and submitting votes to the cluster!
}
trace!("persisted votes: {:?}", self.voted_slots());
fs::rename(&new_filename, &filename)?;
// self.path.parent().sync_all() hurts performance same as the above sync

@@ -1047,6 +1116,16 @@ pub enum TowerError {
FatallyInconsistent(&'static str),
}

impl TowerError {
pub fn is_file_missing(&self) -> bool {
if let TowerError::IOError(io_err) = &self {
io_err.kind() == std::io::ErrorKind::NotFound
} else {
false
}
}
}

#[frozen_abi(digest = "Gaxfwvx5MArn52mKZQgzHmDCyn5YfCuTHvp5Et3rFfpp")]
#[derive(Default, Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)]
pub struct SavedTower {
@@ -1267,7 +1346,7 @@ pub mod test {
&ancestors,
&descendants,
&self.progress,
&tower,
tower,
);

// Make sure this slot isn't locked out or failing threshold
@@ -1464,11 +1543,11 @@ pub mod test {
#[test]
fn test_to_vote_instruction() {
let vote = Vote::default();
let mut decision = SwitchForkDecision::FailedSwitchThreshold;
let mut decision = SwitchForkDecision::FailedSwitchThreshold(0, 1);
assert!(decision
.to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default())
.is_none());
decision = SwitchForkDecision::NoSwitch;
decision = SwitchForkDecision::SameFork;
assert_eq!(
decision.to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()),
Some(vote_instruction::vote(
@@ -1571,7 +1650,7 @@ pub mod test {
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::NoSwitch
SwitchForkDecision::SameFork
);

// Trying to switch to another fork at 110 should fail
@@ -1584,7 +1663,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Adding another validator lockout on a descendant of last vote should
@@ -1599,7 +1678,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Adding another validator lockout on an ancestor of last vote should
@@ -1614,7 +1693,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Adding another validator lockout on a different fork, but the lockout
@@ -1629,7 +1708,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Adding another validator lockout on a different fork, and the lockout
@@ -1646,7 +1725,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Adding another validator lockout on a different fork, and the lockout
@@ -1697,7 +1776,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);
}

@@ -2365,7 +2444,7 @@ pub mod test {
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::NoSwitch
SwitchForkDecision::SameFork
);

// Trying to switch to another fork at 110 should fail
@@ -2378,7 +2457,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

vote_simulator.simulate_lockout_interval(111, (10, 49), &other_vote_account);
@@ -2456,7 +2535,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Add lockout_interval which should be excluded
@@ -2470,7 +2549,7 @@
total_stake,
bank0.epoch_vote_accounts(0).unwrap(),
),
SwitchForkDecision::FailedSwitchThreshold
SwitchForkDecision::FailedSwitchThreshold(0, 20000)
);

// Add lockout_interval which should not be excluded
8 changes: 4 additions & 4 deletions core/src/replay_stage.rs
@@ -408,7 +408,7 @@ impl ReplayStage {
&ancestors,
&descendants,
&progress,
&tower,
&mut tower,
);
select_vote_and_reset_forks_time.stop();

@@ -1525,7 +1525,7 @@ impl ReplayStage {
ancestors: &HashMap<u64, HashSet<u64>>,
descendants: &HashMap<u64, HashSet<u64>>,
progress: &ProgressMap,
tower: &Tower,
tower: &mut Tower,
) -> SelectVoteAndResetForkResult {
// Try to vote on the actual heaviest fork. If the heaviest bank is
// locked out or fails the threshold check, the validator will:
@@ -1552,7 +1552,7 @@
.epoch_vote_accounts(heaviest_bank.epoch())
.expect("Bank epoch vote accounts must contain entry for the bank's own epoch"),
);
if switch_fork_decision == SwitchForkDecision::FailedSwitchThreshold {
if let SwitchForkDecision::FailedSwitchThreshold(_, _) = switch_fork_decision {
// If we can't switch, then reset to the next votable
// bank on the same fork as our last vote, but don't vote
info!(
@@ -1601,7 +1601,7 @@
if !is_locked_out
&& vote_threshold
&& propagation_confirmed
&& switch_fork_decision != SwitchForkDecision::FailedSwitchThreshold
&& switch_fork_decision.can_vote()
{
info!("voting: {} {}", bank.slot(), fork_weight);
SelectVoteAndResetForkResult {
11 changes: 3 additions & 8 deletions core/src/validator.rs
@@ -6,7 +6,7 @@ use crate::{
cluster_info::{ClusterInfo, Node},
cluster_info_vote_listener::VoteTracker,
completed_data_sets_service::CompletedDataSetsService,
consensus::{reconcile_blockstore_roots_with_tower, Tower, TowerError},
consensus::{reconcile_blockstore_roots_with_tower, Tower},
contact_info::ContactInfo,
gossip_service::GossipService,
optimistically_confirmed_bank_tracker::{
@@ -730,12 +730,7 @@ fn post_process_restored_tower(
.unwrap_or_else(|err| {
let voting_has_been_active =
active_vote_account_exists_in_bank(&bank_forks.working_bank(), &vote_account);
let saved_tower_is_missing = if let TowerError::IOError(io_err) = &err {
io_err.kind() == std::io::ErrorKind::NotFound
} else {
false
};
if !saved_tower_is_missing {
if !err.is_file_missing() {
datapoint_error!(
"tower_error",
(
@@ -753,7 +748,7 @@
);
process::exit(1);
}
if saved_tower_is_missing && !voting_has_been_active {
if err.is_file_missing() && !voting_has_been_active {
// Currently, don't protect against spoofed snapshots with no tower at all
info!(
"Ignoring expected failed tower restore because this is the initial \