From f493a882588be8467998ef71ac6cb226683fd41a Mon Sep 17 00:00:00 2001 From: carllin Date: Mon, 6 Dec 2021 17:14:38 -0500 Subject: [PATCH] Fixup flaky tests (#21617) * Fixup flaky tests * Fixup listeners --- local-cluster/src/local_cluster.rs | 83 ++++++++++++++++++++-------- local-cluster/tests/local_cluster.rs | 69 ++++++++++++++++++----- 2 files changed, 114 insertions(+), 38 deletions(-) diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs index ba53a3eb140dbd..18ad89790fdbca 100644 --- a/local-cluster/src/local_cluster.rs +++ b/local-cluster/src/local_cluster.rs @@ -281,7 +281,7 @@ impl LocalCluster { let mut listener_config = safe_clone_config(&config.validator_configs[0]); listener_config.voting_disabled = true; (0..config.num_listeners).for_each(|_| { - cluster.add_validator( + cluster.add_validator_listener( &listener_config, 0, Arc::new(Keypair::new()), @@ -324,11 +324,50 @@ impl LocalCluster { } } + /// Set up validator without voting or staking accounts + pub fn add_validator_listener( + &mut self, + validator_config: &ValidatorConfig, + stake: u64, + validator_keypair: Arc, + voting_keypair: Option>, + socket_addr_space: SocketAddrSpace, + ) -> Pubkey { + self.do_add_validator( + validator_config, + true, + stake, + validator_keypair, + voting_keypair, + socket_addr_space, + ) + } + + /// Set up validator with voting and staking accounts pub fn add_validator( &mut self, validator_config: &ValidatorConfig, stake: u64, validator_keypair: Arc, + voting_keypair: Option>, + socket_addr_space: SocketAddrSpace, + ) -> Pubkey { + self.do_add_validator( + validator_config, + false, + stake, + validator_keypair, + voting_keypair, + socket_addr_space, + ) + } + + fn do_add_validator( + &mut self, + validator_config: &ValidatorConfig, + is_listener: bool, + stake: u64, + validator_keypair: Arc, mut voting_keypair: Option>, socket_addr_space: SocketAddrSpace, ) -> Pubkey { @@ -347,30 +386,28 @@ impl LocalCluster { let contact_info = validator_node.info.clone(); let (ledger_path, _blockhash) = create_new_tmp_ledger!(&self.genesis_config); - if validator_config.voting_disabled { + // Give the validator some lamports to setup vote accounts + if is_listener { // setup as a listener info!("listener {} ", validator_pubkey,); - } else { - // Give the validator some lamports to setup vote accounts - if should_create_vote_pubkey { - let validator_balance = Self::transfer_with_client( - &client, - &self.funding_keypair, - &validator_pubkey, - stake * 2 + 2, - ); - info!( - "validator {} balance {}", - validator_pubkey, validator_balance - ); - Self::setup_vote_and_stake_accounts( - &client, - voting_keypair.as_ref().unwrap(), - &validator_keypair, - stake, - ) - .unwrap(); - } + } else if should_create_vote_pubkey { + let validator_balance = Self::transfer_with_client( + &client, + &self.funding_keypair, + &validator_pubkey, + stake * 2 + 2, + ); + info!( + "validator {} balance {}", + validator_pubkey, validator_balance + ); + Self::setup_vote_and_stake_accounts( + &client, + voting_keypair.as_ref().unwrap(), + &validator_keypair, + stake, + ) + .unwrap(); } let mut config = safe_clone_config(validator_config); diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 9df61a5001db5b..72a8b4d2c765b9 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -3190,13 +3190,27 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) = (validators[0], validators[1], validators[2]); + // Disable voting on all validators other than validator B to ensure neither of the below two + // scenarios occur: + // 1. If the cluster immediately forks on restart while we're killing validators A and C, + // with Validator B on one side, and `A` and `C` on a heavier fork, it's possible that the lockouts + // on `A` and `C`'s latest votes do not extend past validator B's latest vote. Then validator B + // will be stuck unable to vote, but also unable generate a switching proof to the heavier fork. + // + // 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential + // because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger + // below only for slots <= `next_slot_on_a`, validator A will not know how it's last vote chains + // to the otehr forks, and may violate switching proofs on restart. + let mut validator_configs = + make_identical_validator_configs(&ValidatorConfig::default(), node_stakes.len()); + + validator_configs[0].voting_disabled = true; + validator_configs[2].voting_disabled = true; + let mut config = ClusterConfig { cluster_lamports: 100_000, - node_stakes: node_stakes.clone(), - validator_configs: make_identical_validator_configs( - &ValidatorConfig::default(), - node_stakes.len(), - ), + node_stakes, + validator_configs, validator_keys: Some(validator_keys), slots_per_epoch, stakers_slot_offset: slots_per_epoch, @@ -3213,9 +3227,23 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey); let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey); + info!( + "val_a {} ledger path {:?}", + validator_a_pubkey, val_a_ledger_path + ); + info!( + "val_b {} ledger path {:?}", + validator_b_pubkey, val_b_ledger_path + ); + info!( + "val_c {} ledger path {:?}", + validator_c_pubkey, val_c_ledger_path + ); + // Immediately kill validator A, and C - let validator_a_info = cluster.exit_node(&validator_a_pubkey); - let validator_c_info = cluster.exit_node(&validator_c_pubkey); + info!("Exiting validators A and C"); + let mut validator_a_info = cluster.exit_node(&validator_a_pubkey); + let mut validator_c_info = cluster.exit_node(&validator_c_pubkey); // Step 1: // Let validator B, (D) run for a while. @@ -3224,7 +3252,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b let elapsed = now.elapsed(); assert!( elapsed <= Duration::from_secs(30), - "LocalCluster nodes failed to log enough tower votes in {} secs", + "Validator B failed to vote on any slot >= {} in {} secs", + next_slot_on_a, elapsed.as_secs() ); sleep(Duration::from_millis(100)); @@ -3269,29 +3298,38 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b } // Step 3: - // Restart A so that it can vote for the slots in B's fork + // Restart A with voting enabled so that it can vote on B's fork + // up to `next_slot_on_a`, thereby optimistcally confirming `next_slot_on_a` info!("Restarting A"); + validator_a_info.config.voting_disabled = false; cluster.restart_node( &validator_a_pubkey, validator_a_info, SocketAddrSpace::Unspecified, ); - info!("Waiting for A to vote"); - let mut last_print = Instant::now(); + info!("Waiting for A to vote on slot descended from slot `next_slot_on_a`"); + let now = Instant::now(); loop { if let Some((last_vote_slot, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) { if last_vote_slot >= next_slot_on_a { - info!("Validator A has caught up: {}", last_vote_slot); + info!( + "Validator A has caught up and voted on slot: {}", + last_vote_slot + ); break; - } else if last_print.elapsed().as_secs() >= 10 { - info!("Validator A latest vote: {}", last_vote_slot); - last_print = Instant::now(); } } + if now.elapsed().as_secs() >= 30 { + panic!( + "Validator A has not seen optimistic confirmation slot > {} in 30 seconds", + next_slot_on_a + ); + } + sleep(Duration::from_millis(20)); } @@ -3319,6 +3357,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b // Step 4: // Run validator C only to make it produce and vote on its own fork. info!("Restart validator C again!!!"); + validator_c_info.config.voting_disabled = false; cluster.restart_node( &validator_c_pubkey, validator_c_info,