@@ -14,24 +14,17 @@ use slog::{Logger, debug, error, info, o};
1414use sprockets_tls:: keys:: SprocketsConfig ;
1515use std:: collections:: BTreeSet ;
1616use std:: net:: SocketAddrV6 ;
17- use std:: time:: Duration ;
1817use thiserror:: Error ;
1918use tokio:: sync:: mpsc:: error:: SendError ;
2019use tokio:: sync:: oneshot:: error:: RecvError ;
2120use tokio:: sync:: { mpsc, oneshot} ;
22- use tokio:: time:: sleep;
2321use trust_quorum_protocol:: {
2422 Alarm , BaseboardId , CommitError , Configuration , Epoch , ExpungedMetadata ,
2523 LoadRackSecretError , LrtqUpgradeError , LrtqUpgradeMsg , Node , NodeCallerCtx ,
2624 NodeCommonCtx , NodeCtx , PersistentState , PrepareAndCommitError ,
2725 ReconfigurationError , ReconfigureMsg , ReconstructedRackSecret ,
2826} ;
2927
30- #[ cfg( not( test) ) ]
31- const LOAD_RACK_SECRET_RETRY_TIMEOUT : Duration = Duration :: from_millis ( 500 ) ;
32- #[ cfg( test) ]
33- const LOAD_RACK_SECRET_RETRY_TIMEOUT : Duration = Duration :: from_millis ( 5 ) ;
34-
3528/// We only expect a handful of messages at a time.
3629const API_CHANNEL_BOUND : usize = 32 ;
3730
@@ -245,20 +238,13 @@ impl NodeTaskHandle {
245238 pub async fn load_rack_secret (
246239 & self ,
247240 epoch : Epoch ,
248- ) -> Result < ReconstructedRackSecret , NodeApiError > {
249- loop {
250- let ( tx, rx) = oneshot:: channel ( ) ;
251- self . tx
252- . send ( NodeApiRequest :: LoadRackSecret { epoch, responder : tx } )
253- . await ?;
254- if let Some ( rack_secret) = rx. await ?? {
255- return Ok ( rack_secret) ;
256- } ;
257-
258- // The task returns immediately with `None` if the secret is still
259- // being loaded. We must therefore retry.
260- sleep ( LOAD_RACK_SECRET_RETRY_TIMEOUT ) . await ;
261- }
241+ ) -> Result < Option < ReconstructedRackSecret > , NodeApiError > {
242+ let ( tx, rx) = oneshot:: channel ( ) ;
243+ self . tx
244+ . send ( NodeApiRequest :: LoadRackSecret { epoch, responder : tx } )
245+ . await ?;
246+ let rs = rx. await ??;
247+ Ok ( rs)
262248 }
263249
264250 /// Return `Ok(true)` if the configuration has committed, `Ok(false)` if
@@ -542,7 +528,7 @@ mod tests {
542528 use camino:: Utf8PathBuf ;
543529 use dropshot:: test_util:: { LogContext , log_prefix_for_test} ;
544530 use omicron_test_utils:: dev:: poll:: { CondCheckError , wait_for_condition} ;
545- use omicron_test_utils:: dev:: test_setup_log;
531+ use omicron_test_utils:: dev:: { self , test_setup_log} ;
546532 use omicron_uuid_kinds:: GenericUuid ;
547533 use secrecy:: ExposeSecretMut ;
548534 use sled_hardware_types:: Baseboard ;
@@ -741,6 +727,36 @@ mod tests {
741727 self . logctx . cleanup_successful ( ) ;
742728 std:: fs:: remove_dir_all ( self . dir ) . unwrap ( ) ;
743729 }
730+
731+ pub async fn wait_for_rack_secrets_and_assert_equality (
732+ & self ,
733+ node_indexes : BTreeSet < usize > ,
734+ epoch : Epoch ,
735+ ) -> Result < ( ) , dev:: poll:: Error < NodeApiError > > {
736+ let poll_interval = Duration :: from_millis ( 10 ) ;
737+ let poll_max = Duration :: from_secs ( 10 ) ;
738+ wait_for_condition (
739+ async || {
740+ let mut secret = None ;
741+ for ( i, h) in self . node_handles . iter ( ) . enumerate ( ) {
742+ if node_indexes. contains ( & i) {
743+ let Some ( rs) = h. load_rack_secret ( epoch) . await ?
744+ else {
745+ return Err ( CondCheckError :: NotYet ) ;
746+ } ;
747+ if secret. is_none ( ) {
748+ secret = Some ( rs. clone ( ) ) ;
749+ }
750+ assert_eq ! ( & rs, secret. as_ref( ) . unwrap( ) ) ;
751+ }
752+ }
753+ Ok ( ( ) )
754+ } ,
755+ & poll_interval,
756+ & poll_max,
757+ )
758+ . await
759+ }
744760 }
745761
746762 /// Test that all nodes can connect to each other when given each the full
@@ -964,14 +980,13 @@ mod tests {
964980 . unwrap ( ) ;
965981
966982 // Now load the rack secret at all nodes
967- let mut secret = None ;
968- for h in & setup. node_handles {
969- let rs = h. load_rack_secret ( Epoch ( 1 ) ) . await . unwrap ( ) ;
970- if secret. is_none ( ) {
971- secret = Some ( rs. clone ( ) ) ;
972- }
973- assert_eq ! ( & rs, secret. as_ref( ) . unwrap( ) ) ;
974- }
983+ setup
984+ . wait_for_rack_secrets_and_assert_equality (
985+ ( 0 ..num_nodes) . collect ( ) ,
986+ Epoch ( 1 ) ,
987+ )
988+ . await
989+ . unwrap ( ) ;
975990
976991 setup. cleanup_successful ( ) ;
977992 }
@@ -1109,14 +1124,13 @@ mod tests {
11091124 assert ! ( status. persistent_state. commits. contains( & Epoch ( 1 ) ) ) ;
11101125
11111126 // Now load the rack secret at all nodes
1112- let mut secret = None ;
1113- for h in & setup. node_handles {
1114- let rs = h. load_rack_secret ( Epoch ( 1 ) ) . await . unwrap ( ) ;
1115- if secret. is_none ( ) {
1116- secret = Some ( rs. clone ( ) ) ;
1117- }
1118- assert_eq ! ( & rs, secret. as_ref( ) . unwrap( ) ) ;
1119- }
1127+ setup
1128+ . wait_for_rack_secrets_and_assert_equality (
1129+ ( 0 ..num_nodes) . collect ( ) ,
1130+ Epoch ( 1 ) ,
1131+ )
1132+ . await
1133+ . unwrap ( ) ;
11201134
11211135 setup. cleanup_successful ( ) ;
11221136 }
@@ -1202,14 +1216,13 @@ mod tests {
12021216 . unwrap ( ) ;
12031217
12041218 // Now load the rack secret at all nodes
1205- let mut secret = None ;
1206- for h in & setup. node_handles {
1207- let rs = h. load_rack_secret ( Epoch ( 1 ) ) . await . unwrap ( ) ;
1208- if secret. is_none ( ) {
1209- secret = Some ( rs. clone ( ) ) ;
1210- }
1211- assert_eq ! ( & rs, secret. as_ref( ) . unwrap( ) ) ;
1212- }
1219+ setup
1220+ . wait_for_rack_secrets_and_assert_equality (
1221+ ( 0 ..num_nodes) . collect ( ) ,
1222+ Epoch ( 1 ) ,
1223+ )
1224+ . await
1225+ . unwrap ( ) ;
12131226
12141227 // Tell all but the last node how to reach each other
12151228 // This should disconnect the last node from everybody
@@ -1332,22 +1345,22 @@ mod tests {
13321345
13331346 // Load the secret at epoch 1. This should trigger a `CommitAdvance`
13341347 // response from nodes that committed at epoch 2.
1335- let res =
1336- setup. node_handles . last ( ) . unwrap ( ) . load_rack_secret ( Epoch ( 1 ) ) . await ;
1337-
1338- println ! ( "res = {res:#?}" ) ;
1339-
1340- let rs = last_node. load_rack_secret ( Epoch ( 2 ) ) . await . unwrap ( ) ;
1348+ setup
1349+ . wait_for_rack_secrets_and_assert_equality (
1350+ BTreeSet :: from ( [ num_nodes - 1 ] ) ,
1351+ Epoch ( 1 ) ,
1352+ )
1353+ . await
1354+ . unwrap ( ) ;
13411355
1342- // Ensure the rack secret is the same as at another node
1343- let expected = setup
1344- . node_handles
1345- . first ( )
1346- . unwrap ( )
1347- . load_rack_secret ( Epoch ( 2 ) )
1356+ // Ensure the rack secret at epoch 2 is the same as at another node
1357+ setup
1358+ . wait_for_rack_secrets_and_assert_equality (
1359+ BTreeSet :: from ( [ 0 , num_nodes - 1 ] ) ,
1360+ Epoch ( 2 ) ,
1361+ )
13481362 . await
13491363 . unwrap ( ) ;
1350- assert_eq ! ( rs, expected) ;
13511364
13521365 setup. cleanup_successful ( ) ;
13531366 }
@@ -1424,14 +1437,13 @@ mod tests {
14241437 . unwrap ( ) ;
14251438
14261439 // Now load the rack secret at all nodes
1427- let mut secret = None ;
1428- for h in & setup. node_handles {
1429- let rs = h. load_rack_secret ( Epoch ( 1 ) ) . await . unwrap ( ) ;
1430- if secret. is_none ( ) {
1431- secret = Some ( rs. clone ( ) ) ;
1432- }
1433- assert_eq ! ( & rs, secret. as_ref( ) . unwrap( ) ) ;
1434- }
1440+ setup
1441+ . wait_for_rack_secrets_and_assert_equality (
1442+ ( 0 ..num_nodes) . collect ( ) ,
1443+ Epoch ( 1 ) ,
1444+ )
1445+ . await
1446+ . unwrap ( ) ;
14351447
14361448 setup. cleanup_successful ( ) ;
14371449 }
0 commit comments