@@ -301,6 +301,12 @@ impl Lighthouse {
301
301
"Detected quorum change, bumping quorum_id to {}" ,
302
302
state. quorum_id
303
303
) ;
304
+ } else if participants. iter ( ) . map ( |p| p. commit_failures ) . sum :: < i64 > ( ) > 0 {
305
+ state. quorum_id += 1 ;
306
+ info ! (
307
+ "Detected commit failures, bumping quorum_id to {}" ,
308
+ state. quorum_id
309
+ ) ;
304
310
}
305
311
306
312
let quorum = Quorum {
@@ -639,6 +645,7 @@ mod tests {
639
645
world_size : 1 ,
640
646
shrink_only : false ,
641
647
data : String :: new ( ) ,
648
+ commit_failures : 0 ,
642
649
} ,
643
650
} ,
644
651
) ;
@@ -656,6 +663,7 @@ mod tests {
656
663
world_size : 1 ,
657
664
shrink_only : false ,
658
665
data : String :: new ( ) ,
666
+ commit_failures : 0 ,
659
667
} ,
660
668
} ,
661
669
) ;
@@ -712,6 +720,7 @@ mod tests {
712
720
world_size : 1 ,
713
721
shrink_only : false ,
714
722
data : String :: new ( ) ,
723
+ commit_failures : 0 ,
715
724
} ,
716
725
} ,
717
726
) ;
@@ -751,6 +760,7 @@ mod tests {
751
760
world_size : 1 ,
752
761
shrink_only : false ,
753
762
data : String :: new ( ) ,
763
+ commit_failures : 0 ,
754
764
} ,
755
765
} ,
756
766
) ;
@@ -798,6 +808,7 @@ mod tests {
798
808
world_size : 1 ,
799
809
shrink_only : false ,
800
810
data : String :: new ( ) ,
811
+ commit_failures : 0 ,
801
812
} ,
802
813
} ,
803
814
) ;
@@ -819,6 +830,7 @@ mod tests {
819
830
world_size: 1 ,
820
831
shrink_only: false ,
821
832
data: String :: new( ) ,
833
+ commit_failures: 0 ,
822
834
} ] ,
823
835
created : Some ( SystemTime :: now ( ) . into ( ) ) ,
824
836
} ) ;
@@ -838,6 +850,7 @@ mod tests {
838
850
world_size : 1 ,
839
851
shrink_only : false ,
840
852
data : String :: new ( ) ,
853
+ commit_failures : 0 ,
841
854
} ,
842
855
} ,
843
856
) ;
@@ -882,6 +895,7 @@ mod tests {
882
895
world_size: 1 ,
883
896
shrink_only: false ,
884
897
data: String :: new( ) ,
898
+ commit_failures: 0 ,
885
899
} ,
886
900
QuorumMember {
887
901
replica_id: "b" . to_string( ) ,
@@ -891,6 +905,7 @@ mod tests {
891
905
world_size: 1 ,
892
906
shrink_only: false ,
893
907
data: String :: new( ) ,
908
+ commit_failures: 0 ,
894
909
} ,
895
910
] ,
896
911
created : Some ( SystemTime :: now ( ) . into ( ) ) ,
@@ -908,6 +923,7 @@ mod tests {
908
923
world_size : 1 ,
909
924
shrink_only : true ,
910
925
data : String :: new ( ) ,
926
+ commit_failures : 0 ,
911
927
} ,
912
928
} ,
913
929
) ;
@@ -926,6 +942,7 @@ mod tests {
926
942
world_size : 1 ,
927
943
shrink_only : true ,
928
944
data : String :: new ( ) ,
945
+ commit_failures : 0 ,
929
946
} ,
930
947
} ,
931
948
) ;
@@ -975,6 +992,7 @@ mod tests {
975
992
world_size : 1 ,
976
993
shrink_only : false ,
977
994
data : String :: new ( ) ,
995
+ commit_failures : 0 ,
978
996
} ) ,
979
997
} ) ;
980
998
@@ -1021,6 +1039,7 @@ mod tests {
1021
1039
world_size : 1 ,
1022
1040
shrink_only : false ,
1023
1041
data : String :: new ( ) ,
1042
+ commit_failures : 0 ,
1024
1043
} ,
1025
1044
} ,
1026
1045
) ;
@@ -1047,6 +1066,7 @@ mod tests {
1047
1066
world_size: 1 ,
1048
1067
shrink_only: false ,
1049
1068
data: String :: new( ) ,
1069
+ commit_failures: 0 ,
1050
1070
} ] ;
1051
1071
let b = vec ! [ QuorumMember {
1052
1072
replica_id: "1" . to_string( ) ,
@@ -1056,6 +1076,7 @@ mod tests {
1056
1076
world_size: 1 ,
1057
1077
shrink_only: false ,
1058
1078
data: String :: new( ) ,
1079
+ commit_failures: 0 ,
1059
1080
} ] ;
1060
1081
1061
1082
// replica_id is the same
@@ -1069,12 +1090,13 @@ mod tests {
1069
1090
world_size: 1 ,
1070
1091
shrink_only: false ,
1071
1092
data: String :: new( ) ,
1093
+ commit_failures: 0 ,
1072
1094
} ] ;
1073
1095
// replica_id changed
1074
1096
assert ! ( quorum_changed( & a, & c) ) ;
1075
1097
}
1076
- #[ tokio:: test]
1077
1098
1099
+ #[ tokio:: test]
1078
1100
async fn test_lighthouse_join_during_shrink ( ) -> Result < ( ) > {
1079
1101
fn create_member ( id : & str , addr_num : & str , step : i64 , shrink_only : bool ) -> QuorumMember {
1080
1102
QuorumMember {
@@ -1085,6 +1107,7 @@ mod tests {
1085
1107
world_size : 1 ,
1086
1108
shrink_only,
1087
1109
data : String :: new ( ) ,
1110
+ commit_failures : 0 ,
1088
1111
}
1089
1112
}
1090
1113
@@ -1179,4 +1202,76 @@ mod tests {
1179
1202
lighthouse_task. abort ( ) ;
1180
1203
Ok ( ( ) )
1181
1204
}
1205
+
1206
+ #[ tokio:: test]
1207
+ async fn test_lighthouse_commit_failures ( ) -> Result < ( ) > {
1208
+ fn create_member ( id : & str , commit_failures : i64 ) -> QuorumMember {
1209
+ QuorumMember {
1210
+ replica_id : id. to_string ( ) ,
1211
+ address : format ! ( "addr{}" , id) ,
1212
+ store_address : format ! ( "store{}" , id) ,
1213
+ step : 10 ,
1214
+ world_size : 1 ,
1215
+ shrink_only : false ,
1216
+ data : String :: new ( ) ,
1217
+ commit_failures,
1218
+ }
1219
+ }
1220
+
1221
+ fn create_request ( member : & QuorumMember ) -> tonic:: Request < LighthouseQuorumRequest > {
1222
+ tonic:: Request :: new ( LighthouseQuorumRequest {
1223
+ requester : Some ( member. clone ( ) ) ,
1224
+ } )
1225
+ }
1226
+
1227
+ let opt = LighthouseOpt {
1228
+ min_replicas : 2 ,
1229
+ bind : "[::]:0" . to_string ( ) ,
1230
+ join_timeout_ms : 1000 ,
1231
+ quorum_tick_ms : 10 ,
1232
+ heartbeat_timeout_ms : 5000 ,
1233
+ } ;
1234
+
1235
+ // Start the lighthouse service
1236
+ let lighthouse = Lighthouse :: new ( opt) . await ?;
1237
+ let lighthouse_task = tokio:: spawn ( lighthouse. clone ( ) . run ( ) ) ;
1238
+
1239
+ // Create client to interact with lighthouse
1240
+ let mut client = lighthouse_client_new ( lighthouse. address ( ) ) . await ?;
1241
+
1242
+ // First two quorums should be stable
1243
+ for _i in 0 ..2 {
1244
+ let first_request = create_request ( & create_member ( "replica0" , 0 ) ) ;
1245
+ let second_request = create_request ( & create_member ( "replica1" , 0 ) ) ;
1246
+
1247
+ tokio:: spawn ( {
1248
+ let mut client = client. clone ( ) ;
1249
+ async move { client. quorum ( first_request) . await }
1250
+ } ) ;
1251
+ let first_response = client. quorum ( second_request) . await ?;
1252
+ let first_quorum = first_response. into_inner ( ) . quorum . unwrap ( ) ;
1253
+ assert_eq ! ( first_quorum. quorum_id, 1 ) ;
1254
+ assert_eq ! ( first_quorum. participants. len( ) , 2 ) ;
1255
+ assert_eq ! ( first_quorum. participants[ 0 ] . commit_failures, 0 ) ;
1256
+ assert_eq ! ( first_quorum. participants[ 1 ] . commit_failures, 0 ) ;
1257
+ }
1258
+
1259
+ // commit_failures should increment quorum_id
1260
+ let first_request = create_request ( & create_member ( "replica0" , 0 ) ) ;
1261
+ let second_request = create_request ( & create_member ( "replica1" , 2 ) ) ;
1262
+
1263
+ tokio:: spawn ( {
1264
+ let mut client = client. clone ( ) ;
1265
+ async move { client. quorum ( first_request) . await }
1266
+ } ) ;
1267
+ let first_response = client. quorum ( second_request) . await ?;
1268
+ let first_quorum = first_response. into_inner ( ) . quorum . unwrap ( ) ;
1269
+ assert_eq ! ( first_quorum. quorum_id, 2 ) ;
1270
+ assert_eq ! ( first_quorum. participants. len( ) , 2 ) ;
1271
+ assert_eq ! ( first_quorum. participants[ 0 ] . commit_failures, 0 ) ;
1272
+ assert_eq ! ( first_quorum. participants[ 1 ] . commit_failures, 2 ) ;
1273
+
1274
+ lighthouse_task. abort ( ) ;
1275
+ Ok ( ( ) )
1276
+ }
1182
1277
}
0 commit comments