Skip to content

Commit cd832b3

Browse files
Fix partition recovery tests (#2820)
Previously, the expected round number was captured before the node was stopped. However, the node is likely to advance to the next round before it actually stops, and when that happens the test fails. This change reads the most up-to-date round number after the node is stopped, but before waiting for inducePartitionTime. inducePartitionTime is the wait that ensures the expected behavior is observed, so the round number is captured before this wait. However, I could not identify in this PR why TestBasicPartitionRecovery failed: I could not find anything wrong in the test, and the failure logs contain nothing useful. I suspect that the failures in the other tests triggered it, so I fixed the other tests, but I cannot be sure. As for the data race, it is fixed in #2844. Fixes #2384 and #2545.
1 parent 8a0d699 commit cd832b3

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

test/e2e-go/features/partitionRecovery/partitionRecovery_test.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ func runTestWithStaggeredStopStart(t *testing.T, fixture *fixtures.RestClientFix
135135
// Stop Node1
136136
nc1.FullStop()
137137

138+
status, err := fixture.LibGoalClient.Status()
139+
a.NoError(err)
140+
roundAfterStop := status.LastRound
141+
138142
time.Sleep(inducePartitionTime)
139143

140144
// Use the fixture to start the node again so it supplies the correct peer addresses
@@ -152,10 +156,10 @@ func runTestWithStaggeredStopStart(t *testing.T, fixture *fixtures.RestClientFix
152156
a.NoError(err)
153157

154158
// Now wait for us to make progress again.
155-
status, err := fixture.LibGoalClient.Status()
159+
status, err = fixture.LibGoalClient.Status()
156160
a.NoError(err)
157161

158-
a.Equal(waitForRound, status.LastRound, "We should not have made progress since stopping the first node")
162+
a.Equal(roundAfterStop, status.LastRound, "We should not have made progress since stopping the first node")
159163

160164
err = fixture.WaitForRound(status.LastRound+1, partitionRecoveryTime)
161165
a.NoError(err)
@@ -193,6 +197,10 @@ func TestBasicPartitionRecoveryPartOffline(t *testing.T) {
193197
// Stop Node1
194198
nc1.FullStop()
195199

200+
status, err := fixture.LibGoalClient.Status()
201+
a.NoError(err)
202+
roundAfterStop := status.LastRound
203+
196204
// Stop the 2nd node and give network a chance to stall
197205
nc2, err := fixture.GetNodeController("Node2")
198206
a.NoError(err)
@@ -205,10 +213,10 @@ func TestBasicPartitionRecoveryPartOffline(t *testing.T) {
205213
a.NoError(err)
206214

207215
// Now wait for us to make progress again.
208-
status, err := fixture.LibGoalClient.Status()
216+
status, err = fixture.LibGoalClient.Status()
209217
a.NoError(err)
210218

211-
a.Equal(waitForRound, status.LastRound, "We should not have made progress since stopping the first node")
219+
a.Equal(roundAfterStop, status.LastRound, "We should not have made progress since stopping the first node")
212220

213221
err = fixture.WaitForRound(status.LastRound+1, partitionRecoveryTime)
214222
a.NoError(err)
@@ -253,12 +261,16 @@ func TestPartitionHalfOffline(t *testing.T) {
253261
a.NoError(err)
254262
nc3.FullStop()
255263

264+
status, err := client.Status()
265+
a.NoError(err)
266+
roundAfterStop := status.LastRound
267+
256268
time.Sleep(inducePartitionTime)
257269

258270
// Get main client to monitor
259-
status, err := client.Status()
271+
status, err = client.Status()
260272
a.NoError(err)
261-
a.Equal(waitForRound, status.LastRound, "We should not have made progress since stopping the nodes")
273+
a.Equal(roundAfterStop, status.LastRound, "We should not have made progress since stopping the nodes")
262274

263275
// Start 40 of 50% of the stake
264276
_, err = fixture.StartNode(nc1.GetDataDir())

0 commit comments

Comments
 (0)