Skip to content

Commit fd73931

Browse files
committed
HBASE-22964 Fix flaky TestClusterRestartFailover and TestClusterRestartFailoverSplitWithoutZk
1 parent 1d1813d commit fd73931

File tree

1 file changed

+33
-31
lines changed

1 file changed

+33
-31
lines changed

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java

Lines changed: 33 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,14 @@
1717
*/
1818
package org.apache.hadoop.hbase.master;
1919

20+
import static org.junit.Assert.assertFalse;
21+
import static org.junit.Assert.assertNotNull;
22+
import static org.junit.Assert.assertNull;
23+
import static org.junit.Assert.assertTrue;
24+
2025
import java.util.List;
2126
import java.util.stream.Collectors;
27+
2228
import org.apache.hadoop.hbase.HBaseClassTestRule;
2329
import org.apache.hadoop.hbase.ServerName;
2430
import org.apache.hadoop.hbase.TableName;
@@ -29,7 +35,6 @@
2935
import org.apache.hadoop.hbase.procedure2.Procedure;
3036
import org.apache.hadoop.hbase.testclassification.LargeTests;
3137
import org.apache.hadoop.hbase.testclassification.MasterTests;
32-
import org.junit.Assert;
3338
import org.junit.ClassRule;
3439
import org.junit.Test;
3540
import org.junit.experimental.categories.Category;
@@ -61,54 +66,51 @@ public void test() throws Exception {
6166
UTIL.waitFor(60000, () -> UTIL.getMiniHBaseCluster().getMaster().isInitialized());
6267
// wait for all SCPs to finish
6368
UTIL.waitFor(60000, () -> UTIL.getHBaseCluster().getMaster().getProcedures().stream()
64-
.noneMatch(p -> p instanceof ServerCrashProcedure));
69+
.noneMatch(p -> p instanceof ServerCrashProcedure));
6570
TableName tableName = TABLES[0];
6671
ServerName testServer = UTIL.getHBaseCluster().getRegionServer(0).getServerName();
6772
UTIL.waitFor(30000, () -> getServerStateNode(testServer) != null);
6873
ServerStateNode serverNode = getServerStateNode(testServer);
69-
Assert.assertNotNull(serverNode);
70-
Assert.assertTrue("serverNode should be ONLINE when cluster runs normally",
71-
serverNode.isInState(ServerState.ONLINE));
74+
assertNotNull(serverNode);
75+
assertTrue("serverNode should be ONLINE when cluster runs normally",
76+
serverNode.isInState(ServerState.ONLINE));
7277
UTIL.createMultiRegionTable(tableName, FAMILY);
7378
UTIL.waitTableEnabled(tableName);
7479
Table table = UTIL.getConnection().getTable(tableName);
7580
for (int i = 0; i < 100; i++) {
7681
UTIL.loadTable(table, FAMILY);
7782
}
7883
List<Integer> ports =
79-
UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList().stream()
80-
.map(serverName -> serverName.getPort()).collect(Collectors.toList());
84+
UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList().stream()
85+
.map(serverName -> serverName.getPort()).collect(Collectors.toList());
8186
LOG.info("Shutting down cluster");
8287
UTIL.getHBaseCluster().killAll();
8388
UTIL.getHBaseCluster().waitUntilShutDown();
8489
LOG.info("Starting cluster the second time");
8590
UTIL.restartHBaseCluster(3, ports);
86-
UTIL.waitFor(30000, () -> UTIL.getHBaseCluster().getMaster().isInitialized());
91+
UTIL.waitFor(60000, () -> UTIL.getHBaseCluster().getMaster().isInitialized());
8792
serverNode = UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
88-
.getServerNode(testServer);
89-
Assert.assertNotNull("serverNode should not be null when restart whole cluster", serverNode);
90-
Assert.assertFalse(serverNode.isInState(ServerState.ONLINE));
91-
LOG.info("start to find the procedure of SCP for the severName we choose");
92-
UTIL.waitFor(60000,
93-
() -> UTIL.getHBaseCluster().getMaster().getProcedures().stream()
94-
.anyMatch(procedure -> (procedure instanceof ServerCrashProcedure) &&
95-
((ServerCrashProcedure) procedure).getServerName().equals(testServer)));
96-
Assert.assertFalse("serverNode should not be ONLINE during SCP processing",
97-
serverNode.isInState(ServerState.ONLINE));
98-
LOG.info("start to submit the SCP for the same serverName {} which should fail", testServer);
99-
Assert
100-
.assertFalse(UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(testServer));
101-
Procedure<?> procedure = UTIL.getHBaseCluster().getMaster().getProcedures().stream()
102-
.filter(p -> (p instanceof ServerCrashProcedure) &&
103-
((ServerCrashProcedure) p).getServerName().equals(testServer))
104-
.findAny().get();
105-
UTIL.waitFor(60000, () -> procedure.isFinished());
93+
.getServerNode(testServer);
94+
if (serverNode != null) {
95+
assertFalse(serverNode.isInState(ServerState.ONLINE));
96+
LOG.info("start to find the procedure of SCP for the severName we choose");
97+
Procedure<?> procedure = UTIL.getHBaseCluster().getMaster().getProcedures().stream().filter(
98+
p -> (p instanceof ServerCrashProcedure) &&
99+
((ServerCrashProcedure) p).getServerName().equals(testServer)).findAny().get();
100+
if (procedure != null) {
101+
assertFalse("serverNode should not be ONLINE during SCP processing",
102+
serverNode.isInState(ServerState.ONLINE));
103+
LOG.info("start to submit the SCP for the same serverName {} which should fail",
104+
testServer);
105+
assertFalse(UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(testServer));
106+
UTIL.waitFor(60000, () -> procedure.isFinished());
107+
}
108+
}
106109
LOG.info("even when the SCP is finished, the duplicate SCP should not be scheduled for {}",
107-
testServer);
108-
Assert
109-
.assertFalse(UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(testServer));
110+
testServer);
111+
assertFalse(UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(testServer));
110112
serverNode = UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
111-
.getServerNode(testServer);
112-
Assert.assertNull("serverNode should be deleted after SCP finished", serverNode);
113+
.getServerNode(testServer);
114+
assertNull("serverNode should be deleted after SCP finished", serverNode);
113115
}
114116
}

0 commit comments

Comments
 (0)