Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix AwarenessAttributeDecommissionIT.testConcurrentDecommissionAction #14372

Merged
merged 1 commit into from
Jun 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -539,18 +539,7 @@ private void assertNodesRemovedAfterZoneDecommission(boolean originalClusterMana
assertEquals(originalClusterManager, currentClusterManager);
}

// Will wait for all events to complete
client(activeNode).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(currentClusterManager).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());

// will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster
ensureStableCluster(15, TimeValue.timeValueMinutes(2));
deleteDecommissionStateAndWaitForStableCluster(currentClusterManager, 15);
}

public void testDecommissionFailedWhenDifferentAttributeAlreadyDecommissioned() throws Exception {
Expand Down Expand Up @@ -617,18 +606,7 @@ public void testDecommissionFailedWhenDifferentAttributeAlreadyDecommissioned()
)
);

// Will wait for all events to complete
client(node_in_c).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(node_in_c).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());

// will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster
ensureStableCluster(6, TimeValue.timeValueMinutes(2));
deleteDecommissionStateAndWaitForStableCluster(node_in_c, 6);
}

public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionException, InterruptedException {
Expand Down Expand Up @@ -748,20 +726,7 @@ public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionEx
);
logger.info("--> Verified the decommissioned node has in_progress state.");

// Will wait for all events to complete
client(activeNode).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
logger.info("--> Got LANGUID event");
// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(activeNode).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());
logger.info("--> Deleting decommission done.");

// will wait for cluster to stabilise with a timeout of 2 min (findPeerInterval for decommissioned nodes)
// as by then all nodes should have joined the cluster
ensureStableCluster(6, TimeValue.timeValueSeconds(121));
deleteDecommissionStateAndWaitForStableCluster(activeNode, 6);
}

public void testDecommissionFailedWhenAttributeNotWeighedAway() throws Exception {
Expand Down Expand Up @@ -983,15 +948,7 @@ public void testDecommissionAcknowledgedIfWeightsNotSetForNonRoutingNode() throw
assertEquals(clusterState.nodes().getDataNodes().size(), 3);
assertEquals(clusterState.nodes().getClusterManagerNodes().size(), 2);

// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(dataNodes.get(0)).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());

// will wait for cluster to stabilise with a timeout of 2 min as by then all nodes should have joined the cluster
ensureStableCluster(6, TimeValue.timeValueMinutes(2));
deleteDecommissionStateAndWaitForStableCluster(dataNodes.get(0), 6);
}

public void testConcurrentDecommissionAction() throws Exception {
Expand Down Expand Up @@ -1019,7 +976,7 @@ public void testConcurrentDecommissionAction() throws Exception {
.build()
);
logger.info("--> start 3 data nodes on zones 'a' & 'b' & 'c'");
internalCluster().startNodes(
final String bZoneDataNode = internalCluster().startNodes(
Settings.builder()
.put(commonSettings)
.put("node.attr.zone", "a")
Expand All @@ -1035,7 +992,7 @@ public void testConcurrentDecommissionAction() throws Exception {
.put("node.attr.zone", "c")
.put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE))
.build()
);
).get(1);

ensureStableCluster(6);
ClusterHealthResponse health = client().admin()
Expand Down Expand Up @@ -1100,6 +1057,25 @@ public void testConcurrentDecommissionAction() throws Exception {
assertEquals(concurrentRuns, numRequestAcknowledged.get() + numRequestUnAcknowledged.get() + numRequestFailed.get());
assertEquals(concurrentRuns - 1, numRequestFailed.get());
assertEquals(1, numRequestAcknowledged.get() + numRequestUnAcknowledged.get());

deleteDecommissionStateAndWaitForStableCluster(bZoneDataNode, 6);
}

private void deleteDecommissionStateAndWaitForStableCluster(String activeNodeName, int expectedClusterSize) throws ExecutionException,
InterruptedException {
client(activeNodeName).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();

// Recommissioning the zone back to gracefully succeed the test once above tests succeeds
DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(activeNodeName).execute(
DeleteDecommissionStateAction.INSTANCE,
new DeleteDecommissionStateRequest()
).get();
assertTrue(deleteDecommissionStateResponse.isAcknowledged());
logger.info("--> Deleting decommission done.");

// will wait for cluster to stabilise with a timeout of 2 min (findPeerInterval for decommissioned nodes)
// as by then all nodes should have joined the cluster
ensureStableCluster(expectedClusterSize, TimeValue.timeValueSeconds(121));
}

private static class WaitForFailedDecommissionState implements ClusterStateObserver.Listener {
Expand Down
Loading