Skip to content

Commit

Permalink
Fix flaky test RecoveryFromGatewayIT.testMultipleReplicaShardAssignme…
Browse files Browse the repository at this point in the history
…ntWithDelayedAllocationAndDifferentNodeStartTimeInBatchMode

Signed-off-by: Swetha Guptha <gupthasg@amazon.com>
  • Loading branch information
Swetha Guptha committed Jun 18, 2024
1 parent 112704b commit 10e4489
Showing 1 changed file with 19 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

import org.apache.lucene.index.CorruptIndexException;
import org.opensearch.Version;
import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse;
import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction;
Expand Down Expand Up @@ -101,6 +100,8 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -883,17 +884,20 @@ public void testMultipleReplicaShardAssignmentWithDelayedAllocationAndDifferentN
assertEquals(YELLOW, health.getStatus());
assertEquals(2, health.getUnassignedShards());
// shard should be unassigned because of Allocation_Delayed
ClusterAllocationExplainResponse allocationExplainResponse = client().admin()
.cluster()
.prepareAllocationExplain()
.setIndex("test")
.setShard(0)
.setPrimary(false)
.get();
assertEquals(
AllocationDecision.ALLOCATION_DELAYED,
allocationExplainResponse.getExplanation().getShardAllocationDecision().getAllocateDecision().getAllocationDecision()
BooleanSupplier delayedShardAllocationStatusVerificationSupplier = () -> AllocationDecision.ALLOCATION_DELAYED.equals(
client().admin()
.cluster()
.prepareAllocationExplain()
.setIndex("test")
.setShard(0)
.setPrimary(false)
.get()
.getExplanation()
.getShardAllocationDecision()
.getAllocateDecision()
.getAllocationDecision()
);
waitUntil(delayedShardAllocationStatusVerificationSupplier, 2, TimeUnit.MINUTES);

logger.info("--> restarting the node 1");
internalCluster().startDataOnlyNode(
Expand All @@ -903,26 +907,16 @@ public void testMultipleReplicaShardAssignmentWithDelayedAllocationAndDifferentN
assertTrue(clusterRerouteResponse.isAcknowledged());
ensureStableCluster(6);
waitUntil(
() -> client().admin().cluster().health(Requests.clusterHealthRequest().timeout("5m")).actionGet().getInitializingShards() == 0
() -> client().admin().cluster().health(Requests.clusterHealthRequest().timeout("5m")).actionGet().getActiveShards() == 3,
2,
TimeUnit.MINUTES
);

health = client().admin().cluster().health(Requests.clusterHealthRequest().timeout("5m")).actionGet();
assertFalse(health.isTimedOut());
assertEquals(YELLOW, health.getStatus());
assertEquals(1, health.getUnassignedShards());
assertEquals(1, health.getDelayedUnassignedShards());
allocationExplainResponse = client().admin()
.cluster()
.prepareAllocationExplain()
.setIndex("test")
.setShard(0)
.setPrimary(false)
.get();
assertEquals(
AllocationDecision.ALLOCATION_DELAYED,
allocationExplainResponse.getExplanation().getShardAllocationDecision().getAllocateDecision().getAllocationDecision()
);

waitUntil(delayedShardAllocationStatusVerificationSupplier, 2, TimeUnit.MINUTES);
logger.info("--> restarting the node 0");
internalCluster().startDataOnlyNode(
Settings.builder().put("node.name", nodesWithReplicaShards.get(1)).put(replicaNode1DataPathSettings).build()
Expand Down

0 comments on commit 10e4489

Please sign in to comment.