From 17bb40ebf17262a80c20e2967c595a30c2e5866a Mon Sep 17 00:00:00 2001 From: kkewwei Date: Wed, 26 Jun 2024 03:12:43 +0800 Subject: [PATCH] Fix Flaky Test ClusterRerouteIT.testDelayWithALargeAmountOfShards (#14510) Signed-off-by: kkewwei kkewwei@163.com Signed-off-by: kkewwei kkewwei@163.com Signed-off-by: kkewwei --- .../cluster/allocation/ClusterRerouteIT.java | 3 ++- .../opensearch/test/OpenSearchIntegTestCase.java | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/ClusterRerouteIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/ClusterRerouteIT.java index dbcb030d8a4f7..f4b5f112f5785 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/ClusterRerouteIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/ClusterRerouteIT.java @@ -273,7 +273,8 @@ public void testDelayWithALargeAmountOfShards() throws Exception { internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node_1)); // This might run slowly on older hardware - ensureGreen(TimeValue.timeValueMinutes(2)); + // In some cases the shards are rebalanced back and forth (this looks like a very low-probability bug), so do not wait for relocating shards to settle. + ensureGreen(TimeValue.timeValueMinutes(2), false); } private void rerouteWithAllocateLocalGateway(Settings commonSettings) throws Exception { diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 71ab56c98312a..ca5ddf21710af 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -864,6 +864,10 @@ public ClusterHealthStatus ensureGreen(TimeValue timeout, String... 
indices) { return ensureColor(ClusterHealthStatus.GREEN, timeout, false, indices); } + public ClusterHealthStatus ensureGreen(TimeValue timeout, boolean waitForNoRelocatingShards, String... indices) { + return ensureColor(ClusterHealthStatus.GREEN, timeout, waitForNoRelocatingShards, false, indices); + } + /** * Ensures the cluster has a yellow state via the cluster health API. */ @@ -891,6 +895,16 @@ private ClusterHealthStatus ensureColor( TimeValue timeout, boolean waitForNoInitializingShards, String... indices + ) { + return ensureColor(clusterHealthStatus, timeout, true, waitForNoInitializingShards, indices); + } + + private ClusterHealthStatus ensureColor( + ClusterHealthStatus clusterHealthStatus, + TimeValue timeout, + boolean waitForNoRelocatingShards, + boolean waitForNoInitializingShards, + String... indices ) { String color = clusterHealthStatus.name().toLowerCase(Locale.ROOT); String method = "ensure" + Strings.capitalize(color); @@ -899,7 +913,7 @@ private ClusterHealthStatus ensureColor( .timeout(timeout) .waitForStatus(clusterHealthStatus) .waitForEvents(Priority.LANGUID) - .waitForNoRelocatingShards(true) + .waitForNoRelocatingShards(waitForNoRelocatingShards) .waitForNoInitializingShards(waitForNoInitializingShards) // We currently often use ensureGreen or ensureYellow to check whether the cluster is back in a good state after shutting down // a node. If the node that is stopped is the cluster-manager node, another node will become cluster-manager and publish a