Skip to content

Commit cceaa9a

Browse files
authored
Only ack cluster state updates successfully applied on all nodes (#30672)
The cluster state acking mechanism currently acks cluster state updates that have not been successfully applied on all nodes, which is incorrect. For example, if some of the nodes disconnect during publishing and never acknowledge receiving the new cluster state, the user-facing action (e.g. a create index request) will still treat the update as acknowledged.
1 parent 886db84 commit cceaa9a

File tree

5 files changed

+55
-13
lines changed

5 files changed

+55
-13
lines changed

server/src/main/java/org/elasticsearch/cluster/AckedClusterStateTaskListener.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@
2525
public interface AckedClusterStateTaskListener extends ClusterStateTaskListener {
2626

2727
/**
28-
* Called to determine which nodes the acknowledgement is expected from
28+
* Called to determine which nodes the acknowledgement is expected from.
29+
*
30+
* As this method will be called multiple times to determine the set of acking nodes,
31+
* it is crucial for it to return consistent results: Given the same listener instance
32+
* and the same node parameter, the method implementation should return the same result.
2933
*
3034
* @param discoveryNode a node
3135
* @return true if the node is expected to send ack back, false otherwise

server/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public boolean mustAck(DiscoveryNode discoveryNode) {
6161
* @param e optional error that might have been thrown
6262
*/
6363
public void onAllNodesAcked(@Nullable Exception e) {
64-
listener.onResponse(newResponse(true));
64+
listener.onResponse(newResponse(e == null));
6565
}
6666

6767
protected abstract Response newResponse(boolean acknowledged);

server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataMappingService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ public boolean mustAck(DiscoveryNode discoveryNode) {
363363

364364
@Override
365365
public void onAllNodesAcked(@Nullable Exception e) {
366-
listener.onResponse(new ClusterStateUpdateResponse(true));
366+
listener.onResponse(new ClusterStateUpdateResponse(e == null));
367367
}
368368

369369
@Override

server/src/main/java/org/elasticsearch/cluster/service/MasterService.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ private static class AckCountDownListener implements Discovery.AckListener {
563563

564564
private final AckedClusterStateTaskListener ackedTaskListener;
565565
private final CountDown countDown;
566-
private final DiscoveryNodes nodes;
566+
private final DiscoveryNode masterNode;
567567
private final long clusterStateVersion;
568568
private final Future<?> ackTimeoutCallback;
569569
private Exception lastFailure;
@@ -572,27 +572,23 @@ private static class AckCountDownListener implements Discovery.AckListener {
572572
ThreadPool threadPool) {
573573
this.ackedTaskListener = ackedTaskListener;
574574
this.clusterStateVersion = clusterStateVersion;
575-
this.nodes = nodes;
575+
this.masterNode = nodes.getMasterNode();
576576
int countDown = 0;
577577
for (DiscoveryNode node : nodes) {
578-
if (ackedTaskListener.mustAck(node)) {
578+
//we always wait for at least the master node
579+
if (node.equals(masterNode) || ackedTaskListener.mustAck(node)) {
579580
countDown++;
580581
}
581582
}
582-
//we always wait for at least 1 node (the master)
583-
countDown = Math.max(1, countDown);
584583
logger.trace("expecting {} acknowledgements for cluster_state update (version: {})", countDown, clusterStateVersion);
585584
this.countDown = new CountDown(countDown);
586585
this.ackTimeoutCallback = threadPool.schedule(ackedTaskListener.ackTimeout(), ThreadPool.Names.GENERIC, () -> onTimeout());
587586
}
588587

589588
@Override
590589
public void onNodeAck(DiscoveryNode node, @Nullable Exception e) {
591-
if (!ackedTaskListener.mustAck(node)) {
592-
//we always wait for the master ack anyway
593-
if (!node.equals(nodes.getMasterNode())) {
594-
return;
595-
}
590+
if (node.equals(masterNode) == false && ackedTaskListener.mustAck(node) == false) {
591+
return;
596592
}
597593
if (e == null) {
598594
logger.trace("ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion);

server/src/test/java/org/elasticsearch/cluster/ack/AckClusterUpdateSettingsIT.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
2424
import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
2525
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse;
26+
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
2627
import org.elasticsearch.action.admin.indices.open.OpenIndexResponse;
2728
import org.elasticsearch.client.Client;
2829
import org.elasticsearch.cluster.ClusterState;
@@ -33,8 +34,16 @@
3334
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
3435
import org.elasticsearch.common.settings.Settings;
3536
import org.elasticsearch.discovery.DiscoverySettings;
37+
import org.elasticsearch.discovery.zen.PublishClusterStateAction;
38+
import org.elasticsearch.plugins.Plugin;
3639
import org.elasticsearch.test.ESIntegTestCase;
3740
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
41+
import org.elasticsearch.test.transport.MockTransportService;
42+
import org.elasticsearch.transport.TransportService;
43+
44+
import java.util.Arrays;
45+
import java.util.Collection;
46+
import java.util.stream.Stream;
3847

3948
import static org.elasticsearch.test.ESIntegTestCase.Scope.TEST;
4049
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
@@ -43,6 +52,11 @@
4352
@ClusterScope(scope = TEST, minNumDataNodes = 2)
4453
public class AckClusterUpdateSettingsIT extends ESIntegTestCase {
4554

55+
@Override
56+
protected Collection<Class<? extends Plugin>> nodePlugins() {
57+
return Arrays.asList(MockTransportService.TestPlugin.class);
58+
}
59+
4660
@Override
4761
protected Settings nodeSettings(int nodeOrdinal) {
4862
return Settings.builder()
@@ -156,4 +170,32 @@ public void testOpenIndexNoAcknowledgement() {
156170
assertThat(openIndexResponse.isAcknowledged(), equalTo(false));
157171
ensureGreen("test"); // make sure that recovery from disk has completed, so that check index doesn't fail.
158172
}
173+
174+
public void testAckingFailsIfNotPublishedToAllNodes() {
175+
String masterNode = internalCluster().getMasterName();
176+
String nonMasterNode = Stream.of(internalCluster().getNodeNames())
177+
.filter(node -> node.equals(masterNode) == false).findFirst().get();
178+
179+
MockTransportService masterTransportService =
180+
(MockTransportService) internalCluster().getInstance(TransportService.class, masterNode);
181+
MockTransportService nonMasterTransportService =
182+
(MockTransportService) internalCluster().getInstance(TransportService.class, nonMasterNode);
183+
184+
logger.info("blocking cluster state publishing from master [{}] to non master [{}]", masterNode, nonMasterNode);
185+
if (randomBoolean() && internalCluster().numMasterNodes() != 2) {
186+
masterTransportService.addFailToSendNoConnectRule(nonMasterTransportService, PublishClusterStateAction.SEND_ACTION_NAME);
187+
} else {
188+
masterTransportService.addFailToSendNoConnectRule(nonMasterTransportService, PublishClusterStateAction.COMMIT_ACTION_NAME);
189+
}
190+
191+
CreateIndexResponse response = client().admin().indices().prepareCreate("test").get();
192+
assertFalse(response.isAcknowledged());
193+
194+
logger.info("waiting for cluster to reform");
195+
masterTransportService.clearRule(nonMasterTransportService);
196+
197+
ensureStableCluster(internalCluster().size());
198+
199+
assertAcked(client().admin().indices().prepareDelete("test"));
200+
}
159201
}

0 commit comments

Comments
 (0)