Skip to content

Commit 50d2cf3

Browse files
authored
Periodic warning for 1-node cluster w/ seed hosts (#88013)
For fully-formed single-node clusters, emit a periodic warning if seed_hosts has been set to a non-empty list. Closes #85222
1 parent 4cdfe7e commit 50d2cf3

File tree

5 files changed

+126
-0
lines changed

5 files changed

+126
-0
lines changed

docs/changelog/88013.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 88013
2+
summary: Periodic warning for 1-node cluster w/ seed hosts
3+
area: Cluster Coordination
4+
type: enhancement
5+
issues:
6+
- 85222

docs/reference/modules/discovery/discovery-settings.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ Sets how long the master node waits for each cluster state update to be
201201
completely published to all nodes, unless `discovery.type` is set to
202202
`single-node`. The default value is `30s`. See <<cluster-state-publishing>>.
203203

204+
`cluster.discovery_configuration_check.interval`::
205+
(<<static-cluster-setting,Static>>)
206+
Sets the interval between periodic checks that log warnings about an
207+
incorrect discovery configuration. The default value is `30s`.
208+
204209
`cluster.join_validation.cache_timeout`::
205210
(<<static-cluster-setting,Static>>)
206211
When a node requests to join the cluster, the elected master node sends it a

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292

9393
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ID;
9494
import static org.elasticsearch.core.Strings.format;
95+
import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
9596
import static org.elasticsearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered;
9697
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
9798
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
@@ -116,6 +117,13 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
116117
Setting.Property.NodeScope
117118
);
118119

120+
/**
 * Node-scoped time setting, registered under the key {@code cluster.discovery_configuration_check.interval}, that
 * controls how often this coordinator re-runs its discovery-configuration check while it is the leader of a
 * single-node cluster. The value is read once in the constructor and used as the fixed delay between runs of the
 * single-node-cluster check scheduled in {@code becomeLeader()}.
 * <p>
 * The default is 30000ms (30s). NOTE(review): the second {@code TimeValue} (1ms) is presumably the minimum
 * permitted value -- confirm against {@code Setting.timeSetting}.
 */
public static final Setting<TimeValue> SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING = Setting.timeSetting(
121+
"cluster.discovery_configuration_check.interval",
122+
TimeValue.timeValueMillis(30000),
123+
TimeValue.timeValueMillis(1),
124+
Setting.Property.NodeScope
125+
);
126+
119127
public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state";
120128

121129
private final Settings settings;
@@ -140,6 +148,9 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt
140148
private final SeedHostsResolver configuredHostsResolver;
141149
private final TimeValue publishTimeout;
142150
private final TimeValue publishInfoTimeout;
151+
private final TimeValue singleNodeClusterSeedHostsCheckInterval;
152+
@Nullable
153+
private Scheduler.Cancellable singleNodeClusterChecker = null;
143154
private final PublicationTransportHandler publicationHandler;
144155
private final LeaderChecker leaderChecker;
145156
private final FollowersChecker followersChecker;
@@ -218,6 +229,7 @@ public Coordinator(
218229
this.joinAccumulator = new InitialJoinAccumulator();
219230
this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
220231
this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings);
232+
this.singleNodeClusterSeedHostsCheckInterval = SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings);
221233
this.random = random;
222234
this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
223235
this.preVoteCollector = new PreVoteCollector(
@@ -739,6 +751,38 @@ private void processJoinRequest(JoinRequest joinRequest, ActionListener<Void> jo
739751
}
740752
}
741753

754+
private void cancelSingleNodeClusterChecker() {
755+
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
756+
if (singleNodeClusterChecker != null) {
757+
singleNodeClusterChecker.cancel();
758+
singleNodeClusterChecker = null;
759+
}
760+
}
761+
762+
private void checkSingleNodeCluster() {
763+
if (applierState.nodes().size() > 1) {
764+
return;
765+
}
766+
767+
if (DISCOVERY_SEED_HOSTS_SETTING.exists(settings)) {
768+
if (DISCOVERY_SEED_HOSTS_SETTING.get(settings).isEmpty()) {
769+
// For a single-node cluster, the only acceptable setting is an empty list.
770+
return;
771+
} else {
772+
logger.warn(
773+
"""
774+
This node is a fully-formed single-node cluster with cluster UUID [{}], but it is configured as if to \
775+
discover other nodes and form a multi-node cluster via the [{}] setting. Fully-formed clusters do not \
776+
attempt to discover other nodes, and nodes with different cluster UUIDs cannot belong to the same cluster. \
777+
The cluster UUID persists across restarts and can only be changed by deleting the contents of the node's \
778+
data path(s). Remove the discovery configuration to suppress this message.""",
779+
applierState.metadata().clusterUUID(),
780+
DISCOVERY_SEED_HOSTS_SETTING.getKey() + "=" + DISCOVERY_SEED_HOSTS_SETTING.get(settings)
781+
);
782+
}
783+
}
784+
}
785+
742786
void becomeCandidate(String method) {
743787
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
744788
logger.debug(
@@ -748,6 +792,7 @@ void becomeCandidate(String method) {
748792
mode,
749793
lastKnownLeader
750794
);
795+
cancelSingleNodeClusterChecker();
751796

752797
if (mode != Mode.CANDIDATE) {
753798
final Mode prevMode = mode;
@@ -803,6 +848,13 @@ private void becomeLeader() {
803848

804849
assert leaderChecker.leader() == null : leaderChecker.leader();
805850
followersChecker.updateFastResponseState(getCurrentTerm(), mode);
851+
852+
if (applierState.nodes().size() > 1) {
853+
cancelSingleNodeClusterChecker();
854+
} else if (singleNodeClusterChecker == null) {
855+
singleNodeClusterChecker = transportService.getThreadPool()
856+
.scheduleWithFixedDelay(() -> { checkSingleNodeCluster(); }, this.singleNodeClusterSeedHostsCheckInterval, Names.SAME);
857+
}
806858
}
807859

808860
void becomeFollower(String method, DiscoveryNode leaderNode) {
@@ -822,6 +874,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) {
822874
lastKnownLeader
823875
);
824876
}
877+
cancelSingleNodeClusterChecker();
825878

826879
final boolean restartLeaderChecker = (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) == false;
827880

@@ -1028,6 +1081,10 @@ assert getLocalNode().equals(applierState.nodes().getMasterNode())
10281081
: coordinationState.get().getLastAcceptedConfiguration()
10291082
+ " != "
10301083
+ coordinationState.get().getLastCommittedConfiguration();
1084+
1085+
if (coordinationState.get().getLastAcceptedState().nodes().size() == 1) {
1086+
assert singleNodeClusterChecker != null;
1087+
}
10311088
} else if (mode == Mode.FOLLOWER) {
10321089
assert coordinationState.get().electionWon() == false : getLocalNode() + " is FOLLOWER so electionWon() should be false";
10331090
assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false);
@@ -1045,6 +1102,7 @@ assert getLocalNode().equals(applierState.nodes().getMasterNode())
10451102
assert currentPublication.map(Publication::isCommitted).orElse(true);
10461103
assert preVoteCollector.getLeader().equals(lastKnownLeader.get()) : preVoteCollector;
10471104
assert clusterFormationFailureHelper.isRunning() == false;
1105+
assert singleNodeClusterChecker == null;
10481106
} else {
10491107
assert mode == Mode.CANDIDATE;
10501108
assert joinAccumulator instanceof JoinHelper.CandidateJoinAccumulator;

server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,7 @@ public void apply(Settings value, Settings current, Settings previous) {
492492
ElectionSchedulerFactory.ELECTION_DURATION_SETTING,
493493
Coordinator.PUBLISH_TIMEOUT_SETTING,
494494
Coordinator.PUBLISH_INFO_TIMEOUT_SETTING,
495+
Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING,
495496
JoinValidationService.JOIN_VALIDATION_CACHE_TIMEOUT_SETTING,
496497
FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING,
497498
FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING,

server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
7878
import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
7979
import static org.elasticsearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING;
80+
import static org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING;
8081
import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
8182
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;
8283
import static org.elasticsearch.test.NodeRoles.nonMasterNode;
@@ -2109,6 +2110,61 @@ public void assertMatched() {
21092110
}
21102111
}
21112112

2113+
@TestLogging(
2114+
reason = "testing warning of a single-node cluster having discovery seed hosts",
2115+
value = "org.elasticsearch.cluster.coordination.Coordinator:WARN"
2116+
)
2117+
public void testLogsWarningPeriodicallyIfSingleNodeClusterHasSeedHosts() throws IllegalAccessException {
2118+
final long warningDelayMillis;
2119+
final Settings settings;
2120+
final String fakeSeedHost = buildNewFakeTransportAddress().toString();
2121+
if (randomBoolean()) {
2122+
settings = Settings.builder().putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost).build();
2123+
warningDelayMillis = Coordinator.SINGLE_NODE_CLUSTER_SEED_HOSTS_CHECK_INTERVAL_SETTING.get(settings).millis();
2124+
} else {
2125+
warningDelayMillis = randomLongBetween(1, 100000);
2126+
settings = Settings.builder()
2127+
.put(ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.getKey(), warningDelayMillis + "ms")
2128+
.putList(DISCOVERY_SEED_HOSTS_SETTING.getKey(), fakeSeedHost)
2129+
.build();
2130+
}
2131+
logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);
2132+
2133+
try (Cluster cluster = new Cluster(1, true, settings)) {
2134+
cluster.runRandomly();
2135+
cluster.stabilise();
2136+
2137+
for (int i = scaledRandomIntBetween(1, 10); i >= 0; i--) {
2138+
final MockLogAppender mockLogAppender = new MockLogAppender();
2139+
try {
2140+
mockLogAppender.start();
2141+
Loggers.addAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
2142+
mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {
2143+
String loggedClusterUuid;
2144+
2145+
@Override
2146+
public void match(LogEvent event) {
2147+
final String message = event.getMessage().getFormattedMessage();
2148+
assertThat(message, startsWith("This node is a fully-formed single-node cluster with cluster UUID"));
2149+
loggedClusterUuid = (String) event.getMessage().getParameters()[0];
2150+
}
2151+
2152+
@Override
2153+
public void assertMatched() {
2154+
final String clusterUuid = cluster.getAnyNode().getLastAppliedClusterState().metadata().clusterUUID();
2155+
assertThat(loggedClusterUuid + " vs " + clusterUuid, clusterUuid, equalTo(clusterUuid));
2156+
}
2157+
});
2158+
cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
2159+
mockLogAppender.assertAllExpectationsMatched();
2160+
} finally {
2161+
Loggers.removeAppender(LogManager.getLogger(Coordinator.class), mockLogAppender);
2162+
mockLogAppender.stop();
2163+
}
2164+
}
2165+
}
2166+
}
2167+
21122168
@TestLogging(
21132169
reason = "testing LagDetector and CoordinatorPublication logging",
21142170
value = "org.elasticsearch.cluster.coordination.LagDetector:DEBUG,"

0 commit comments

Comments
 (0)