Skip to content

Commit 5347dec

Browse files
authored
Allow ILM to stop if indices have nonexistent policies (#40820)
Prior to this PR, a bug in ILM prevented ILM from stopping if one or more indices had an `index.lifecycle.name` setting that referred to a policy that does not exist: the `operation_mode` would remain stuck at STOPPING until either the policy was created or the nonexistent policy was removed from those indices. This change allows ILM to stop in this case and makes the logging clearer as to why ILM is not stopping.
1 parent 65d2518 commit 5347dec

File tree

2 files changed

+65
-22
lines changed

2 files changed

+65
-22
lines changed

x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/indexlifecycle/TimeSeriesLifecycleActionsIT.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,28 @@ public void testMoveToInjectedStep() throws Exception {
812812
});
813813
}
814814

815+
/**
 * Checks that ILM reaches the STOPPED operation mode after a stop request even though
 * an index is configured with a lifecycle policy name that was never created by this test.
 * ILM is started again at the end so later tests are not affected.
 */
public void testCanStopILMWithPolicyUsingNonexistentPolicy() throws Exception {
816+
// Create a single-shard, zero-replica index whose lifecycle policy name is a random string;
// no policy with that name is created here (presumably none exists in the cluster - confirm).
createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
817+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
818+
.put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), randomAlphaOfLengthBetween(5,15)));
819+
820+
// Ask ILM to stop.
Request stopILMRequest = new Request("POST", "_ilm/stop");
821+
assertOK(client().performRequest(stopILMRequest));
822+
823+
// Poll the status endpoint until the reported operation_mode is STOPPED.
Request statusRequest = new Request("GET", "_ilm/status");
824+
assertBusy(() -> {
825+
Response statusResponse = client().performRequest(statusRequest);
826+
assertOK(statusResponse);
827+
Map<String, Object> statusResponseMap = entityAsMap(statusResponse);
828+
String status = (String) statusResponseMap.get("operation_mode");
829+
assertEquals("STOPPED", status);
830+
});
831+
832+
// Re-start ILM so that subsequent tests don't fail
833+
Request startILMReqest = new Request("POST", "_ilm/start");
834+
assertOK(client().performRequest(startILMReqest));
835+
}
836+
815837
private void createFullPolicy(TimeValue hotTime) throws IOException {
816838
Map<String, LifecycleAction> hotActions = new HashMap<>();
817839
hotActions.put(SetPriorityAction.NAME, new SetPriorityAction(100));

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleService.java

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
* A service which runs the {@link LifecyclePolicy}s associated with indexes.
4444
*/
4545
public class IndexLifecycleService
46-
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
46+
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
4747
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
4848
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
4949
private volatile boolean isMaster = false;
@@ -111,18 +111,26 @@ public void onMaster() {
111111
IndexMetaData idxMeta = cursor.value;
112112
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
113113
if (Strings.isNullOrEmpty(policyName) == false) {
114-
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
115-
if (OperationMode.STOPPING == currentMode &&
116-
stepKey != null &&
117-
IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
118-
logger.info("skipping policy [{}] for index [{}]. stopping Index Lifecycle execution",
119-
policyName, idxMeta.getIndex().getName());
120-
continue;
114+
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
115+
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
116+
117+
if (OperationMode.STOPPING == currentMode) {
118+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
119+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
120+
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
121+
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
122+
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
123+
safeToStop = false;
124+
} else {
125+
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
126+
idxMeta.getIndex().getName(), policyName);
127+
}
128+
} else {
129+
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
121130
}
122-
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
123-
safeToStop = false; // proven false!
124131
}
125132
}
133+
126134
if (safeToStop && OperationMode.STOPPING == currentMode) {
127135
submitOperationModeUpdate(OperationMode.STOPPED);
128136
}
@@ -184,7 +192,7 @@ public void clusterChanged(ClusterChangedEvent event) {
184192
@Override
185193
public void applyClusterState(ClusterChangedEvent event) {
186194
if (event.localNodeMaster()) { // only act if we are master, otherwise
187-
// keep idle until elected
195+
// keep idle until elected
188196
if (event.state().metaData().custom(IndexLifecycleMetadata.TYPE) != null) {
189197
policyRegistry.update(event.state());
190198
}
@@ -237,21 +245,34 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
237245
IndexMetaData idxMeta = cursor.value;
238246
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
239247
if (Strings.isNullOrEmpty(policyName) == false) {
240-
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
241-
if (OperationMode.STOPPING == currentMode && stepKey != null
242-
&& IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
243-
logger.info("skipping policy [" + policyName + "] for index [" + idxMeta.getIndex().getName()
244-
+ "]. stopping Index Lifecycle execution");
245-
continue;
246-
}
247-
if (fromClusterStateChange) {
248-
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
248+
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
249+
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
250+
251+
if (OperationMode.STOPPING == currentMode) {
252+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
253+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
254+
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
255+
if (fromClusterStateChange) {
256+
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
257+
} else {
258+
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
259+
}
260+
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
261+
safeToStop = false;
262+
} else {
263+
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
264+
idxMeta.getIndex().getName(), policyName);
265+
}
249266
} else {
250-
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
267+
if (fromClusterStateChange) {
268+
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
269+
} else {
270+
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
271+
}
251272
}
252-
safeToStop = false; // proven false!
253273
}
254274
}
275+
255276
if (safeToStop && OperationMode.STOPPING == currentMode) {
256277
submitOperationModeUpdate(OperationMode.STOPPED);
257278
}

0 commit comments

Comments
 (0)