Skip to content

Commit b1b30ed

Browse files
authored
Allow ILM to stop if indices have nonexistent policies (#40820)
Prior to this PR, there was a bug in ILM that prevented ILM from stopping if one or more indices had an index.lifecycle.name setting referring to a policy that does not exist: the operation_mode would remain stuck at STOPPING until either the policy was created or the nonexistent policy was removed from those indices. This change allows ILM to stop in this case and makes the logging clearer about why ILM is not stopping.
1 parent 45e981f commit b1b30ed

File tree

2 files changed

+65
-22
lines changed

2 files changed

+65
-22
lines changed

x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/indexlifecycle/TimeSeriesLifecycleActionsIT.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,28 @@ public void testMoveToInjectedStep() throws Exception {
813813
});
814814
}
815815

816+
public void testCanStopILMWithPolicyUsingNonexistentPolicy() throws Exception {
817+
createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
818+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
819+
.put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), randomAlphaOfLengthBetween(5,15)));
820+
821+
Request stopILMRequest = new Request("POST", "_ilm/stop");
822+
assertOK(client().performRequest(stopILMRequest));
823+
824+
Request statusRequest = new Request("GET", "_ilm/status");
825+
assertBusy(() -> {
826+
Response statusResponse = client().performRequest(statusRequest);
827+
assertOK(statusResponse);
828+
Map<String, Object> statusResponseMap = entityAsMap(statusResponse);
829+
String status = (String) statusResponseMap.get("operation_mode");
830+
assertEquals("STOPPED", status);
831+
});
832+
833+
// Re-start ILM so that subsequent tests don't fail
834+
Request startILMReqest = new Request("POST", "_ilm/start");
835+
assertOK(client().performRequest(startILMReqest));
836+
}
837+
816838
private void createFullPolicy(TimeValue hotTime) throws IOException {
817839
Map<String, LifecycleAction> hotActions = new HashMap<>();
818840
hotActions.put(SetPriorityAction.NAME, new SetPriorityAction(100));

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleService.java

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
* A service which runs the {@link LifecyclePolicy}s associated with indexes.
4545
*/
4646
public class IndexLifecycleService extends AbstractComponent
47-
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
47+
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
4848
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
4949
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
5050
private volatile boolean isMaster = false;
@@ -111,18 +111,26 @@ public void onMaster() {
111111
IndexMetaData idxMeta = cursor.value;
112112
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
113113
if (Strings.isNullOrEmpty(policyName) == false) {
114-
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
115-
if (OperationMode.STOPPING == currentMode &&
116-
stepKey != null &&
117-
IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
118-
logger.info("skipping policy [{}] for index [{}]. stopping Index Lifecycle execution",
119-
policyName, idxMeta.getIndex().getName());
120-
continue;
114+
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
115+
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
116+
117+
if (OperationMode.STOPPING == currentMode) {
118+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
119+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
120+
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
121+
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
122+
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
123+
safeToStop = false;
124+
} else {
125+
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
126+
idxMeta.getIndex().getName(), policyName);
127+
}
128+
} else {
129+
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
121130
}
122-
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
123-
safeToStop = false; // proven false!
124131
}
125132
}
133+
126134
if (safeToStop && OperationMode.STOPPING == currentMode) {
127135
submitOperationModeUpdate(OperationMode.STOPPED);
128136
}
@@ -184,7 +192,7 @@ public void clusterChanged(ClusterChangedEvent event) {
184192
@Override
185193
public void applyClusterState(ClusterChangedEvent event) {
186194
if (event.localNodeMaster()) { // only act if we are master, otherwise
187-
// keep idle until elected
195+
// keep idle until elected
188196
if (event.state().metaData().custom(IndexLifecycleMetadata.TYPE) != null) {
189197
policyRegistry.update(event.state());
190198
}
@@ -237,21 +245,34 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
237245
IndexMetaData idxMeta = cursor.value;
238246
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
239247
if (Strings.isNullOrEmpty(policyName) == false) {
240-
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
241-
if (OperationMode.STOPPING == currentMode && stepKey != null
242-
&& IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
243-
logger.info("skipping policy [" + policyName + "] for index [" + idxMeta.getIndex().getName()
244-
+ "]. stopping Index Lifecycle execution");
245-
continue;
246-
}
247-
if (fromClusterStateChange) {
248-
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
248+
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
249+
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
250+
251+
if (OperationMode.STOPPING == currentMode) {
252+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
253+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
254+
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
255+
if (fromClusterStateChange) {
256+
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
257+
} else {
258+
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
259+
}
260+
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
261+
safeToStop = false;
262+
} else {
263+
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
264+
idxMeta.getIndex().getName(), policyName);
265+
}
249266
} else {
250-
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
267+
if (fromClusterStateChange) {
268+
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
269+
} else {
270+
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
271+
}
251272
}
252-
safeToStop = false; // proven false!
253273
}
254274
}
275+
255276
if (safeToStop && OperationMode.STOPPING == currentMode) {
256277
submitOperationModeUpdate(OperationMode.STOPPED);
257278
}

0 commit comments

Comments
 (0)