Skip to content

ILM: allow check-migration step to continue if tier setting unset #62636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -26,8 +27,9 @@
import java.util.Set;

import static org.elasticsearch.cluster.node.DiscoveryNodeRole.DATA_ROLE;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING;
import static org.elasticsearch.xpack.core.ilm.AllocationRoutedStep.getPendingAllocations;
import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo;

/**
* Checks whether all shards have been correctly routed in response to updating the allocation rules for an index in order
Expand Down Expand Up @@ -71,11 +73,23 @@ public Result isConditionMet(Index index, ClusterState clusterState) {
logger.debug("[{}] lifecycle action for index [{}] executed but index no longer exists", getKey().getAction(), index.getName());
return new Result(false, null);
}
String destinationTier = INDEX_ROUTING_INCLUDE_SETTING.get(idxMeta.getSettings());
String destinationTier = INDEX_ROUTING_PREFER_SETTING.get(idxMeta.getSettings());
if (ActiveShardCount.ALL.enoughShardsActive(clusterState, index.getName()) == false) {
logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active",
if (Strings.isEmpty(destinationTier)) {
logger.debug("[{}] lifecycle action for index [{}] cannot make progress because not all shards are active",
getKey().getAction(), index.getName());
} else {
logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active",
getKey().getAction(), index.getName(), destinationTier);
return new Result(false, AllocationInfo.waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas()));
}
return new Result(false, waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas()));
}

if (Strings.isEmpty(destinationTier)) {
logger.debug("index [{}] has no data tier routing setting configured and all its shards are active. considering the [{}] " +
"step condition met and continuing to the next step", index.getName(), getKey().getName());
// the user removed the tier routing setting and all the shards are active so we'll cary on
return new Result(true, null);
}

int allocationPendingAllShards = getPendingAllocations(index, ALLOCATION_DECIDERS, clusterState);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import java.util.Set;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER;
import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
Expand Down Expand Up @@ -95,7 +95,7 @@ public void testExecuteWithUnassignedShard() {

public void testExecuteWithPendingShards() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -122,7 +122,7 @@ public void testExecuteWithPendingShards() {

public void testExecuteWithPendingShardsAndTargetRoleNotPresentInCluster() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand Down Expand Up @@ -159,7 +159,7 @@ public void testExecuteIndexMissing() {

public void testExecuteIsComplete() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -181,7 +181,7 @@ public void testExecuteIsComplete() {

public void testExecuteWithGenericDataNodes() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -200,6 +200,52 @@ public void testExecuteWithGenericDataNodes() {
assertThat(result.getInfomationContext(), is(nullValue()));
}

public void testExecuteForIndexWithoutTierRoutingInformationWaitsForReplicasToBeActive() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT))
.numberOfShards(1).numberOfReplicas(1).build();
Index index = indexMetadata.getIndex();
{
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node1", true, ShardRoutingState.STARTED))
.addReplica();

ClusterState clusterState =
ClusterState.builder(ClusterState.EMPTY_STATE).metadata(Metadata.builder().put(indexMetadata, true).build())
.nodes(DiscoveryNodes.builder()
.add(newNode("node1", Collections.singleton(DataTier.DATA_HOT_NODE_ROLE)))
)
.routingTable(RoutingTable.builder().add(indexRoutingTable).build())
.build();
DataTierMigrationRoutedStep step = createRandomInstance();
Result expectedResult = new Result(false, waitingForActiveShardsAllocationInfo(1));

Result result = step.isConditionMet(index, clusterState);
assertThat(result.isComplete(), is(false));
assertThat(result.getInfomationContext(), is(expectedResult.getInfomationContext()));
}

{
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node1", true, ShardRoutingState.STARTED))
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node2", false, ShardRoutingState.STARTED));

ClusterState clusterState =
ClusterState.builder(ClusterState.EMPTY_STATE).metadata(Metadata.builder().put(indexMetadata, true).build())
.nodes(DiscoveryNodes.builder()
.add(newNode("node1", Collections.singleton(DataTier.DATA_HOT_NODE_ROLE)))
.add(newNode("node2", Collections.singleton(DataTier.DATA_WARM_NODE_ROLE)))
)
.routingTable(RoutingTable.builder().add(indexRoutingTable).build())
.build();
DataTierMigrationRoutedStep step = createRandomInstance();

Result result = step.isConditionMet(index, clusterState);
assertThat(result.isComplete(), is(true));
assertThat(result.getInfomationContext(), is(nullValue()));
}
}

private DiscoveryNode newNode(String nodeId, Set<DiscoveryNodeRole> roles) {
return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@

package org.elasticsearch.xpack.ilm;

import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest;
import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
import org.elasticsearch.xpack.core.DataTier;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.core.XPackSettings;
Expand All @@ -30,6 +35,7 @@
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
Expand Down Expand Up @@ -138,4 +144,89 @@ public void testIndexDataTierMigration() throws Exception {
assertThat(indexLifecycleExplainResponse.getStep(), is("complete"));
});
}

public void testUserOptsOutOfTierMigration() throws Exception {
internalCluster().startMasterOnlyNodes(1, Settings.EMPTY);
logger.info("starting hot data node");
internalCluster().startNode(hotNode(Settings.EMPTY));

Phase hotPhase = new Phase("hot", TimeValue.ZERO, Collections.emptyMap());
Phase warmPhase = new Phase("warm", TimeValue.ZERO, Collections.emptyMap());
Phase coldPhase = new Phase("cold", TimeValue.ZERO, Collections.emptyMap());
LifecyclePolicy lifecyclePolicy = new LifecyclePolicy(policy, Map.of("hot", hotPhase, "warm", warmPhase, "cold", coldPhase));
PutLifecycleAction.Request putLifecycleRequest = new PutLifecycleAction.Request(lifecyclePolicy);
PutLifecycleAction.Response putLifecycleResponse = client().execute(PutLifecycleAction.INSTANCE, putLifecycleRequest).get();
assertAcked(putLifecycleResponse);

Settings settings = Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, 1)
.put(SETTING_NUMBER_OF_REPLICAS, 1).put(LifecycleSettings.LIFECYCLE_NAME, policy).build();
CreateIndexResponse res = client().admin().indices().prepareCreate(managedIndex).setSettings(settings).get();
assertTrue(res.isAcknowledged());

assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("warm"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
});

Settings removeTierRoutingSetting = Settings.builder().putNull(DataTierAllocationDecider.INDEX_ROUTING_PREFER).build();
UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(managedIndex).settings(removeTierRoutingSetting);
assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet());

assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("warm"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
assertReplicaIsUnassigned();
}, 30, TimeUnit.SECONDS);

internalCluster().startNode(coldNode(Settings.EMPTY));

// the index should successfully allocate
ensureGreen(managedIndex);

// the index is successfully allocated but the migrate action from the cold phase re-configured the tier migration setting to the
// cold tier so ILM is stuck in `check-migration` in the cold phase this time
// we have 2 options to resume the ILM execution:
// 1. start another cold node so both the primary and replica can relocate to the cold nodes
// 2. remove the tier routing setting from the index again (we're doing this below)
assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("cold"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
});

// remove the tier routing setting again
assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet());

// wait for lifecycle to complete in the cold phase
assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("cold"));
assertThat(indexLifecycleExplainResponse.getStep(), is("complete"));
}, 30, TimeUnit.SECONDS);
}

private void assertReplicaIsUnassigned() {
ClusterAllocationExplainRequest explainReplicaShard =
new ClusterAllocationExplainRequest().setIndex(managedIndex).setPrimary(false).setShard(0);
ClusterAllocationExplainResponse response = client().admin().cluster().allocationExplain(explainReplicaShard).actionGet();
assertThat(response.getExplanation().getShardState(), is(ShardRoutingState.UNASSIGNED));
}
}