Skip to content

Commit 6b869ad

Browse files
committed
Only log warning when actually failing shards (#28558)
Currently the master node logs a warning message whenever it receives a failed shard request. However, this can be noisy because: (1) multiple failed shard requests can be issued for a single shard; and (2) failed shard requests can still be issued for an already failed shard. This commit moves the log-warn to AllocationService, in which the failing shard action actually happens. This is another prerequisite step in order to not ignore the shard not-available exceptions in the replication. Relates #28534
1 parent 48f5a64 commit 6b869ad

File tree

3 files changed

+7
-3
lines changed

3 files changed

+7
-3
lines changed

server/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ private static class ShardFailedTransportHandler implements TransportRequestHand
205205

206206
@Override
207207
public void messageReceived(FailedShardEntry request, TransportChannel channel) throws Exception {
208-
logger.warn((Supplier<?>) () -> new ParameterizedMessage("{} received shard failed for {}", request.shardId, request), request.failure);
208+
logger.debug((Supplier<?>) () -> new ParameterizedMessage("{} received shard failed for {}", request.shardId, request), request.failure);
209209
clusterService.submitStateUpdateTask(
210210
"shard-failed",
211211
request,

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.cluster.routing.allocation;
2121

22+
import org.apache.logging.log4j.message.ParameterizedMessage;
2223
import org.elasticsearch.cluster.ClusterInfoService;
2324
import org.elasticsearch.cluster.ClusterState;
2425
import org.elasticsearch.cluster.RestoreInProgress;
@@ -160,7 +161,7 @@ public ClusterState applyFailedShards(final ClusterState clusterState, final Lis
160161
if (staleShards.isEmpty() && failedShards.isEmpty()) {
161162
return clusterState;
162163
}
163-
ClusterState tmpState = IndexMetaDataUpdater.removeStaleIdsWithoutRoutings(clusterState, staleShards);
164+
ClusterState tmpState = IndexMetaDataUpdater.removeStaleIdsWithoutRoutings(clusterState, staleShards, logger);
164165

165166
RoutingNodes routingNodes = getMutableRoutingNodes(tmpState);
166167
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
@@ -188,6 +189,7 @@ public ClusterState applyFailedShards(final ClusterState clusterState, final Lis
188189
if (failedShardEntry.markAsStale()) {
189190
allocation.removeAllocationId(failedShard);
190191
}
192+
logger.warn(new ParameterizedMessage("failing shard [{}]", failedShardEntry), failedShardEntry.getFailure());
191193
routingNodes.failShard(logger, failedShard, unassignedInfo, indexMetaData, allocation.changes());
192194
} else {
193195
logger.trace("{} shard routing failed in an earlier iteration (routing: {})", shardToFail.shardId(), shardToFail);

server/src/main/java/org/elasticsearch/cluster/routing/allocation/IndexMetaDataUpdater.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.cluster.routing.allocation;
2121

22+
import org.apache.logging.log4j.Logger;
2223
import org.elasticsearch.cluster.ClusterState;
2324
import org.elasticsearch.cluster.metadata.IndexMetaData;
2425
import org.elasticsearch.cluster.metadata.MetaData;
@@ -210,7 +211,7 @@ private IndexMetaData.Builder updateInSyncAllocations(RoutingTable newRoutingTab
210211
* Removes allocation ids from the in-sync set for shard copies for which there is no routing entries in the routing table.
211212
* This method is called in AllocationService before any changes to the routing table are made.
212213
*/
213-
public static ClusterState removeStaleIdsWithoutRoutings(ClusterState clusterState, List<StaleShard> staleShards) {
214+
public static ClusterState removeStaleIdsWithoutRoutings(ClusterState clusterState, List<StaleShard> staleShards, Logger logger) {
214215
MetaData oldMetaData = clusterState.metaData();
215216
RoutingTable oldRoutingTable = clusterState.routingTable();
216217
MetaData.Builder metaDataBuilder = null;
@@ -238,6 +239,7 @@ public static ClusterState removeStaleIdsWithoutRoutings(ClusterState clusterSta
238239
}
239240
indexMetaDataBuilder.putInSyncAllocationIds(shardNumber, remainingInSyncAllocations);
240241
}
242+
logger.warn("{} marking unavailable shards as stale: {}", shardEntry.getKey(), idsToRemove);
241243
}
242244

243245
if (indexMetaDataBuilder != null) {

0 commit comments

Comments
 (0)