Skip to content

Less Verbose Serialization of Snapshot Failure in SLM Metadata #80942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class ElasticsearchException extends RuntimeException implements ToXConte
* to control if the {@code caused_by} element should render. Unlike most parameters to {@code toXContent} methods this parameter is
* internal only and not available as a URL parameter.
*/
private static final String REST_EXCEPTION_SKIP_CAUSE = "rest.exception.cause.skip";
public static final String REST_EXCEPTION_SKIP_CAUSE = "rest.exception.cause.skip";
/**
* Passed in the {@link Params} of {@link #generateThrowableXContent(XContentBuilder, Params, Throwable)}
* to control if the {@code stack_trace} element should render. Unlike most parameters to {@code toXContent} methods this parameter is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ public class SnapshotInvocationRecord extends AbstractDiffable<SnapshotInvocatio
static final ParseField TIMESTAMP = new ParseField("time");
static final ParseField DETAILS = new ParseField("details");

private String snapshotName;
private Long snapshotStartTimestamp;
private long snapshotFinishTimestamp;
private String details;
private final String snapshotName;
private final Long snapshotStartTimestamp;
private final long snapshotFinishTimestamp;
private final String details;

public static final ConstructingObjectParser<SnapshotInvocationRecord, String> PARSER = new ConstructingObjectParser<>(
"snapshot_policy_invocation_record",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.snapshots.SnapshotException;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
import org.elasticsearch.xpack.core.slm.SnapshotInvocationRecord;
Expand All @@ -39,13 +36,10 @@

import java.io.IOException;
import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import static org.elasticsearch.ElasticsearchException.REST_EXCEPTION_SKIP_STACK_TRACE;

public class SnapshotLifecycleTask implements SchedulerEngine.Listener {

private static final Logger logger = LogManager.getLogger(SnapshotLifecycleTask.class);
Expand Down Expand Up @@ -135,11 +129,6 @@ public void onResponse(CreateSnapshotResponse createSnapshotResponse) {
request.snapshot(),
"failed to create snapshot successfully, " + failures + " out of " + total + " total shards failed"
);
// Add each failed shard's exception as suppressed, the exception contains
// information about which shard failed
// TODO: this seems wrong, investigate whether we actually need all the shard level exception here given that we
// could be dealing with tens of thousands of them at a time
snapInfo.shardFailures().forEach(e::addSuppressed);
// Call the failure handler to register this as a failure and persist it
onFailure(e);
}
Expand Down Expand Up @@ -194,13 +183,17 @@ static Optional<SnapshotLifecyclePolicyMetadata> getSnapPolicyMetadata(final Str
);
}

public static String exceptionToString(Exception ex) {
return Strings.toString((builder, params) -> {
ElasticsearchException.generateThrowableXContent(builder, params, ex);
return builder;
}, ToXContent.EMPTY_PARAMS);
}

/**
* A cluster state update task to write the result of a snapshot job to the cluster metadata for the associated policy.
*/
private static class WriteJobStatus extends ClusterStateUpdateTask {
private static final ToXContent.Params STACKTRACE_PARAMS = new ToXContent.MapParams(
Collections.singletonMap(REST_EXCEPTION_SKIP_STACK_TRACE, "false")
);

private final String policyName;
private final String snapshotName;
Expand Down Expand Up @@ -230,18 +223,6 @@ static WriteJobStatus failure(String policyId, String snapshotName, long timesta
return new WriteJobStatus(policyId, snapshotName, timestamp, timestamp, Optional.of(exception));
}

private String exceptionToString() throws IOException {
if (exception.isPresent()) {
try (XContentBuilder causeXContentBuilder = JsonXContent.contentBuilder()) {
causeXContentBuilder.startObject();
ElasticsearchException.generateThrowableXContent(causeXContentBuilder, STACKTRACE_PARAMS, exception.get());
causeXContentBuilder.endObject();
return BytesReference.bytes(causeXContentBuilder).utf8ToString();
}
}
return null;
}

@Override
public ClusterState execute(ClusterState currentState) throws Exception {
SnapshotLifecycleMetadata snapMeta = currentState.metadata().custom(SnapshotLifecycleMetadata.TYPE);
Expand Down Expand Up @@ -274,7 +255,14 @@ public ClusterState execute(ClusterState currentState) throws Exception {

if (exception.isPresent()) {
stats.snapshotFailed(policyName);
newPolicyMetadata.setLastFailure(new SnapshotInvocationRecord(snapshotName, null, snapshotFinishTime, exceptionToString()));
newPolicyMetadata.setLastFailure(
new SnapshotInvocationRecord(
snapshotName,
null,
snapshotFinishTime,
exception.map(SnapshotLifecycleTask::exceptionToString).orElse(null)
)
);
} else {
stats.snapshotTaken(policyName);
newPolicyMetadata.setLastSuccess(new SnapshotInvocationRecord(snapshotName, snapshotStartTime, snapshotFinishTime, null));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

package org.elasticsearch.xpack.slm.history;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -19,16 +17,13 @@
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
import org.elasticsearch.xpack.slm.SnapshotLifecycleTask;

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;

import static org.elasticsearch.ElasticsearchException.REST_EXCEPTION_SKIP_STACK_TRACE;

/**
* Represents the record of a Snapshot Lifecycle Management action, so that it
* can be indexed in a history index or recorded to a log in a structured way
Expand Down Expand Up @@ -138,7 +133,7 @@ public static SnapshotHistoryItem creationFailureRecord(
String snapshotName,
Exception exception
) throws IOException {
String exceptionString = exceptionToString(exception);
String exceptionString = SnapshotLifecycleTask.exceptionToString(exception);
return new SnapshotHistoryItem(
timeStamp,
policy.getId(),
Expand All @@ -162,7 +157,7 @@ public static SnapshotHistoryItem deletionFailureRecord(
String repository,
Exception exception
) throws IOException {
String exceptionString = exceptionToString(exception);
String exceptionString = SnapshotLifecycleTask.exceptionToString(exception);
return new SnapshotHistoryItem(timestamp, policyId, repository, snapshotName, DELETE_OPERATION, false, null, exceptionString);
}

Expand Down Expand Up @@ -273,15 +268,4 @@ public String toString() {
return Strings.toString(this);
}

private static String exceptionToString(Exception exception) throws IOException {
Params stacktraceParams = new MapParams(Collections.singletonMap(REST_EXCEPTION_SKIP_STACK_TRACE, "false"));
String exceptionString;
try (XContentBuilder causeXContentBuilder = JsonXContent.contentBuilder()) {
causeXContentBuilder.startObject();
ElasticsearchException.generateThrowableXContent(causeXContentBuilder, stacktraceParams, exception);
causeXContentBuilder.endObject();
exceptionString = BytesReference.bytes(causeXContentBuilder).utf8ToString();
}
return exceptionString;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@ public void testPartialFailureSnapshot() throws Exception {
item.getErrorDetails(),
containsString("failed to create snapshot successfully, 1 out of 3 total shards failed")
);
assertThat(item.getErrorDetails(), containsString("forced failure"));
});

SnapshotLifecycleTask task = new SnapshotLifecycleTask(client, clusterService, historyStore);
Expand Down