Skip to content

Commit ee998ac

Browse files
committed
Do not fail snapshot when deleting a missing snapshotted file (#30332)
When deleting or creating a snapshot for a given shard, elasticsearch usually starts by listing all the existing snapshotted files in the repository. Then it computes a diff and deletes the snapshotted files that are not needed anymore. During this deletion, an exception is thrown if the file to be deleted does not exist anymore. This behavior is challenging with cloud based repository implementations like S3 where a file that has been deleted can still appear in the bucket for few seconds/minutes (because the deletion can take some time to be fully replicated on S3). If the deleted file appears in the listing of files, then the following deletion will fail with a NoSuchFileException and the snapshot will be partially created/deleted. This pull request makes the deletion of these files a bit less strict, ie not failing if the file we want to delete does not exist anymore. It introduces a new BlobContainer.deleteIgnoringIfNotExists() method that can be used at some specific places where not failing when deleting a file is considered harmless. Closes #28322
1 parent 9c77eab commit ee998ac

File tree

6 files changed

+79
-52
lines changed

6 files changed

+79
-52
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ written to by an older Elasticsearch after writing to it with a newer Elasticsea
235235

236236
Java High Level REST Client::
237237
* Bulk processor#awaitClose to close scheduler {pull}29263[#29263]
238+
Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
239+
Do not fail snapshot when deleting a missing snapshotted file ({pull}30332[#30332])
238240

239241
Java Low Level REST Client::
240242
* REST client: hosts marked dead for the first time should not be immediately retried {pull}29230[#29230]

server/src/main/java/org/elasticsearch/common/blobstore/BlobContainer.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ public interface BlobContainer {
7575
void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
7676

7777
/**
78-
* Deletes a blob with giving name, if the blob exists. If the blob does not exist, this method throws an IOException.
78+
* Deletes a blob with giving name, if the blob exists. If the blob does not exist,
79+
* this method throws a NoSuchFileException.
7980
*
8081
* @param blobName
8182
* The name of the blob to delete.
@@ -84,6 +85,21 @@ public interface BlobContainer {
8485
*/
8586
void deleteBlob(String blobName) throws IOException;
8687

88+
/**
89+
* Deletes a blob with giving name, ignoring if the blob does not exist.
90+
*
91+
* @param blobName
92+
* The name of the blob to delete.
93+
* @throws IOException if the blob exists but could not be deleted.
94+
*/
95+
default void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
96+
try {
97+
deleteBlob(blobName);
98+
} catch (final NoSuchFileException ignored) {
99+
// This exception is ignored
100+
}
101+
}
102+
87103
/**
88104
* Lists all blobs in the container.
89105
*

server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,6 @@ private BlobStoreIndexShardSnapshots(Map<String, FileInfo> files, List<SnapshotF
102102
this.physicalFiles = unmodifiableMap(mapBuilder);
103103
}
104104

105-
private BlobStoreIndexShardSnapshots() {
106-
shardSnapshots = Collections.emptyList();
107-
files = Collections.emptyMap();
108-
physicalFiles = Collections.emptyMap();
109-
}
110-
111-
112105
/**
113106
* Returns list of snapshots
114107
*

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreFormat.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ public T read(BlobContainer blobContainer, String name) throws IOException {
9090
return readBlob(blobContainer, blobName);
9191
}
9292

93-
9493
/**
9594
* Deletes obj in the blob container
9695
*/

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 44 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120

121121
import static java.util.Collections.emptyMap;
122122
import static java.util.Collections.unmodifiableMap;
123+
import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
123124

124125
/**
125126
* BlobStore - based implementation of Snapshot Repository
@@ -780,7 +781,7 @@ private void writeAtomic(final String blobName, final BytesReference bytesRef) t
780781
} catch (IOException ex) {
781782
// temporary blob creation or move failed - try cleaning up
782783
try {
783-
snapshotsBlobContainer.deleteBlob(tempBlobName);
784+
snapshotsBlobContainer.deleteBlobIgnoringIfNotExists(tempBlobName);
784785
} catch (IOException e) {
785786
ex.addSuppressed(e);
786787
}
@@ -915,13 +916,13 @@ public void delete() {
915916
}
916917
}
917918
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
918-
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
919+
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot deletion [" + snapshotId + "]");
919920
}
920921

921922
/**
922923
* Loads information about shard snapshot
923924
*/
924-
public BlobStoreIndexShardSnapshot loadSnapshot() {
925+
BlobStoreIndexShardSnapshot loadSnapshot() {
925926
try {
926927
return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
927928
} catch (IOException ex) {
@@ -930,54 +931,57 @@ public BlobStoreIndexShardSnapshot loadSnapshot() {
930931
}
931932

932933
/**
933-
* Removes all unreferenced files from the repository and writes new index file
934+
* Writes a new index file for the shard and removes all unreferenced files from the repository.
934935
*
935-
* We need to be really careful in handling index files in case of failures to make sure we have index file that
936-
* points to files that were deleted.
936+
* We need to be really careful in handling index files in case of failures to make sure we don't
937+
* have index file that points to files that were deleted.
937938
*
938-
*
939-
* @param snapshots list of active snapshots in the container
939+
* @param snapshots list of active snapshots in the container
940940
* @param fileListGeneration the generation number of the snapshot index file
941-
* @param blobs list of blobs in the container
941+
* @param blobs list of blobs in the container
942+
* @param reason a reason explaining why the shard index file is written
942943
*/
943-
protected void finalize(List<SnapshotFiles> snapshots, int fileListGeneration, Map<String, BlobMetaData> blobs) {
944-
BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
945-
// delete old index files first
946-
for (String blobName : blobs.keySet()) {
947-
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
948-
try {
949-
blobContainer.deleteBlob(blobName);
950-
} catch (IOException e) {
951-
// We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
952-
// with references to non-existing files
953-
throw new IndexShardSnapshotFailedException(shardId, "error deleting index file ["
954-
+ blobName + "] during cleanup", e);
955-
}
944+
protected void finalize(final List<SnapshotFiles> snapshots,
945+
final int fileListGeneration,
946+
final Map<String, BlobMetaData> blobs,
947+
final String reason) {
948+
final String indexGeneration = Integer.toString(fileListGeneration);
949+
final String currentIndexGen = indexShardSnapshotsFormat.blobName(indexGeneration);
950+
951+
final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
952+
try {
953+
// If we deleted all snapshots, we don't need to create a new index file
954+
if (snapshots.size() > 0) {
955+
indexShardSnapshotsFormat.writeAtomic(updatedSnapshots, blobContainer, indexGeneration);
956956
}
957-
}
958957

959-
// now go over all the blobs, and if they don't exist in a snapshot, delete them
960-
for (String blobName : blobs.keySet()) {
961-
// delete unused files
962-
if (blobName.startsWith(DATA_BLOB_PREFIX)) {
963-
if (newSnapshots.findNameFile(BlobStoreIndexShardSnapshot.FileInfo.canonicalName(blobName)) == null) {
958+
// Delete old index files
959+
for (final String blobName : blobs.keySet()) {
960+
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
964961
try {
965-
blobContainer.deleteBlob(blobName);
962+
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
966963
} catch (IOException e) {
967-
// TODO: don't catch and let the user handle it?
968-
logger.debug(() -> new ParameterizedMessage("[{}] [{}] error deleting blob [{}] during cleanup", snapshotId, shardId, blobName), e);
964+
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete index blob [{}] during finalization",
965+
snapshotId, shardId, blobName), e);
966+
throw e;
969967
}
970968
}
971969
}
972-
}
973970

974-
// If we deleted all snapshots - we don't need to create the index file
975-
if (snapshots.size() > 0) {
976-
try {
977-
indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
978-
} catch (IOException e) {
979-
throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
971+
// Delete all blobs that don't exist in a snapshot
972+
for (final String blobName : blobs.keySet()) {
973+
if (blobName.startsWith(DATA_BLOB_PREFIX) && (updatedSnapshots.findNameFile(canonicalName(blobName)) == null)) {
974+
try {
975+
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
976+
} catch (IOException e) {
977+
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete data blob [{}] during finalization",
978+
snapshotId, shardId, blobName), e);
979+
}
980+
}
980981
}
982+
} catch (IOException e) {
983+
String message = "Failed to finalize " + reason + " with shard index [" + currentIndexGen + "]";
984+
throw new IndexShardSnapshotFailedException(shardId, message, e);
981985
}
982986
}
983987

@@ -1003,7 +1007,7 @@ protected long findLatestFileNameGeneration(Map<String, BlobMetaData> blobs) {
10031007
if (!name.startsWith(DATA_BLOB_PREFIX)) {
10041008
continue;
10051009
}
1006-
name = BlobStoreIndexShardSnapshot.FileInfo.canonicalName(name);
1010+
name = canonicalName(name);
10071011
try {
10081012
long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
10091013
if (currentGen > generation) {
@@ -1217,7 +1221,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
12171221
newSnapshotsList.add(point);
12181222
}
12191223
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
1220-
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
1224+
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot creation [" + snapshotId + "]");
12211225
snapshotStatus.moveToDone(System.currentTimeMillis());
12221226

12231227
}

test/framework/src/main/java/org/elasticsearch/repositories/ESBlobStoreContainerTestCase.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@
2929

3030
import java.io.IOException;
3131
import java.io.InputStream;
32+
import java.nio.file.NoSuchFileException;
3233
import java.util.Arrays;
3334
import java.util.HashMap;
3435
import java.util.Map;
3536

36-
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
3737
import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
3838
import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
39+
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
3940
import static org.hamcrest.CoreMatchers.equalTo;
4041
import static org.hamcrest.CoreMatchers.notNullValue;
4142

@@ -116,15 +117,27 @@ public void testDeleteBlob() throws IOException {
116117
try (BlobStore store = newBlobStore()) {
117118
final String blobName = "foobar";
118119
final BlobContainer container = store.blobContainer(new BlobPath());
119-
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
120+
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
120121

121122
byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
122123
final BytesArray bytesArray = new BytesArray(data);
123124
writeBlob(container, blobName, bytesArray);
124125
container.deleteBlob(blobName); // should not raise
125126

126127
// blob deleted, so should raise again
127-
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
128+
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
129+
}
130+
}
131+
132+
public void testDeleteBlobIgnoringIfNotExists() throws IOException {
133+
try (BlobStore store = newBlobStore()) {
134+
BlobPath blobPath = new BlobPath();
135+
if (randomBoolean()) {
136+
blobPath = blobPath.add(randomAlphaOfLengthBetween(1, 10));
137+
}
138+
139+
final BlobContainer container = store.blobContainer(blobPath);
140+
container.deleteBlobIgnoringIfNotExists("does_not_exist");
128141
}
129142
}
130143

0 commit comments

Comments
 (0)