Skip to content

Commit a2c49a4

Browse files
Reduce Heap Use during Shard Snapshot (#60370) (#60440)
Instances of `BlobStoreIndexShardSnapshots` can be of non-trivial size. In case of snapshotting a larger number of shards the previous execution order would lead to memory use proportional to the number of shards for these objects. With this change, the number of these objects on heap is bounded by the size of the snapshot pool (except for in the BwC format path). This PR makes it so that they are written to the repository at the earliest possible point in time so that they can be garbage collected. If shard generations are used, we can safely write these right at the beginning of the shard snapshot. If shard generations are not used we can only write them at the end of the shard snapshot after all other blobs have been written. Closes #60173
1 parent 9e27f74 commit a2c49a4

File tree

1 file changed

+60
-45
lines changed

1 file changed

+60
-45
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 60 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,63 +1887,78 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
18871887
snapshotStatus.moveToStarted(startTime, indexIncrementalFileCount,
18881888
indexTotalNumberOfFiles, indexIncrementalSize, indexTotalFileSize);
18891889

1890-
final StepListener<Collection<Void>> allFilesUploadedListener = new StepListener<>();
1891-
allFilesUploadedListener.whenComplete(v -> {
1892-
final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
1893-
snapshotStatus.moveToFinalize(snapshotIndexCommit.getGeneration());
1894-
1895-
// now create and write the commit point
1896-
final BlobStoreIndexShardSnapshot snapshot = new BlobStoreIndexShardSnapshot(snapshotId.getName(),
1897-
lastSnapshotStatus.getIndexVersion(),
1898-
indexCommitPointFiles,
1899-
lastSnapshotStatus.getStartTime(),
1900-
threadPool.absoluteTimeInMillis() - lastSnapshotStatus.getStartTime(),
1901-
lastSnapshotStatus.getIncrementalFileCount(),
1902-
lastSnapshotStatus.getIncrementalSize()
1903-
);
1904-
1905-
logger.trace("[{}] [{}] writing shard snapshot file", shardId, snapshotId);
1890+
final String indexGeneration;
1891+
final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
1892+
// build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
1893+
List<SnapshotFiles> newSnapshotsList = new ArrayList<>();
1894+
newSnapshotsList.add(new SnapshotFiles(snapshotId.getName(), indexCommitPointFiles, shardStateIdentifier));
1895+
for (SnapshotFiles point : snapshots) {
1896+
newSnapshotsList.add(point);
1897+
}
1898+
final BlobStoreIndexShardSnapshots updatedBlobStoreIndexShardSnapshots = new BlobStoreIndexShardSnapshots(newSnapshotsList);
1899+
final Runnable afterWriteSnapBlob;
1900+
if (writeShardGens) {
1901+
// When using shard generations we can safely write the index-${uuid} blob before writing out any of the actual data
1902+
// for this shard since the uuid named blob will simply not be referenced in case of error and thus we will never
1903+
// reference a generation that has not had all its files fully uploaded.
1904+
indexGeneration = UUIDs.randomBase64UUID();
19061905
try {
1907-
INDEX_SHARD_SNAPSHOT_FORMAT.write(snapshot, shardContainer, snapshotId.getUUID(), compress);
1906+
writeShardIndexBlob(shardContainer, indexGeneration, updatedBlobStoreIndexShardSnapshots);
19081907
} catch (IOException e) {
1909-
throw new IndexShardSnapshotFailedException(shardId, "Failed to write commit point", e);
1910-
}
1911-
// build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
1912-
List<SnapshotFiles> newSnapshotsList = new ArrayList<>();
1913-
newSnapshotsList.add(new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), shardStateIdentifier));
1914-
for (SnapshotFiles point : snapshots) {
1915-
newSnapshotsList.add(point);
1908+
throw new IndexShardSnapshotFailedException(shardId,
1909+
"Failed to write shard level snapshot metadata for [" + snapshotId + "] to ["
1910+
+ INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration) + "]", e);
19161911
}
1917-
final List<String> blobsToDelete;
1918-
final String indexGeneration;
1919-
final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
1920-
if (writeShardGens) {
1921-
indexGeneration = UUIDs.randomBase64UUID();
1922-
blobsToDelete = Collections.emptyList();
1923-
} else {
1924-
indexGeneration = Long.toString(Long.parseLong(fileListGeneration) + 1);
1925-
// Delete all previous index-N blobs
1926-
blobsToDelete = blobs.stream().filter(blob -> blob.startsWith(SNAPSHOT_INDEX_PREFIX)).collect(Collectors.toList());
1927-
assert blobsToDelete.stream().mapToLong(b -> Long.parseLong(b.replaceFirst(SNAPSHOT_INDEX_PREFIX, "")))
1912+
afterWriteSnapBlob = () -> {};
1913+
} else {
1914+
// When not using shard generations we can only write the index-${N} blob after all other work for this shard has
1915+
// completed.
1916+
// Also, in case of numeric shard generations the data node has to take care of deleting old shard generations.
1917+
indexGeneration = Long.toString(Long.parseLong(fileListGeneration) + 1);
1918+
// Delete all previous index-N blobs
1919+
final List<String> blobsToDelete = blobs.stream().filter(blob -> blob.startsWith(SNAPSHOT_INDEX_PREFIX))
1920+
.collect(Collectors.toList());
1921+
assert blobsToDelete.stream().mapToLong(b -> Long.parseLong(b.replaceFirst(SNAPSHOT_INDEX_PREFIX, "")))
19281922
.max().orElse(-1L) < Long.parseLong(indexGeneration)
19291923
: "Tried to delete an index-N blob newer than the current generation [" + indexGeneration
19301924
+ "] when deleting index-N blobs " + blobsToDelete;
1931-
}
1932-
try {
1933-
writeShardIndexBlob(shardContainer, indexGeneration, new BlobStoreIndexShardSnapshots(newSnapshotsList));
1934-
} catch (IOException e) {
1935-
throw new IndexShardSnapshotFailedException(shardId,
1936-
"Failed to finalize snapshot creation [" + snapshotId + "] with shard index ["
1937-
+ INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration) + "]", e);
1938-
}
1939-
if (writeShardGens == false) {
1925+
afterWriteSnapBlob = () -> {
1926+
try {
1927+
writeShardIndexBlob(shardContainer, indexGeneration, updatedBlobStoreIndexShardSnapshots);
1928+
} catch (IOException e) {
1929+
throw new IndexShardSnapshotFailedException(shardId,
1930+
"Failed to finalize snapshot creation [" + snapshotId + "] with shard index ["
1931+
+ INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration) + "]", e);
1932+
}
19401933
try {
19411934
deleteFromContainer(shardContainer, blobsToDelete);
19421935
} catch (IOException e) {
19431936
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete old index-N blobs during finalization",
1944-
snapshotId, shardId), e);
1937+
snapshotId, shardId), e);
19451938
}
1939+
};
1940+
}
1941+
1942+
final StepListener<Collection<Void>> allFilesUploadedListener = new StepListener<>();
1943+
allFilesUploadedListener.whenComplete(v -> {
1944+
final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
1945+
snapshotStatus.moveToFinalize(snapshotIndexCommit.getGeneration());
1946+
1947+
// now create and write the commit point
1948+
logger.trace("[{}] [{}] writing shard snapshot file", shardId, snapshotId);
1949+
try {
1950+
INDEX_SHARD_SNAPSHOT_FORMAT.write(new BlobStoreIndexShardSnapshot(snapshotId.getName(),
1951+
lastSnapshotStatus.getIndexVersion(),
1952+
indexCommitPointFiles,
1953+
lastSnapshotStatus.getStartTime(),
1954+
threadPool.absoluteTimeInMillis() - lastSnapshotStatus.getStartTime(),
1955+
lastSnapshotStatus.getIncrementalFileCount(),
1956+
lastSnapshotStatus.getIncrementalSize()
1957+
), shardContainer, snapshotId.getUUID(), compress);
1958+
} catch (IOException e) {
1959+
throw new IndexShardSnapshotFailedException(shardId, "Failed to write commit point", e);
19461960
}
1961+
afterWriteSnapBlob.run();
19471962
snapshotStatus.moveToDone(threadPool.absoluteTimeInMillis(), indexGeneration);
19481963
listener.onResponse(indexGeneration);
19491964
}, listener::onFailure);

0 commit comments

Comments
 (0)