From 2f5d61b20c30dfea756e7c21058174c9ef8c9852 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Sun, 4 Aug 2024 16:22:09 +0530 Subject: [PATCH] Fix the deletion of shard level segment files --- .../blobstore/BlobStoreRepository.java | 67 +++++++++++++++++-- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index c44a98f539579..4c04cff1704d1 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -1106,6 +1106,7 @@ private void doDeleteShardSnapshots( foundIndices, rootBlobs, updatedRepoData, + repositoryData, remoteStoreLockManagerFactory, afterCleanupsListener ); @@ -1131,6 +1132,7 @@ private void doDeleteShardSnapshots( foundIndices, rootBlobs, newRepoData, + repositoryData, remoteStoreLockManagerFactory, afterCleanupsListener ); @@ -1161,6 +1163,7 @@ private void cleanupUnlinkedRootAndIndicesBlobs( Map foundIndices, Map rootBlobs, RepositoryData updatedRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { @@ -1169,6 +1172,7 @@ private void cleanupUnlinkedRootAndIndicesBlobs( foundIndices, rootBlobs, updatedRepoData, + oldRepoData, remoteStoreLockManagerFactory, ActionListener.map(listener, ignored -> null) ); @@ -1532,6 +1536,7 @@ private void cleanupStaleBlobs( Map foundIndices, Map rootBlobs, RepositoryData newRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { @@ -1558,7 +1563,14 @@ private void cleanupStaleBlobs( if (foundIndices.keySet().equals(survivingIndexIds)) { groupedListener.onResponse(DeleteResult.ZERO); } else { - cleanupStaleIndices(foundIndices, survivingIndexIds, remoteStoreLockManagerFactory, groupedListener); + cleanupStaleIndices( + foundIndices, + survivingIndexIds, + remoteStoreLockManagerFactory, + groupedListener, + oldRepoData, + deletedSnapshots + ); } } @@ -1612,6 +1624,7 @@ public void cleanup( foundIndices, rootBlobs, repositoryData, + repositoryData, remoteStoreLockManagerFactory, ActionListener.map(listener, RepositoryCleanupResult::new) ), @@ -1705,7 +1718,9 @@ private void cleanupStaleIndices( Map foundIndices, Set survivingIndexIds, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - GroupedActionListener listener + GroupedActionListener listener, + RepositoryData oldRepoData, + Collection deletedSnapshots ) { final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { DeleteResult deleteResult = DeleteResult.ZERO; @@ -1729,7 +1744,13 @@ private void cleanupStaleIndices( foundIndices.size() - survivingIndexIds.size() ); for (int i = 0; i < workers; ++i) { - executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, groupedListener); + executeOneStaleIndexDelete( + staleIndicesToDelete, + remoteStoreLockManagerFactory, + groupedListener, + oldRepoData, + deletedSnapshots + ); } } catch (Exception e) { // TODO: We shouldn't be blanket catching and suppressing all exceptions here and instead handle them safely upstream. @@ -1752,7 +1773,9 @@ private static boolean isIndexPresent(ClusterService clusterService, String inde private void executeOneStaleIndexDelete( BlockingQueue> staleIndicesToDelete, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - GroupedActionListener listener + GroupedActionListener listener, + RepositoryData oldRepoData, + Collection deletedSnapshots ) throws InterruptedException { Map.Entry indexEntry = staleIndicesToDelete.poll(0L, TimeUnit.MILLISECONDS); if (indexEntry != null) { @@ -1776,9 +1799,41 @@ private void executeOneStaleIndexDelete( } } } + } else { + String indexToBeDeleted = indexEntry.getKey(); + // The indices map within RepositoryData should have just 1 IndexId which has same id as the one + // being deleted. Hence we are adding an assertion here. We still let the deletion happen as + // usual considering there can be more than 1 matching IndexIds. + List indexIds = oldRepoData.getIndices() + .values() + .stream() + .filter(idx -> idx.getId().equals(indexToBeDeleted)) + .collect(Collectors.toList()); + if (indexIds.size() > 1) { + logger.warn("There are more than 1 matching index ids [{}]", indexIds); + } + assert indexIds.size() == 1 : "There should be exact 1 match of IndexId"; + for (SnapshotId snId : deletedSnapshots) { + for (IndexId idx : indexIds) { + String indexMetaGeneration = oldRepoData.indexMetaDataGenerations().indexMetaBlobId(snId, idx); + final BlobContainer indexContainer = indexContainer(idx); + IndexMetadata indexMetadata = INDEX_METADATA_FORMAT.read( + indexContainer, + indexMetaGeneration, + namedXContentRegistry + ); + int numOfShards = indexMetadata.getNumberOfShards(); + for (int i = 0; i < numOfShards; i++) { + deleteResult.add(shardContainer(idx, i).delete()); + } + } + } } + // TODO - We need to do a metadata lookup and delete the shard level folders. + // TODO - Shallow snapshot only has shallow snap file. Need to check if there will be more changes + // to handle shallow snapshot deletion. // Deleting the index folder - deleteResult = indexEntry.getValue().delete(); + deleteResult.add(indexEntry.getValue().delete()); logger.debug("[{}] Cleaned up stale index [{}]", metadata.name(), indexSnId); } catch (IOException e) { logger.warn( @@ -1795,7 +1850,7 @@ private void executeOneStaleIndexDelete( logger.warn(new ParameterizedMessage("[{}] Exception during single stale index delete", metadata.name()), e); } - executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, listener); + executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, listener, oldRepoData, deletedSnapshots); return deleteResult; })); }