Skip to content

Commit

Permalink
[PLAT-11299]: Backup deletion improvements
Browse files Browse the repository at this point in the history
Summary:
Added the changes to have 3 retries before actually calling out a backup deletion failure.
In the case of GCS backup deletion, we will now create batches of size 100 only, which does increase the backup deletion time, but it is mentioned as the best practice in Google Docs: https://cloud.google.com/storage/docs/batch
```
You should not include more than 100 calls in a single batch request. If you need to make more calls than that, use multiple batch requests. The total batch request payload must be less than 10MB.
```

On side note: I was able to verify that if we try to delete non-null zero blobs list, we get this below error, and fixed it by adding a check to return from func if blob size is zero.
```
java.lang.IllegalStateException: null
        at com.google.common.base.Preconditions.checkState(Preconditions.java:496)
        at com.google.api.client.util.Preconditions.checkState(Preconditions.java:79)
        at com.google.api.client.googleapis.batch.BatchRequest.execute(BatchRequest.java:231)
        at com.google.cloud.storage.spi.v1.HttpStorageRpc$DefaultRpcBatch.submit(HttpStorageRpc.java:205)
        at com.google.cloud.storage.StorageBatch.submit(StorageBatch.java:149)
```

Test Plan:
Tested manually by
 - failing the backups deletion once to check that backup deletion failed state is updated only after 3 retries.
 - deleting empty backup and it passed.
 - deleting a backup of 10000 obect by creating more than 4000 tables.

Reviewers: vpatibandla, vkumar, kkg, #yba-api-review!

Reviewed By: vpatibandla

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D30425
  • Loading branch information
vipul-yb committed Nov 27, 2023
1 parent 03e91df commit 8483e3d
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public class BackupGarbageCollector {
private static final String GCS = Util.GCS;
private static final String S3 = Util.S3;
private static final String NFS = Util.NFS;
private static final int BACKUP_DELETION_MAX_RETRIES_COUNT = 3;

public static final Gauge DELETE_BACKUP_FAILURE =
Gauge.build("ybp_delete_backup_failure", "Count of failed delete backup attempt")
Expand Down Expand Up @@ -106,6 +107,7 @@ public BackupGarbageCollector(

public void start() {
Duration gcInterval = this.gcRunInterval();
handleDeleteInProgressBackups();
platformScheduler.schedule(
getClass().getSimpleName(), Duration.ZERO, gcInterval, this::scheduleRunner);
}
Expand All @@ -118,6 +120,18 @@ private int getDeleteExpiredBackupMaxGCCount() {
return confGetter.getGlobalConf(GlobalConfKeys.deleteExpiredBackupMaxGCSize);
}

public void handleDeleteInProgressBackups() {
for (Customer customer : Customer.getAll()) {
List<Backup> deleteInProgressBackups =
Backup.findAllBackupWithState(
customer.getUuid(), Collections.singletonList(BackupState.DeleteInProgress));
if (deleteInProgressBackups != null) {
deleteInProgressBackups.forEach(
backup -> backup.transitionState(BackupState.QueuedForDeletion));
}
}
}

void scheduleRunner() {
log.info("Running Backup Garbage Collector");
try {
Expand Down Expand Up @@ -307,11 +321,27 @@ public synchronized boolean deleteBackup(Customer customer, UUID backupUUID) {
case AZ:
CloudUtil cloudUtil = storageUtilFactory.getCloudUtil(customerConfig.getName());
backupLocations = BackupUtil.getBackupLocations(backup);
cloudUtil.deleteKeyIfExists(customerConfig.getDataObject(), backupLocations.get(0));
cloudUtil.deleteStorage(customerConfig.getDataObject(), backupLocations);
backup.delete();
deletedSuccessfully = true;
log.info("Backup {} is successfully deleted", backupUUID);
int numRetries = 0;
long sleepTimeInMilliSeconds = 5000;
while (numRetries < BACKUP_DELETION_MAX_RETRIES_COUNT && !deletedSuccessfully) {
if (cloudUtil.deleteKeyIfExists(
customerConfig.getDataObject(), backupLocations.get(0))
&& cloudUtil.deleteStorage(customerConfig.getDataObject(), backupLocations)) {
deletedSuccessfully = true;
}
if (!deletedSuccessfully) {
Thread.sleep(sleepTimeInMilliSeconds);
sleepTimeInMilliSeconds = sleepTimeInMilliSeconds * 2;
}
numRetries++;
}
if (deletedSuccessfully) {
backup.delete();
log.info("Backup {} is successfully deleted", backupUUID);
} else {
backup.transitionState(Backup.BackupState.FailedToDelete);
log.info("Backup {} deletion failed", backupUUID);
}
break;
case NFS:
if (isUniversePresent(backup)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,28 @@ public void run() {
case GCS:
case AZ:
for (Backup backup : backupList) {
boolean success = true;
try {
CloudUtil cloudUtil = cloudUtilFactory.getCloudUtil(customerConfig.getName());
backupLocations = BackupUtil.getBackupLocations(backup);
cloudUtil.deleteKeyIfExists(
customerConfig.getDataObject(), backupLocations.get(0));
cloudUtil.deleteStorage(customerConfig.getDataObject(), backupLocations);
success =
success
&& cloudUtil.deleteKeyIfExists(
customerConfig.getDataObject(), backupLocations.get(0));
if (success) {
success =
success
&& cloudUtil.deleteStorage(
customerConfig.getDataObject(), backupLocations);
}
} catch (Exception e) {
success = false;
log.error(" Error in deleting backup " + backup.getBackupUUID().toString(), e);
backup.transitionState(Backup.BackupState.FailedToDelete);
} finally {
if (backup.getState() != Backup.BackupState.FailedToDelete) {
if (success) {
backup.delete();
} else {
backup.transitionState(Backup.BackupState.FailedToDelete);
}
}
}
Expand Down
21 changes: 10 additions & 11 deletions managed/src/main/java/com/yugabyte/yw/common/AWSUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,7 @@ public void checkStoragePrefixValidity(String configLocation, String backupLocat
}

@Override
public void deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation)
throws Exception {
public boolean deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation) {
String[] splitLocation = getSplitLocationValue(defaultBackupLocation);
String bucketName = splitLocation[0];
String objectPrefix = splitLocation[1];
Expand All @@ -230,17 +229,17 @@ public void deleteKeyIfExists(CustomerConfigData configData, String defaultBacku
ListObjectsV2Result listObjectsResult = s3Client.listObjectsV2(bucketName, keyLocation);
if (listObjectsResult.getKeyCount() == 0) {
log.info("Specified Location " + keyLocation + " does not contain objects");
return;
} else {
log.debug("Retrieved blobs info for bucket " + bucketName + " with prefix " + keyLocation);
retrieveAndDeleteObjects(listObjectsResult, bucketName, s3Client);
}
} catch (AmazonS3Exception e) {
log.error("Error while deleting key object from bucket " + bucketName, e.getErrorMessage());
throw e;
} catch (Exception e) {
log.error("Error while deleting key object at location: {}", keyLocation, e);
return false;
} finally {
maybeEnableCertVerification();
}
return true;
}

// For S3 location: s3://bucket/suffix
Expand Down Expand Up @@ -420,8 +419,7 @@ public String getOrCreateHostBase(
}

@Override
public void deleteStorage(CustomerConfigData configData, List<String> backupLocations)
throws Exception {
public boolean deleteStorage(CustomerConfigData configData, List<String> backupLocations) {
for (String backupLocation : backupLocations) {
try {
maybeDisableCertVerification();
Expand Down Expand Up @@ -450,13 +448,14 @@ public void deleteStorage(CustomerConfigData configData, List<String> backupLoca
"Retrieved blobs info for bucket " + bucketName + " with prefix " + objectPrefix);
retrieveAndDeleteObjects(listObjectsResult, bucketName, s3Client);
} while (nextContinuationToken != null);
} catch (AmazonS3Exception e) {
log.error(" Error in deleting objects at location " + backupLocation, e.getErrorMessage());
throw e;
} catch (Exception e) {
log.error(" Error in deleting objects at location " + backupLocation, e);
return false;
} finally {
maybeEnableCertVerification();
}
}
return true;
}

@Override
Expand Down
27 changes: 13 additions & 14 deletions managed/src/main/java/com/yugabyte/yw/common/AZUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ public void checkStoragePrefixValidity(String configLocation, String backupLocat
}

@Override
public void deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation)
throws Exception {
public boolean deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation) {
CustomerConfigStorageAzureData azData = (CustomerConfigStorageAzureData) configData;
String[] splitLocation = getSplitLocationValue(defaultBackupLocation);
String azureUrl = "https://" + splitLocation[0];
Expand All @@ -150,12 +149,12 @@ public void deleteKeyIfExists(CustomerConfigData configData, String defaultBacku
retrieveAndDeleteObjects(pagedResponse, blobContainerClient);
} else {
log.info("Specified Location " + keyLocation + " does not contain objects");
return;
}
} catch (BlobStorageException e) {
log.error("Error while deleting key object from container " + container, e.getMessage());
throw e;
} catch (Exception e) {
log.error("Error while deleting key object at location: {}", keyLocation, e);
return false;
}
return true;
}

// Returns a map for <container_url, SAS token>
Expand Down Expand Up @@ -278,8 +277,7 @@ public BlobContainerClient createBlobContainerClient(String sasToken, String loc
}

@Override
public void deleteStorage(CustomerConfigData configData, List<String> backupLocations)
throws Exception {
public boolean deleteStorage(CustomerConfigData configData, List<String> backupLocations) {
CustomerConfigStorageAzureData azData = (CustomerConfigStorageAzureData) configData;
Map<String, String> containerTokenMap = getContainerTokenMap(azData);
for (String backupLocation : backupLocations) {
Expand All @@ -291,7 +289,8 @@ public void deleteStorage(CustomerConfigData configData, List<String> backupLoca
String containerEndpoint = String.format("%s/%s", azureUrl, container);
String sasToken = containerTokenMap.get(containerEndpoint);
if (StringUtils.isEmpty(sasToken)) {
throw new Exception(String.format("No SAS token for given location %s", backupLocation));
log.error("No SAS token for given location {}", backupLocation);
return false;
}
BlobContainerClient blobContainerClient =
createBlobContainerClient(azureUrl, sasToken, container);
Expand All @@ -301,16 +300,16 @@ public void deleteStorage(CustomerConfigData configData, List<String> backupLoca
Iterator<PagedResponse<BlobItem>> pagedResponse = pagedIterable.iterableByPage().iterator();
log.debug("Retrieved blobs info for container " + container + " with prefix " + blob);
retrieveAndDeleteObjects(pagedResponse, blobContainerClient);
} catch (BlobStorageException e) {
log.error(" Error in deleting objects at location " + backupLocation, e.getMessage());
throw e;
} catch (Exception e) {
log.error(" Error in deleting objects at location " + backupLocation, e);
return false;
}
}
return true;
}

public static void retrieveAndDeleteObjects(
Iterator<PagedResponse<BlobItem>> pagedResponse, BlobContainerClient blobContainerClient)
throws Exception {
Iterator<PagedResponse<BlobItem>> pagedResponse, BlobContainerClient blobContainerClient) {
while (pagedResponse.hasNext()) {
PagedResponse<BlobItem> response = pagedResponse.next();
BlobClient blobClient;
Expand Down
6 changes: 2 additions & 4 deletions managed/src/main/java/com/yugabyte/yw/common/CloudUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ public static enum Protocol {

public ConfigLocationInfo getConfigLocationInfo(String location);

public void deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation)
throws Exception;
public boolean deleteKeyIfExists(CustomerConfigData configData, String defaultBackupLocation);

public void deleteStorage(CustomerConfigData configData, List<String> backupLocations)
throws Exception;
public boolean deleteStorage(CustomerConfigData configData, List<String> backupLocations);

public <T> T listBuckets(CustomerConfigData configData);

Expand Down
Loading

0 comments on commit 8483e3d

Please sign in to comment.