Skip to content

Commit

Permalink
fix(recurringjob): keep snapshots for the backup
Browse files Browse the repository at this point in the history
Add a new global setting that lets users keep the job's `retain`
number of snapshots when a recurring job performs a backup.

Ref: 2997

Signed-off-by: James Lu <james.lu@suse.com>
  • Loading branch information
mantissahz authored and David Ko committed Nov 28, 2023
1 parent dd66388 commit 16c09fd
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 2 deletions.
25 changes: 23 additions & 2 deletions app/recurring_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,11 +565,18 @@ func (job *Job) filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs []longho
// Only consider deleting the snapshots that were created by our current job
snapshotCRs = filterSnapshotCRsWithLabel(snapshotCRs, types.RecurringJobLabel, jobLabel)

if job.task == longhorn.RecurringJobTypeSnapshot || job.task == longhorn.RecurringJobTypeSnapshotForceCreate {
allowBackupSnapshotDeleted, err := job.GetSettingAsBool(types.SettingNameAutoCleanupRecurringJobBackupSnapshot)
if err != nil {
job.logger.WithError(err).Warnf("Failed to get the setting %v", types.SettingNameAutoCleanupRecurringJobBackupSnapshot)
return []string{}
}

// For a recurring snapshot job, or when AutoCleanupRecurringJobBackupSnapshot is disabled, keep job.retain snapshots.
if job.task == longhorn.RecurringJobTypeSnapshot || job.task == longhorn.RecurringJobTypeSnapshotForceCreate || !allowBackupSnapshotDeleted {
return filterExpiredItems(snapshotCRsToNameWithTimestamps(snapshotCRs), job.retain)
}

// For the recurring backup job, only keep the snapshot of the last backup and the current snapshot
// For the recurring backup job, only keep the snapshot of the last backup and the current snapshot when AutoCleanupRecurringJobBackupSnapshot is enabled.
retainingSnapshotCRs := map[string]struct{}{job.snapshotName: {}}
if !backupDone {
lastBackup, err := job.getLastBackup()
Expand Down Expand Up @@ -785,6 +792,20 @@ func (job *Job) UpdateVolumeStatus(v *longhorn.Volume) (*longhorn.Volume, error)
return job.lhClient.LonghornV1beta2().Volumes(job.namespace).UpdateStatus(context.TODO(), v, metav1.UpdateOptions{})
}

// GetSettingAsBool fetches the setting called name from the job's namespace
// and parses its stored value as a boolean.
//
// It returns false together with the error when the setting cannot be
// fetched or when its value is not accepted by strconv.ParseBool.
func (job *Job) GetSettingAsBool(name types.SettingName) (bool, error) {
	settings := job.lhClient.LonghornV1beta2().Settings(job.namespace)

	setting, err := settings.Get(context.TODO(), string(name), metav1.GetOptions{})
	if err != nil {
		return false, err
	}

	// strconv.ParseBool already yields (false, err) on a malformed value,
	// so its result can be returned directly.
	return strconv.ParseBool(setting.Value)
}

// waitForVolumeState timeout in second
func (job *Job) waitForVolumeState(state string, timeout int) (*longhornclient.Volume, error) {
volumeAPI := job.api.Volume
Expand Down
15 changes: 15 additions & 0 deletions types/setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ const (
SettingNameSystemManagedPodsImagePullPolicy = SettingName("system-managed-pods-image-pull-policy")
SettingNameAllowVolumeCreationWithDegradedAvailability = SettingName("allow-volume-creation-with-degraded-availability")
SettingNameAutoCleanupSystemGeneratedSnapshot = SettingName("auto-cleanup-system-generated-snapshot")
SettingNameAutoCleanupRecurringJobBackupSnapshot = SettingName("auto-cleanup-recurring-job-backup-snapshot")
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit = SettingName("concurrent-automatic-engine-upgrade-per-node-limit")
SettingNameBackingImageCleanupWaitInterval = SettingName("backing-image-cleanup-wait-interval")
SettingNameBackingImageRecoveryWaitInterval = SettingName("backing-image-recovery-wait-interval")
Expand Down Expand Up @@ -157,6 +158,7 @@ var (
SettingNameSystemManagedPodsImagePullPolicy,
SettingNameAllowVolumeCreationWithDegradedAvailability,
SettingNameAutoCleanupSystemGeneratedSnapshot,
SettingNameAutoCleanupRecurringJobBackupSnapshot,
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit,
SettingNameBackingImageCleanupWaitInterval,
SettingNameBackingImageRecoveryWaitInterval,
Expand Down Expand Up @@ -258,6 +260,7 @@ var (
SettingNameSystemManagedPodsImagePullPolicy: SettingDefinitionSystemManagedPodsImagePullPolicy,
SettingNameAllowVolumeCreationWithDegradedAvailability: SettingDefinitionAllowVolumeCreationWithDegradedAvailability,
SettingNameAutoCleanupSystemGeneratedSnapshot: SettingDefinitionAutoCleanupSystemGeneratedSnapshot,
SettingNameAutoCleanupRecurringJobBackupSnapshot: SettingDefinitionAutoCleanupRecurringJobBackupSnapshot,
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit: SettingDefinitionConcurrentAutomaticEngineUpgradePerNodeLimit,
SettingNameBackingImageCleanupWaitInterval: SettingDefinitionBackingImageCleanupWaitInterval,
SettingNameBackingImageRecoveryWaitInterval: SettingDefinitionBackingImageRecoveryWaitInterval,
Expand Down Expand Up @@ -809,6 +812,16 @@ var (
Default: "true",
}

// SettingDefinitionAutoCleanupRecurringJobBackupSnapshot defines the boolean
// setting registered under SettingNameAutoCleanupRecurringJobBackupSnapshot.
// It is required, user-editable, and defaults to "true".
//
// The recurring job reads it when pruning snapshots: when the value is false,
// a recurring backup job keeps job.retain snapshots, the same as a snapshot
// job, instead of keeping only the last backup's snapshot and the current one.
SettingDefinitionAutoCleanupRecurringJobBackupSnapshot = SettingDefinition{
DisplayName: "Automatically Cleanup Recurring Job Backup Snapshot",
Description: "This setting enables Longhorn to automatically cleanup the snapshot generated by a recurring backup job.",
Category: SettingCategorySnapshot,
Type: SettingTypeBool,
Required: true,
ReadOnly: false,
Default: "true",
}

SettingDefinitionConcurrentAutomaticEngineUpgradePerNodeLimit = SettingDefinition{
DisplayName: "Concurrent Automatic Engine Upgrade Per Node Limit",
Description: "This setting controls how Longhorn automatically upgrades volumes' engines after upgrading Longhorn manager. " +
Expand Down Expand Up @@ -1242,6 +1255,8 @@ func ValidateSetting(name, value string) (err error) {
fallthrough
case SettingNameAutoCleanupSystemGeneratedSnapshot:
fallthrough
case SettingNameAutoCleanupRecurringJobBackupSnapshot:
fallthrough
case SettingNameAutoDeletePodWhenVolumeDetachedUnexpectedly:
fallthrough
case SettingNameKubernetesClusterAutoscalerEnabled:
Expand Down

0 comments on commit 16c09fd

Please sign in to comment.