Skip to content

Commit

Permalink
RIAv2 async operations controller work
Browse files Browse the repository at this point in the history
Signed-off-by: Scott Seago <sseago@redhat.com>
  • Loading branch information
sseago committed Mar 17, 2023
1 parent 117d5e8 commit 2155b2b
Show file tree
Hide file tree
Showing 33 changed files with 1,478 additions and 79 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/5993-sseago
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RIAv2 async operations controller work
18 changes: 18 additions & 0 deletions config/crd/v1/bases/velero.io_restores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ spec:
type: string
nullable: true
type: array
itemOperationTimeout:
description: ItemOperationTimeout specifies the time used to wait
for RestoreItemAction operations The default value is 1 hour.
type: string
labelSelector:
description: LabelSelector is a metav1.LabelSelector to filter with
when restoring individual objects from the backup. If empty or nil,
Expand Down Expand Up @@ -434,6 +438,20 @@ spec:
due to plugins that return additional related items to restore
type: integer
type: object
restoreItemOperationsAttempted:
description: RestoreItemOperationsAttempted is the total number of
attempted async RestoreItemAction operations for this restore.
type: integer
restoreItemOperationsCompleted:
description: RestoreItemOperationsCompleted is the total number of
successfully completed async RestoreItemAction operations for this
restore.
type: integer
restoreItemOperationsFailed:
description: RestoreItemOperationsFailed is the total number of async
RestoreItemAction operations for this restore which ended with an
error.
type: integer
startTimestamp:
description: StartTimestamp records the time the restore operation
was started. The server's time is used for StartTimestamps
Expand Down
2 changes: 1 addition & 1 deletion config/crd/v1/crds/crds.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ rules:
- get
- patch
- update
- apiGroups:
- velero.io
resources:
- restorestoragelocations
verbs:
- get
- apiGroups:
- velero.io
resources:
Expand Down
21 changes: 21 additions & 0 deletions design/riav2-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,27 @@ message OperationProgress {
}
```

In addition to the three new rpc methods added to the RestoreItemAction interface, there is also a new `Name()` method. This one is only actually used internally by Velero to get the name that the plugin was registered with, but it still must be defined in a plugin which implements RestoreItemActionV2 in order to implement the interface. It doesn't really matter what it returns, though, as this particular method is not delegated to the plugin via RPC calls. The new (and modified) interface methods for `RestoreItemAction` are as follows:
```
type BackupItemAction interface {
...
Name() string
...
Progress(operationID string, restore *api.Restore) (velero.OperationProgress, error)
Cancel(operationID string, backup *api.Restore) error
AreAdditionalItemsReady(AdditionalItems []velero.ResourceIdentifier, restore *api.Restore) (bool, error)
...
}
type RestoreItemActionExecuteOutput struct {
UpdatedItem runtime.Unstructured
AdditionalItems []ResourceIdentifier
SkipRestore bool
OperationID string
WaitForAdditionalItems bool
}
```

A new PluginKind, `RestoreItemActionV2`, will be created, and the restore process will be modified to use this plugin kind.
See [Plugin Versioning](plugin-versioning.md) for more details on implementation plans, including v1 adapters, etc.

Expand Down
20 changes: 20 additions & 0 deletions pkg/apis/velero/v1/restore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ type RestoreSpec struct {
// +optional
// +nullable
ExistingResourcePolicy PolicyType `json:"existingResourcePolicy,omitempty"`

// ItemOperationTimeout specifies the time used to wait for RestoreItemAction operations
// The default value is 1 hour.
// +optional
ItemOperationTimeout metav1.Duration `json:"itemOperationTimeout,omitempty"`
}

// RestoreHooks contains custom behaviors that should be executed during or post restore.
Expand Down Expand Up @@ -314,6 +319,21 @@ type RestoreStatus struct {
// +optional
// +nullable
Progress *RestoreProgress `json:"progress,omitempty"`

// RestoreItemOperationsAttempted is the total number of attempted
// async RestoreItemAction operations for this restore.
// +optional
RestoreItemOperationsAttempted int `json:"restoreItemOperationsAttempted,omitempty"`

// RestoreItemOperationsCompleted is the total number of successfully completed
// async RestoreItemAction operations for this restore.
// +optional
RestoreItemOperationsCompleted int `json:"restoreItemOperationsCompleted,omitempty"`

// RestoreItemOperationsFailed is the total number of async
// RestoreItemAction operations for this restore which ended with an error.
// +optional
RestoreItemOperationsFailed int `json:"restoreItemOperationsFailed,omitempty"`
}

// RestoreProgress stores information about the restore's execution progress
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/velero/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pkg/builder/restore_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,9 @@ func (b *RestoreBuilder) CompletionTimestamp(val time.Time) *RestoreBuilder {
b.object.Status.CompletionTimestamp = &metav1.Time{Time: val}
return b
}

// ItemOperationTimeout sets the Restore's ItemOperationTimeout
func (b *RestoreBuilder) ItemOperationTimeout(timeout time.Duration) *RestoreBuilder {
b.object.Spec.ItemOperationTimeout.Duration = timeout
return b
}
5 changes: 5 additions & 0 deletions pkg/cmd/cli/restore/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type CreateOptions struct {
IncludeClusterResources flag.OptionalBool
Wait bool
AllowPartiallyFailed flag.OptionalBool
ItemOperationTimeout time.Duration

client veleroclient.Interface
}
Expand Down Expand Up @@ -120,6 +121,7 @@ func (o *CreateOptions) BindFlags(flags *pflag.FlagSet) {
flags.Var(&o.StatusIncludeResources, "status-include-resources", "Resources to include in the restore status, formatted as resource.group, such as storageclasses.storage.k8s.io.")
flags.Var(&o.StatusExcludeResources, "status-exclude-resources", "Resources to exclude from the restore status, formatted as resource.group, such as storageclasses.storage.k8s.io.")
flags.VarP(&o.Selector, "selector", "l", "Only restore resources matching this label selector.")
flags.DurationVar(&o.ItemOperationTimeout, "item-operation-timeout", o.ItemOperationTimeout, "How long to wait for async plugin operations before timeout.")
f := flags.VarPF(&o.RestoreVolumes, "restore-volumes", "", "Whether to restore volumes from snapshots.")
// this allows the user to just specify "--restore-volumes" as shorthand for "--restore-volumes=true"
// like a normal bool flag
Expand Down Expand Up @@ -280,6 +282,9 @@ func (o *CreateOptions) Run(c *cobra.Command, f client.Factory) error {
RestorePVs: o.RestoreVolumes.Value,
PreserveNodePorts: o.PreserveNodePorts.Value,
IncludeClusterResources: o.IncludeClusterResources.Value,
ItemOperationTimeout: metav1.Duration{
Duration: o.ItemOperationTimeout,
},
},
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/cli/restore/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ func NewLogsCommand(f client.Factory) *cobra.Command {
}

switch restore.Status.Phase {
case velerov1api.RestorePhaseCompleted, velerov1api.RestorePhaseFailed, velerov1api.RestorePhasePartiallyFailed:
// terminal phases, don't exit.
case velerov1api.RestorePhaseCompleted, velerov1api.RestorePhaseFailed, velerov1api.RestorePhasePartiallyFailed, velerov1api.RestorePhaseWaitingForPluginOperations, velerov1api.RestorePhaseWaitingForPluginOperationsPartiallyFailed:
// terminal and waiting for plugin operations phases, don't exit.
default:
cmd.Exit("Logs for restore %q are not available until it's finished processing. Please wait "+
"until the restore has a phase of Completed or Failed and try again.", restoreName)
Expand Down
20 changes: 20 additions & 0 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
controller.DownloadRequest: {},
controller.GarbageCollection: {},
controller.Restore: {},
controller.RestoreOperations: {},
controller.Schedule: {},
controller.ServerStatusRequest: {},
}
Expand Down Expand Up @@ -831,6 +832,23 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
}

restoreOpsMap := itemoperationmap.NewRestoreItemOperationsMap()
if _, ok := enabledRuntimeControllers[controller.RestoreOperations]; ok {
r := controller.NewRestoreOperationsReconciler(
s.logger,
s.namespace,
s.mgr.GetClient(),
s.config.itemOperationSyncFrequency,
newPluginManager,
backupStoreGetter,
s.metrics,
restoreOpsMap,
)
if err := r.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.BackupOperations)
}
}

if _, ok := enabledRuntimeControllers[controller.DownloadRequest]; ok {
r := controller.NewDownloadRequestReconciler(
s.mgr.GetClient(),
Expand All @@ -839,6 +857,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
backupStoreGetter,
s.logger,
backupOpsMap,
restoreOpsMap,
)
if err := r.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.DownloadRequest)
Expand Down Expand Up @@ -890,6 +909,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
backupStoreGetter,
s.metrics,
s.config.formatFlag.Parse(),
s.config.defaultItemOperationTimeout,
)

if err = r.SetupWithManager(s.mgr); err != nil {
Expand Down
59 changes: 59 additions & 0 deletions pkg/cmd/util/output/restore_describer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/cmd/util/downloadrequest"
clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned"
"github.com/vmware-tanzu/velero/pkg/itemoperation"
"github.com/vmware-tanzu/velero/pkg/util/results"
)

Expand Down Expand Up @@ -154,18 +155,48 @@ func DescribeRestore(ctx context.Context, kbClient kbclient.Client, restore *vel
s = string(restore.Spec.ExistingResourcePolicy)
}
d.Printf("Existing Resource Policy: \t%s\n", s)
d.Printf("ItemOperationTimeout:\t%s\n", restore.Spec.ItemOperationTimeout.Duration)

d.Println()
d.Printf("Preserve Service NodePorts:\t%s\n", BoolPointerString(restore.Spec.PreserveNodePorts, "false", "true", "auto"))

d.Println()
describeRestoreItemOperations(ctx, kbClient, d, restore, details, insecureSkipTLSVerify, caCertFile)

if details {
describeRestoreResourceList(ctx, kbClient, d, restore, insecureSkipTLSVerify, caCertFile)
d.Println()
}
})
}

func describeRestoreItemOperations(ctx context.Context, kbClient kbclient.Client, d *Describer, restore *velerov1api.Restore, details bool, insecureSkipTLSVerify bool, caCertPath string) {
status := restore.Status
if status.RestoreItemOperationsAttempted > 0 {
if !details {
d.Printf("Restore Item Operations:\t%d of %d completed successfully, %d failed (specify --details for more information)\n", status.RestoreItemOperationsCompleted, status.RestoreItemOperationsAttempted, status.RestoreItemOperationsFailed)
return
}

buf := new(bytes.Buffer)
if err := downloadrequest.Stream(ctx, kbClient, restore.Namespace, restore.Name, velerov1api.DownloadTargetKindRestoreItemOperations, buf, downloadRequestTimeout, insecureSkipTLSVerify, caCertPath); err != nil {
d.Printf("Restore Item Operations:\t<error getting operation info: %v>\n", err)
return
}

var operations []*itemoperation.RestoreOperation
if err := json.NewDecoder(buf).Decode(&operations); err != nil {
d.Printf("Restore Item Operations:\t<error reading operation info: %v>\n", err)
return
}

d.Printf("Restore Item Operations:\n")
for _, operation := range operations {
describeRestoreItemOperation(d, operation)
}
}
}

func describeRestoreResults(ctx context.Context, kbClient kbclient.Client, d *Describer, restore *velerov1api.Restore, insecureSkipTLSVerify bool, caCertPath string) {
if restore.Status.Warnings == 0 && restore.Status.Errors == 0 {
return
Expand Down Expand Up @@ -208,6 +239,34 @@ func describeResult(d *Describer, name string, result results.Result) {
}
}

func describeRestoreItemOperation(d *Describer, operation *itemoperation.RestoreOperation) {
d.Printf("\tOperation for %s %s/%s:\n", operation.Spec.ResourceIdentifier, operation.Spec.ResourceIdentifier.Namespace, operation.Spec.ResourceIdentifier.Name)
d.Printf("\t\tRestore Item Action Plugin:\t%s\n", operation.Spec.RestoreItemAction)
d.Printf("\t\tOperation ID:\t%s\n", operation.Spec.OperationID)
d.Printf("\t\tPhase:\t%s\n", operation.Status.Phase)
if operation.Status.Error != "" {
d.Printf("\t\tOperation Error:\t%s\n", operation.Status.Error)
}
if operation.Status.NTotal > 0 || operation.Status.NCompleted > 0 {
d.Printf("\t\tProgress:\t%v of %v complete (%s)\n",
operation.Status.NCompleted,
operation.Status.NTotal,
operation.Status.OperationUnits)
}
if operation.Status.Description != "" {
d.Printf("\t\tProgress description:\t%s\n", operation.Status.Description)
}
if operation.Status.Created != nil {
d.Printf("\t\tCreated:\t%s\n", operation.Status.Created.String())
}
if operation.Status.Started != nil {
d.Printf("\t\tStarted:\t%s\n", operation.Status.Started.String())
}
if operation.Status.Updated != nil {
d.Printf("\t\tUpdated:\t%s\n", operation.Status.Updated.String())
}
}

// describePodVolumeRestores describes pod volume restores in human-readable format.
func describePodVolumeRestores(d *Describer, restores []velerov1api.PodVolumeRestore, details bool) {
// Get the type of pod volume uploader. Since the uploader only comes from a single source, we can
Expand Down
6 changes: 4 additions & 2 deletions pkg/controller/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ limitations under the License.
package controller

const (
BackupOperations = "backup-operations"
Backup = "backup"
BackupOperations = "backup-operations"
BackupDeletion = "backup-deletion"
BackupFinalizer = "backup-finalizer"
BackupRepo = "backup-repo"
Expand All @@ -29,21 +29,23 @@ const (
PodVolumeBackup = "pod-volume-backup"
PodVolumeRestore = "pod-volume-restore"
Restore = "restore"
RestoreOperations = "restore-operations"
Schedule = "schedule"
ServerStatusRequest = "server-status-request"
)

// DisableableControllers is a list of controllers that can be disabled
var DisableableControllers = []string{
BackupOperations,
Backup,
BackupOperations,
BackupDeletion,
BackupFinalizer,
BackupSync,
DownloadRequest,
GarbageCollection,
BackupRepo,
Restore,
RestoreOperations,
Schedule,
ServerStatusRequest,
}
26 changes: 19 additions & 7 deletions pkg/controller/download_request_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ type downloadRequestReconciler struct {

// used to force update of async backup item operations before processing download request
backupItemOperationsMap *itemoperationmap.BackupItemOperationsMap
// used to force update of async restore item operations before processing download request
restoreItemOperationsMap *itemoperationmap.RestoreItemOperationsMap

log logrus.FieldLogger
}
Expand All @@ -56,14 +58,16 @@ func NewDownloadRequestReconciler(
backupStoreGetter persistence.ObjectBackupStoreGetter,
log logrus.FieldLogger,
backupItemOperationsMap *itemoperationmap.BackupItemOperationsMap,
restoreItemOperationsMap *itemoperationmap.RestoreItemOperationsMap,
) *downloadRequestReconciler {
return &downloadRequestReconciler{
client: client,
clock: clock,
newPluginManager: newPluginManager,
backupStoreGetter: backupStoreGetter,
backupItemOperationsMap: backupItemOperationsMap,
log: log,
client: client,
clock: clock,
newPluginManager: newPluginManager,
backupStoreGetter: backupStoreGetter,
backupItemOperationsMap: backupItemOperationsMap,
restoreItemOperationsMap: restoreItemOperationsMap,
log: log,
}
}

Expand Down Expand Up @@ -129,7 +133,8 @@ func (r *downloadRequestReconciler) Reconcile(ctx context.Context, req ctrl.Requ

if downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreLog ||
downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreResults ||
downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreResourceList {
downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreResourceList ||
downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreItemOperations {
restore := &velerov1api.Restore{}
if err := r.client.Get(ctx, kbclient.ObjectKey{
Namespace: downloadRequest.Namespace,
Expand Down Expand Up @@ -172,6 +177,13 @@ func (r *downloadRequestReconciler) Reconcile(ctx context.Context, req ctrl.Requ
// ignore errors here. If we can't upload anything here, process the download as usual
_ = r.backupItemOperationsMap.UpdateForBackup(backupStore, backupName)
}
// If this is a request for restore item operations, force upload of in-memory operations that
// are not yet uploaded (if there are any)
if downloadRequest.Spec.Target.Kind == velerov1api.DownloadTargetKindRestoreItemOperations &&
r.restoreItemOperationsMap != nil {
// ignore errors here. If we can't upload anything here, process the download as usual
_ = r.restoreItemOperationsMap.UpdateForRestore(backupStore, downloadRequest.Spec.Target.Name)
}
if downloadRequest.Status.DownloadURL, err = backupStore.GetDownloadURL(downloadRequest.Spec.Target); err != nil {
return ctrl.Result{Requeue: true}, errors.WithStack(err)
}
Expand Down
Loading

0 comments on commit 2155b2b

Please sign in to comment.