Skip to content

Commit

Permalink
Merge 2abefce into e0b7929
Browse files Browse the repository at this point in the history
  • Loading branch information
matthchr authored Aug 4, 2021
2 parents e0b7929 + 2abefce commit e0dd5b6
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 112 deletions.
20 changes: 9 additions & 11 deletions hack/generated/controllers/generic_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type GenericReconciler struct {
Name string
GVK schema.GroupVersionKind
Controller controller.Controller
RequeueDelay time.Duration
RequeueDelayOverride time.Duration
CreateDeploymentName func(obj metav1.Object) (string, error)
}

Expand All @@ -54,11 +54,6 @@ type Options struct {
}

func (options *Options) setDefaults() {
// default requeue delay to 5 seconds
if options.RequeueDelay == 0 {
options.RequeueDelay = 5 * time.Second
}

// override deployment name generator, if provided
if options.CreateDeploymentName == nil {
options.CreateDeploymentName = createDeploymentName
Expand Down Expand Up @@ -135,7 +130,7 @@ func register(mgr ctrl.Manager, reconciledResourceLookup map[schema.GroupKind]sc
Log: log.WithName(controllerName),
Recorder: mgr.GetEventRecorderFor(controllerName),
GVK: gvk,
RequeueDelay: options.RequeueDelay,
RequeueDelayOverride: options.RequeueDelay,
CreateDeploymentName: options.CreateDeploymentName,
}

Expand Down Expand Up @@ -225,10 +220,13 @@ func (gr *GenericReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, err
}

// TODO: This actually means we don't get exponential backoff which Kubernetes does by default,
// TODO: but we need this for test today to get fast reconciles.
if result.Requeue && result.RequeueAfter == time.Duration(0) {
result.RequeueAfter = gr.RequeueDelay
// If we have a requeue delay override, set it for all situations where
// we are requeueing.
hasRequeueDelayOverride := gr.RequeueDelayOverride != time.Duration(0)
isRequeueing := result.Requeue || result.RequeueAfter > time.Duration(0)
if hasRequeueDelayOverride && isRequeueing {
result.RequeueAfter = gr.RequeueDelayOverride
result.Requeue = true
}

return result, nil
Expand Down
169 changes: 69 additions & 100 deletions hack/generated/pkg/reconcilers/azure_deployment_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -47,8 +46,6 @@ const (
ResourceStateAnnotation = "resource-state.infra.azure.com"
ResourceErrorAnnotation = "resource-error.infra.azure.com"
ResourceSigAnnotationKey = "resource-sig.infra.azure.com"
// PreserveDeploymentAnnotation is the key which tells the applier to keep or delete the deployment
PreserveDeploymentAnnotation = "x-preserve-deployment"
)

// TODO: Do we actually want this at the controller level or this level?
Expand Down Expand Up @@ -186,23 +183,6 @@ func (r *AzureDeploymentReconciler) SetDeploymentName(name string) {
genruntime.AddAnnotation(r.obj, DeploymentNameAnnotation, name)
}

func (r *AzureDeploymentReconciler) GetShouldPreserveDeployment() bool {
preserveDeploymentString, ok := r.obj.GetAnnotations()[PreserveDeploymentAnnotation]
if !ok {
return false
}

preserveDeployment, err := strconv.ParseBool(preserveDeploymentString)
// Anything other than an error is assumed to be false...
// TODO: Would we rather have any usage of this key imply true (regardless of value?)
if err != nil {
// TODO: Log here
return false
}

return preserveDeployment
}

func (r *AzureDeploymentReconciler) SetResourceProvisioningState(state armclient.ProvisioningState) {
// TODO: It's almost certainly not safe to use this as our serialized format as it's not guaranteed backwards compatible?
genruntime.AddAnnotation(r.obj, ResourceStateAnnotation, string(state))
Expand Down Expand Up @@ -497,7 +477,7 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.

err = r.CommitUpdate(ctx)
if err != nil {
// This is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// NotFound is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
}
Expand Down Expand Up @@ -531,7 +511,7 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.
err = r.CommitUpdate(ctx)

if err != nil {
// This is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// NotFound is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
}
Expand Down Expand Up @@ -560,7 +540,40 @@ func (r *AzureDeploymentReconciler) CreateDeployment(ctx context.Context) (ctrl.
return result, err
}

// TODO: There's a bit too much duplicated code between this and create deployment -- should be a good way to combine them?
func (r *AzureDeploymentReconciler) handleDeploymentFinished(ctx context.Context, deployment *armclient.Deployment) (ctrl.Result, error) {
var status genruntime.FromARMConverter
if deployment.IsSuccessful() {
// TODO: There's some overlap here with what Update does
if len(deployment.Properties.OutputResources) == 0 {
return ctrl.Result{}, errors.Errorf("template deployment didn't have any output resources")
}

resourceID, err := deployment.ResourceID()
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "getting resource ID from resource")
}

status, _, err = r.getStatus(ctx, resourceID)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "getting status from ARM")
}
}

err := r.Update(deployment, status)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "updating obj")
}

err = r.CommitUpdate(ctx)
if err != nil {
// NotFound is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
}

return ctrl.Result{}, nil
}

func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl.Result, error) {
deploymentID, deploymentIDOk := r.GetDeploymentID()
if !deploymentIDOk {
Expand All @@ -576,7 +589,7 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
r.SetDeploymentName("")
err = r.CommitUpdate(ctx)
if err != nil {
// This is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// NotFound is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
}
Expand All @@ -593,90 +606,46 @@ func (r *AzureDeploymentReconciler) MonitorDeployment(ctx context.Context) (ctrl
return ctrl.Result{}, errors.Wrapf(err, "getting deployment %q from ARM", deploymentID)
}

var status genruntime.FromARMConverter
if deployment.IsSuccessful() {
// TODO: There's some overlap here with what Update does
if len(deployment.Properties.OutputResources) == 0 {
return ctrl.Result{}, errors.Errorf("template deployment didn't have any output resources")
}

resourceID, idErr := deployment.ResourceID()
if idErr != nil {
return ctrl.Result{}, errors.Wrap(idErr, "getting resource ID from resource")
}

s, _, statusErr := r.getStatus(ctx, resourceID)
if statusErr != nil {
return ctrl.Result{}, errors.Wrap(statusErr, "getting status from ARM")
}
r.log.V(4).Info(
"Monitoring deployment",
"action", string(CreateOrUpdateActionMonitorDeployment),
"id", deploymentID,
"state", deployment.ProvisioningStateOrUnknown())
r.recorder.Event(
r.obj,
v1.EventTypeNormal,
string(CreateOrUpdateActionMonitorDeployment),
fmt.Sprintf("Monitoring Azure deployment ID=%q, state=%q", deploymentID, deployment.ProvisioningStateOrUnknown()))

status = s
// If the deployment isn't done yet, there's nothing to do just bail out
if !deployment.IsTerminalProvisioningState() {
r.log.V(3).Info("Deployment still running")
return ctrl.Result{Requeue: true, RequeueAfter: retryAfter}, nil
}

err = r.Update(deployment, status)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "updating obj")
}

err = r.CommitUpdate(ctx)
// The deployment is in a terminal state - let's handle it
r.log.Info(
"Deployment in terminal state",
"DeploymentID", deployment.ID,
"State", deployment.ProvisioningStateOrUnknown(),
"Error", deployment.ErrorOrEmpty())

// It is possible that we delete the deployment here and then are unable to persist the details of the created
// resource to etcd below. If this happens, a subsequent reconciliation will attempt to GET the deployment which will
// fail. That will trigger us to throw the deployment ID away and create a new one (which will end up being a no-op
// because the Azure resource already exists). Since it's expected that this sequence of events is rare, we don't
// try to optimize for preventing it with some sort of two phase commit or anything.
// TODO: Create a unit test that forces this specific sequence of events
r.log.V(4).Info("Deleting deployment", "DeploymentID", deployment.ID)
_, err = r.ARMClient.DeleteDeployment(ctx, deployment.ID)
if err != nil {
// This is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
return ctrl.Result{}, errors.Wrapf(err, "failed deleting deployment %q", deployment.ID)
}

// TODO: Could somehow have a method that grouped both of these calls
currentState := r.GetResourceProvisioningState()
r.log.V(4).Info("Monitoring deployment", "action", string(CreateOrUpdateActionMonitorDeployment), "id", deployment.ID, "state", currentState)
r.recorder.Event(r.obj, v1.EventTypeNormal, string(CreateOrUpdateActionMonitorDeployment), fmt.Sprintf("Monitoring Azure deployment ID=%q, state=%q", deployment.ID, currentState))

// We do two patches here because if we remove the deployment before we've actually confirmed we persisted
// the resource ID, then we will be unable to get the resource ID the next time around. Only once we have
// persisted the resource ID can we safely delete the deployment
retryAfter = time.Duration(0) // ARM can tell us how long to check after issuing DELETE
if deployment.IsTerminalProvisioningState() && !r.GetShouldPreserveDeployment() {
r.log.Info(
"Deployment in terminal state",
"DeploymentID", deployment.ID,
"State", deployment.ProvisioningStateOrUnknown(),
"Error", deployment.ErrorOrEmpty())

r.log.V(4).Info("Deleting deployment", "DeploymentID", deployment.ID)

retryAfter, err = r.ARMClient.DeleteDeployment(ctx, deployment.ID)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed deleting deployment %q", deployment.ID)
}

deployment.ID = ""
deployment.Name = ""

err = r.Update(deployment, status)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "updating obj")
}

err = r.CommitUpdate(ctx)
if err != nil {
// This is a superfluous error as per https://github.com/kubernetes-sigs/controller-runtime/issues/377
// The correct handling is just to ignore it and we will get an event shortly with the updated version to patch
return ctrl.Result{}, client.IgnoreNotFound(err)
}

r.log.V(4).Info(
"Cleared deployment info from resource",
"DeploymentID", r.GetDeploymentIDOrDefault(),
"DeploymentName", r.GetDeploymentNameOrDefault())
}

if deployment.IsTerminalProvisioningState() {
// we are done
return ctrl.Result{}, nil
}
deployment.ID = ""
deployment.Name = ""

r.log.V(3).Info("Deployment still running")
return ctrl.Result{Requeue: true, RequeueAfter: retryAfter}, err
return r.handleDeploymentFinished(ctx, deployment)
}

func (r *AzureDeploymentReconciler) ManageOwnership(ctx context.Context) (ctrl.Result, error) {
Expand Down
2 changes: 1 addition & 1 deletion hack/generated/pkg/testcommon/kube_test_context_envtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func createEnvtestContext(perTestContext PerTestContext) (*KubeBaseTestContext,
return nil, errors.Wrapf(err, "creating controller-runtime manager")
}

var requeueDelay time.Duration // defaults to 5s when zero is passed
var requeueDelay time.Duration
if perTestContext.AzureClientRecorder.Mode() == recorder.ModeReplaying {
perTestContext.T.Log("Minimizing requeue delay")
// skip requeue delays when replaying
Expand Down

0 comments on commit e0dd5b6

Please sign in to comment.