Skip to content

Commit 52898d2

Browse files
committed
EvictionController: Do not disable compute-service anymore
The HypervisorMaintenanceController already disabled the compute-service, so drop doing that in the eviction controller. We just wait for the condition to be reached.
1 parent 6c733e0 commit 52898d2

File tree

2 files changed

+76
-270
lines changed

2 files changed

+76
-270
lines changed

internal/controller/eviction_controller.go

Lines changed: 22 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,12 @@ import (
2828
"github.com/gophercloud/gophercloud/v2"
2929
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors"
3030
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
31-
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services"
3231
"k8s.io/apimachinery/pkg/api/meta"
3332
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3433
"k8s.io/apimachinery/pkg/runtime"
3534
"k8s.io/apimachinery/pkg/types"
3635
ctrl "sigs.k8s.io/controller-runtime"
3736
"sigs.k8s.io/controller-runtime/pkg/client"
38-
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3937
logger "sigs.k8s.io/controller-runtime/pkg/log"
4038

4139
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -50,7 +48,6 @@ type EvictionReconciler struct {
5048
}
5149

5250
const (
53-
evictionFinalizerName = "eviction-controller.cloud.sap/finalizer"
5451
EvictionControllerName = "eviction"
5552
shortRetryTime = 1 * time.Second
5653
defaultPollTime = 10 * time.Second
@@ -84,15 +81,7 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
8481

8582
// Being deleted
8683
if !eviction.DeletionTimestamp.IsZero() {
87-
err := r.handleFinalizer(ctx, eviction, hv)
88-
if err != nil {
89-
if errors.Is(err, ErrRetry) {
90-
return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
91-
}
92-
return ctrl.Result{}, err
93-
}
94-
log.Info("deleted")
95-
return ctrl.Result{}, err
84+
return ctrl.Result{}, nil
9685
}
9786

9887
statusCondition := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting)
@@ -157,9 +146,25 @@ func (r *EvictionReconciler) updateStatus(ctx context.Context, eviction, base *k
157146

158147
func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) (ctrl.Result, error) {
159148
base := eviction.DeepCopy()
160-
expectHypervisor := HasStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding)
161-
// Does the hypervisor even exist? Is it enabled/disabled?
162-
hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract()
149+
expectHypervisor := hv.Status.HypervisorID != "" && hv.Status.ServiceID != "" // The hypervisor has been registered
150+
151+
// If the hypervisor should exist, then we need to ensure it is disabled before we start evicting
152+
if expectHypervisor && !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) {
153+
// Hypervisor is not disabled (yet?), reflect that as a failing preflight check
154+
if meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
155+
Type: kvmv1.ConditionTypePreflight,
156+
Status: metav1.ConditionFalse,
157+
Message: "hypervisor not disabled",
158+
Reason: kvmv1.ConditionReasonFailed,
159+
}) {
160+
return ctrl.Result{}, r.updateStatus(ctx, eviction, base)
161+
}
162+
return ctrl.Result{RequeueAfter: defaultPollTime}, nil // Wait for hypervisor to be disabled
163+
}
164+
165+
// Fetch all virtual machines on the hypervisor
166+
trueVal := true
167+
hypervisor, err := hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract()
163168
if err != nil {
164169
if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
165170
return ctrl.Result{}, err
@@ -190,18 +195,6 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv
190195
}
191196
}
192197

193-
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) {
194-
// Hypervisor is not disabled/ensured, so we need to disable it
195-
return ctrl.Result{}, r.disableHypervisor(ctx, hypervisor, eviction)
196-
}
197-
198-
// Fetch all virtual machines on the hypervisor
199-
trueVal := true
200-
hypervisor, err = hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract()
201-
if err != nil {
202-
return ctrl.Result{}, err
203-
}
204-
205198
if hypervisor.Servers != nil {
206199
uuids := make([]string, len(*hypervisor.Servers))
207200
for i, server := range *hypervisor.Servers {
@@ -281,11 +274,9 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
281274
Message: fmt.Sprintf("Migration of instance %s failed: %s", vm.ID, vm.Fault.Message),
282275
Reason: kvmv1.ConditionReasonFailed,
283276
})
284-
if err := r.updateStatus(ctx, eviction, base); err != nil {
285-
return ctrl.Result{}, err
286-
}
287277

288-
return ctrl.Result{}, fmt.Errorf("error migrating instance %v", uuid)
278+
return ctrl.Result{}, errors.Join(fmt.Errorf("error migrating instance %v", uuid),
279+
r.updateStatus(ctx, eviction, base))
289280
}
290281

291282
currentHypervisor, _, _ := strings.Cut(vm.HypervisorHostname, ".")
@@ -358,169 +349,6 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
358349
return ctrl.Result{RequeueAfter: defaultPollTime}, err
359350
}
360351

361-
func (r *EvictionReconciler) evictionReason(eviction *kvmv1.Eviction) string {
362-
return fmt.Sprintf("Eviction %v/%v: %v", eviction.Namespace, eviction.Name, eviction.Spec.Reason)
363-
}
364-
365-
func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) error {
366-
if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
367-
return nil
368-
}
369-
370-
// As long as we didn't succeed to re-enable the hypervisor, which includes
371-
// - the hypervisor being gone, because it has been torn down
372-
// - the hypervisor having been enabled by someone else
373-
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorReEnabled) {
374-
err := r.enableHypervisorService(ctx, eviction, hypervisor)
375-
if err != nil {
376-
return err
377-
}
378-
}
379-
380-
base := eviction.DeepCopy()
381-
controllerutil.RemoveFinalizer(eviction, evictionFinalizerName)
382-
return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{}))
383-
}
384-
385-
func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) error {
386-
log := logger.FromContext(ctx)
387-
388-
hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract()
389-
if err != nil {
390-
if errors.Is(err, openstack.ErrNoHypervisor) {
391-
base := eviction.DeepCopy()
392-
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
393-
Type: kvmv1.ConditionTypeHypervisorReEnabled,
394-
Status: metav1.ConditionTrue,
395-
Message: "Hypervisor is gone, no need to re-enable",
396-
Reason: kvmv1.ConditionReasonSucceeded,
397-
})
398-
if changed {
399-
return r.updateStatus(ctx, eviction, base)
400-
} else {
401-
return nil
402-
}
403-
} else {
404-
base := eviction.DeepCopy()
405-
errorMessage := fmt.Sprintf("failed to get hypervisor due to %s", err)
406-
// update the condition to reflect the error, and retry the reconciliation
407-
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
408-
Type: kvmv1.ConditionTypeHypervisorReEnabled,
409-
Status: metav1.ConditionFalse,
410-
Message: errorMessage,
411-
Reason: kvmv1.ConditionReasonFailed,
412-
})
413-
414-
if changed {
415-
if err2 := r.updateStatus(ctx, eviction, base); err2 != nil {
416-
log.Error(err, "failed to store error message in condition", "message", errorMessage)
417-
return err2
418-
}
419-
}
420-
421-
return ErrRetry
422-
}
423-
}
424-
425-
if hypervisor.Service.DisabledReason != r.evictionReason(eviction) {
426-
base := eviction.DeepCopy()
427-
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
428-
Type: kvmv1.ConditionTypeHypervisorReEnabled,
429-
Status: metav1.ConditionTrue,
430-
Message: "Hypervisor already re-enabled for reason:" + hypervisor.Service.DisabledReason,
431-
Reason: kvmv1.ConditionReasonSucceeded,
432-
})
433-
if changed {
434-
return r.updateStatus(ctx, eviction, base)
435-
} else {
436-
return nil
437-
}
438-
}
439-
440-
enableService := services.UpdateOpts{Status: services.ServiceEnabled}
441-
log.Info("Enabling hypervisor", "id", hypervisor.Service.ID)
442-
_, err = services.Update(ctx, r.computeClient, hypervisor.Service.ID, enableService).Extract()
443-
444-
if err != nil {
445-
base := eviction.DeepCopy()
446-
errorMessage := fmt.Sprintf("failed to enable hypervisor due to %s", err)
447-
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
448-
Type: kvmv1.ConditionTypeHypervisorReEnabled,
449-
Status: metav1.ConditionFalse,
450-
Message: errorMessage,
451-
Reason: kvmv1.ConditionReasonFailed,
452-
})
453-
if changed {
454-
if err2 := r.updateStatus(ctx, eviction, base); err2 != nil {
455-
log.Error(err, "failed to store error message in condition", "message", errorMessage)
456-
}
457-
}
458-
return ErrRetry
459-
} else {
460-
base := eviction.DeepCopy()
461-
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
462-
Type: kvmv1.ConditionTypeHypervisorReEnabled,
463-
Status: metav1.ConditionTrue,
464-
Message: "Hypervisor re-enabled successfully",
465-
Reason: kvmv1.ConditionReasonSucceeded,
466-
})
467-
if changed {
468-
return r.updateStatus(ctx, eviction, base)
469-
} else {
470-
return nil
471-
}
472-
}
473-
}
474-
475-
// disableHypervisor disables the hypervisor service and adds a finalizer to the eviction
476-
// will add Condition HypervisorDisabled to the eviction status with the outcome
477-
func (r *EvictionReconciler) disableHypervisor(ctx context.Context, hypervisor *hypervisors.Hypervisor, eviction *kvmv1.Eviction) error {
478-
evictionReason := r.evictionReason(eviction)
479-
disabledReason := hypervisor.Service.DisabledReason
480-
481-
if disabledReason != "" && disabledReason != evictionReason {
482-
base := eviction.DeepCopy()
483-
// Disabled for another reason already
484-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
485-
Type: kvmv1.ConditionTypeHypervisorDisabled,
486-
Status: metav1.ConditionTrue,
487-
Message: fmt.Sprintf("Hypervisor already disabled for reason %q", disabledReason),
488-
Reason: kvmv1.ConditionReasonSucceeded,
489-
})
490-
return r.updateStatus(ctx, eviction, base)
491-
}
492-
493-
if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
494-
base := eviction.DeepCopy()
495-
controllerutil.AddFinalizer(eviction, evictionFinalizerName)
496-
return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{}), client.FieldOwner(EvictionControllerName))
497-
}
498-
499-
disableService := services.UpdateOpts{Status: services.ServiceDisabled,
500-
DisabledReason: r.evictionReason(eviction)}
501-
502-
_, err := services.Update(ctx, r.computeClient, hypervisor.Service.ID, disableService).Extract()
503-
base := eviction.DeepCopy()
504-
if err != nil {
505-
// We expect OpenStack calls to be transient errors, so we retry for the next reconciliation
506-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
507-
Type: kvmv1.ConditionTypeHypervisorDisabled,
508-
Status: metav1.ConditionFalse,
509-
Message: fmt.Sprintf("Failed to disable hypervisor: %v", err),
510-
Reason: kvmv1.ConditionReasonFailed,
511-
})
512-
return r.updateStatus(ctx, eviction, base)
513-
}
514-
515-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
516-
Type: kvmv1.ConditionTypeHypervisorDisabled,
517-
Status: metav1.ConditionTrue,
518-
Message: "Hypervisor disabled successfully",
519-
Reason: kvmv1.ConditionReasonSucceeded,
520-
})
521-
return r.updateStatus(ctx, eviction, base)
522-
}
523-
524352
func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error {
525353
log := logger.FromContext(ctx)
526354

0 commit comments

Comments
 (0)