@@ -28,14 +28,12 @@ import (
2828 "github.com/gophercloud/gophercloud/v2"
2929 "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors"
3030 "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
31- "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services"
3231 "k8s.io/apimachinery/pkg/api/meta"
3332 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3433 "k8s.io/apimachinery/pkg/runtime"
3534 "k8s.io/apimachinery/pkg/types"
3635 ctrl "sigs.k8s.io/controller-runtime"
3736 "sigs.k8s.io/controller-runtime/pkg/client"
38- "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3937 logger "sigs.k8s.io/controller-runtime/pkg/log"
4038
4139 kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -50,7 +48,6 @@ type EvictionReconciler struct {
5048}
5149
5250const (
53- evictionFinalizerName = "eviction-controller.cloud.sap/finalizer"
5451 EvictionControllerName = "eviction"
5552 shortRetryTime = 1 * time .Second
5653 defaultPollTime = 10 * time .Second
@@ -84,15 +81,7 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
8481
8582 // Being deleted
8683 if ! eviction .DeletionTimestamp .IsZero () {
87- err := r .handleFinalizer (ctx , eviction , hv )
88- if err != nil {
89- if errors .Is (err , ErrRetry ) {
90- return ctrl.Result {RequeueAfter : defaultWaitTime }, nil
91- }
92- return ctrl.Result {}, err
93- }
94- log .Info ("deleted" )
95- return ctrl.Result {}, err
84+ return ctrl.Result {}, nil
9685 }
9786
9887 statusCondition := meta .FindStatusCondition (eviction .Status .Conditions , kvmv1 .ConditionTypeEvicting )
@@ -157,9 +146,25 @@ func (r *EvictionReconciler) updateStatus(ctx context.Context, eviction, base *k
157146
158147func (r * EvictionReconciler ) handlePreflight (ctx context.Context , eviction * kvmv1.Eviction , hv * kvmv1.Hypervisor ) (ctrl.Result , error ) {
159148 base := eviction .DeepCopy ()
160- expectHypervisor := HasStatusCondition (hv .Status .Conditions , kvmv1 .ConditionTypeOnboarding )
161- // Does the hypervisor even exist? Is it enabled/disabled?
162- hypervisor , err := hypervisors .Get (ctx , r .computeClient , hv .Status .HypervisorID ).Extract ()
149+ expectHypervisor := hv .Status .HypervisorID != "" && hv .Status .ServiceID != "" // The hypervisor has been registered
150+
151+ // If the hypervisor should exist, then we need to ensure it is disabled before we start evicting
152+ if expectHypervisor && ! meta .IsStatusConditionTrue (hv .Status .Conditions , kvmv1 .ConditionTypeHypervisorDisabled ) {
153+ // Hypervisor is not disabled (yet?), reflect that as a failing preflight check
154+ if meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
155+ Type : kvmv1 .ConditionTypePreflight ,
156+ Status : metav1 .ConditionFalse ,
157+ Message : "hypervisor not disabled" ,
158+ Reason : kvmv1 .ConditionReasonFailed ,
159+ }) {
160+ return ctrl.Result {}, r .updateStatus (ctx , eviction , base )
161+ }
162+ return ctrl.Result {RequeueAfter : defaultPollTime }, nil // Wait for hypervisor to be disabled
163+ }
164+
165+ // Fetch all virtual machines on the hypervisor
166+ trueVal := true
167+ hypervisor , err := hypervisors .GetExt (ctx , r .computeClient , hv .Status .HypervisorID , hypervisors.GetOpts {WithServers : & trueVal }).Extract ()
163168 if err != nil {
164169 if ! gophercloud .ResponseCodeIs (err , http .StatusNotFound ) {
165170 return ctrl.Result {}, err
@@ -190,18 +195,6 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv
190195 }
191196 }
192197
193- if ! meta .IsStatusConditionTrue (eviction .Status .Conditions , kvmv1 .ConditionTypeHypervisorDisabled ) {
194- // Hypervisor is not disabled/ensured, so we need to disable it
195- return ctrl.Result {}, r .disableHypervisor (ctx , hypervisor , eviction )
196- }
197-
198- // Fetch all virtual machines on the hypervisor
199- trueVal := true
200- hypervisor , err = hypervisors .GetExt (ctx , r .computeClient , hv .Status .HypervisorID , hypervisors.GetOpts {WithServers : & trueVal }).Extract ()
201- if err != nil {
202- return ctrl.Result {}, err
203- }
204-
205198 if hypervisor .Servers != nil {
206199 uuids := make ([]string , len (* hypervisor .Servers ))
207200 for i , server := range * hypervisor .Servers {
@@ -281,11 +274,9 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
281274 Message : fmt .Sprintf ("Migration of instance %s failed: %s" , vm .ID , vm .Fault .Message ),
282275 Reason : kvmv1 .ConditionReasonFailed ,
283276 })
284- if err := r .updateStatus (ctx , eviction , base ); err != nil {
285- return ctrl.Result {}, err
286- }
287277
288- return ctrl.Result {}, fmt .Errorf ("error migrating instance %v" , uuid )
278+ return ctrl.Result {}, errors .Join (fmt .Errorf ("error migrating instance %v" , uuid ),
279+ r .updateStatus (ctx , eviction , base ))
289280 }
290281
291282 currentHypervisor , _ , _ := strings .Cut (vm .HypervisorHostname , "." )
@@ -358,169 +349,6 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
358349 return ctrl.Result {RequeueAfter : defaultPollTime }, err
359350}
360351
361- func (r * EvictionReconciler ) evictionReason (eviction * kvmv1.Eviction ) string {
362- return fmt .Sprintf ("Eviction %v/%v: %v" , eviction .Namespace , eviction .Name , eviction .Spec .Reason )
363- }
364-
365- func (r * EvictionReconciler ) handleFinalizer (ctx context.Context , eviction * kvmv1.Eviction , hypervisor * kvmv1.Hypervisor ) error {
366- if ! controllerutil .ContainsFinalizer (eviction , evictionFinalizerName ) {
367- return nil
368- }
369-
370- // As long as we didn't succeed to re-enable the hypervisor, which includes
371- // - the hypervisor being gone, because it has been torn down
372- // - the hypervisor having been enabled by someone else
373- if ! meta .IsStatusConditionTrue (eviction .Status .Conditions , kvmv1 .ConditionTypeHypervisorReEnabled ) {
374- err := r .enableHypervisorService (ctx , eviction , hypervisor )
375- if err != nil {
376- return err
377- }
378- }
379-
380- base := eviction .DeepCopy ()
381- controllerutil .RemoveFinalizer (eviction , evictionFinalizerName )
382- return r .Patch (ctx , eviction , client .MergeFromWithOptions (base , client.MergeFromWithOptimisticLock {}))
383- }
384-
385- func (r * EvictionReconciler ) enableHypervisorService (ctx context.Context , eviction * kvmv1.Eviction , hv * kvmv1.Hypervisor ) error {
386- log := logger .FromContext (ctx )
387-
388- hypervisor , err := hypervisors .Get (ctx , r .computeClient , hv .Status .HypervisorID ).Extract ()
389- if err != nil {
390- if errors .Is (err , openstack .ErrNoHypervisor ) {
391- base := eviction .DeepCopy ()
392- changed := meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
393- Type : kvmv1 .ConditionTypeHypervisorReEnabled ,
394- Status : metav1 .ConditionTrue ,
395- Message : "Hypervisor is gone, no need to re-enable" ,
396- Reason : kvmv1 .ConditionReasonSucceeded ,
397- })
398- if changed {
399- return r .updateStatus (ctx , eviction , base )
400- } else {
401- return nil
402- }
403- } else {
404- base := eviction .DeepCopy ()
405- errorMessage := fmt .Sprintf ("failed to get hypervisor due to %s" , err )
406- // update the condition to reflect the error, and retry the reconciliation
407- changed := meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
408- Type : kvmv1 .ConditionTypeHypervisorReEnabled ,
409- Status : metav1 .ConditionFalse ,
410- Message : errorMessage ,
411- Reason : kvmv1 .ConditionReasonFailed ,
412- })
413-
414- if changed {
415- if err2 := r .updateStatus (ctx , eviction , base ); err2 != nil {
416- log .Error (err , "failed to store error message in condition" , "message" , errorMessage )
417- return err2
418- }
419- }
420-
421- return ErrRetry
422- }
423- }
424-
425- if hypervisor .Service .DisabledReason != r .evictionReason (eviction ) {
426- base := eviction .DeepCopy ()
427- changed := meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
428- Type : kvmv1 .ConditionTypeHypervisorReEnabled ,
429- Status : metav1 .ConditionTrue ,
430- Message : "Hypervisor already re-enabled for reason:" + hypervisor .Service .DisabledReason ,
431- Reason : kvmv1 .ConditionReasonSucceeded ,
432- })
433- if changed {
434- return r .updateStatus (ctx , eviction , base )
435- } else {
436- return nil
437- }
438- }
439-
440- enableService := services.UpdateOpts {Status : services .ServiceEnabled }
441- log .Info ("Enabling hypervisor" , "id" , hypervisor .Service .ID )
442- _ , err = services .Update (ctx , r .computeClient , hypervisor .Service .ID , enableService ).Extract ()
443-
444- if err != nil {
445- base := eviction .DeepCopy ()
446- errorMessage := fmt .Sprintf ("failed to enable hypervisor due to %s" , err )
447- changed := meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
448- Type : kvmv1 .ConditionTypeHypervisorReEnabled ,
449- Status : metav1 .ConditionFalse ,
450- Message : errorMessage ,
451- Reason : kvmv1 .ConditionReasonFailed ,
452- })
453- if changed {
454- if err2 := r .updateStatus (ctx , eviction , base ); err2 != nil {
455- log .Error (err , "failed to store error message in condition" , "message" , errorMessage )
456- }
457- }
458- return ErrRetry
459- } else {
460- base := eviction .DeepCopy ()
461- changed := meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
462- Type : kvmv1 .ConditionTypeHypervisorReEnabled ,
463- Status : metav1 .ConditionTrue ,
464- Message : "Hypervisor re-enabled successfully" ,
465- Reason : kvmv1 .ConditionReasonSucceeded ,
466- })
467- if changed {
468- return r .updateStatus (ctx , eviction , base )
469- } else {
470- return nil
471- }
472- }
473- }
474-
475- // disableHypervisor disables the hypervisor service and adds a finalizer to the eviction
476- // will add Condition HypervisorDisabled to the eviction status with the outcome
477- func (r * EvictionReconciler ) disableHypervisor (ctx context.Context , hypervisor * hypervisors.Hypervisor , eviction * kvmv1.Eviction ) error {
478- evictionReason := r .evictionReason (eviction )
479- disabledReason := hypervisor .Service .DisabledReason
480-
481- if disabledReason != "" && disabledReason != evictionReason {
482- base := eviction .DeepCopy ()
483- // Disabled for another reason already
484- meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
485- Type : kvmv1 .ConditionTypeHypervisorDisabled ,
486- Status : metav1 .ConditionTrue ,
487- Message : fmt .Sprintf ("Hypervisor already disabled for reason %q" , disabledReason ),
488- Reason : kvmv1 .ConditionReasonSucceeded ,
489- })
490- return r .updateStatus (ctx , eviction , base )
491- }
492-
493- if ! controllerutil .ContainsFinalizer (eviction , evictionFinalizerName ) {
494- base := eviction .DeepCopy ()
495- controllerutil .AddFinalizer (eviction , evictionFinalizerName )
496- return r .Patch (ctx , eviction , client .MergeFromWithOptions (base , client.MergeFromWithOptimisticLock {}), client .FieldOwner (EvictionControllerName ))
497- }
498-
499- disableService := services.UpdateOpts {Status : services .ServiceDisabled ,
500- DisabledReason : r .evictionReason (eviction )}
501-
502- _ , err := services .Update (ctx , r .computeClient , hypervisor .Service .ID , disableService ).Extract ()
503- base := eviction .DeepCopy ()
504- if err != nil {
505- // We expect OpenStack calls to be transient errors, so we retry for the next reconciliation
506- meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
507- Type : kvmv1 .ConditionTypeHypervisorDisabled ,
508- Status : metav1 .ConditionFalse ,
509- Message : fmt .Sprintf ("Failed to disable hypervisor: %v" , err ),
510- Reason : kvmv1 .ConditionReasonFailed ,
511- })
512- return r .updateStatus (ctx , eviction , base )
513- }
514-
515- meta .SetStatusCondition (& eviction .Status .Conditions , metav1.Condition {
516- Type : kvmv1 .ConditionTypeHypervisorDisabled ,
517- Status : metav1 .ConditionTrue ,
518- Message : "Hypervisor disabled successfully" ,
519- Reason : kvmv1 .ConditionReasonSucceeded ,
520- })
521- return r .updateStatus (ctx , eviction , base )
522- }
523-
524352func (r * EvictionReconciler ) liveMigrate (ctx context.Context , uuid string , eviction * kvmv1.Eviction ) error {
525353 log := logger .FromContext (ctx )
526354
0 commit comments