Skip to content

Commit bb6a696

Browse files
committed
Add provider healthcheck controller
1 parent 9398981 commit bb6a696

File tree

5 files changed

+489
-5
lines changed

5 files changed

+489
-5
lines changed

cmd/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
operatorv1alpha1 "sigs.k8s.io/cluster-api-operator/api/v1alpha1"
4444
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
4545
providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller"
46+
healtchcheckcontroller "sigs.k8s.io/cluster-api-operator/internal/controller/healthcheck"
4647
)
4748

4849
var (
@@ -233,6 +234,13 @@ func setupReconcilers(mgr ctrl.Manager) {
233234
setupLog.Error(err, "unable to create controller", "controller", "AddonProvider")
234235
os.Exit(1)
235236
}
237+
238+
if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
239+
Client: mgr.GetClient(),
240+
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
241+
setupLog.Error(err, "unable to create controller", "controller", "Healthcheck")
242+
os.Exit(1)
243+
}
236244
}
237245

238246
func setupWebhooks(mgr ctrl.Manager) {

internal/controller/genericprovider_controller.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,7 @@ func patchProvider(ctx context.Context, provider genericprovider.GenericProvider
155155
operatorv1.ProviderInstalledCondition,
156156
}
157157

158-
conditions.SetSummary(provider, conditions.WithConditions(conds...))
159-
160-
options = append(options,
161-
patch.WithOwnedConditions{Conditions: append(conds, clusterv1.ReadyCondition)},
162-
)
158+
options = append(options, patch.WithOwnedConditions{Conditions: conds})
163159

164160
return patchHelper.Patch(ctx, provider.GetObject(), options...)
165161
}
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package healthcheck
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
appsv1 "k8s.io/api/apps/v1"
25+
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
27+
"k8s.io/apimachinery/pkg/runtime"
28+
"k8s.io/apimachinery/pkg/types"
29+
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
30+
"sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider"
31+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
32+
"sigs.k8s.io/cluster-api/util/conditions"
33+
"sigs.k8s.io/cluster-api/util/patch"
34+
ctrl "sigs.k8s.io/controller-runtime"
35+
"sigs.k8s.io/controller-runtime/pkg/builder"
36+
"sigs.k8s.io/controller-runtime/pkg/client"
37+
"sigs.k8s.io/controller-runtime/pkg/controller"
38+
"sigs.k8s.io/controller-runtime/pkg/event"
39+
"sigs.k8s.io/controller-runtime/pkg/predicate"
40+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
41+
)
42+
43+
// ProviderHealthCheckReconciler reconciles provider Deployments and reflects
// the Deployment's Available condition in the Ready condition of the provider
// resource that owns the Deployment.
type ProviderHealthCheckReconciler struct {
44+
// Client is used to read Deployments and provider resources and to build
// the patch helper that writes the provider's Ready condition.
Client client.Client
45+
}
46+
47+
const (
48+
// providerLabelKey is the label key whose presence on a Deployment marks
// it as a provider Deployment for this controller's event predicates.
providerLabelKey = "cluster.x-k8s.io/provider"
49+
)
50+
51+
func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
52+
return ctrl.NewControllerManagedBy(mgr).
53+
For(&appsv1.Deployment{}, builder.WithPredicates(providerDeploymentPredicates())).
54+
WithOptions(options).
55+
Complete(r)
56+
}
57+
58+
func (r *ProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, reterr error) {
59+
log := ctrl.LoggerFrom(ctx)
60+
61+
log.Info("Checking provider health")
62+
63+
result := ctrl.Result{}
64+
65+
deployment := &appsv1.Deployment{}
66+
67+
if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil {
68+
if apierrors.IsNotFound(err) {
69+
// Object not found, return. Created objects are automatically garbage collected.
70+
// For additional cleanup logic use finalizers.
71+
return result, nil
72+
}
73+
// Error reading the object - requeue the request.
74+
return result, err
75+
}
76+
77+
// There should be just one owner reference - to a Provider resource.
78+
if len(deployment.GetOwnerReferences()) != 1 {
79+
return result, fmt.Errorf("incorrect number of owner references for provider deployment %s", req.NamespacedName)
80+
}
81+
82+
deploymentOwner := deployment.GetOwnerReferences()[0]
83+
84+
deploymentCondition := getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable)
85+
86+
typedProvider, err := r.getGenericProvider(ctx, deploymentOwner.Kind, deploymentOwner.Name, req.Namespace)
87+
if err != nil {
88+
return result, err
89+
}
90+
91+
// Stop earlier if this provider is not fully installed yet.
92+
if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) {
93+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
94+
}
95+
96+
// Compare provider's Ready condition with the deployment's Available condition and stop if they already match.
97+
currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition)
98+
if currentReadyCondition != nil && deploymentCondition != nil && currentReadyCondition.Status == deploymentCondition.Status {
99+
return result, nil
100+
}
101+
102+
// Initialize the patch helper
103+
patchHelper, err := patch.NewHelper(typedProvider.GetObject(), r.Client)
104+
if err != nil {
105+
return result, err
106+
}
107+
108+
if deploymentCondition != nil {
109+
conditions.Set(typedProvider, &clusterv1.Condition{
110+
Type: clusterv1.ReadyCondition,
111+
Status: deploymentCondition.Status,
112+
Reason: deploymentCondition.Reason,
113+
})
114+
} else {
115+
conditions.Set(typedProvider, &clusterv1.Condition{
116+
Type: clusterv1.ReadyCondition,
117+
Status: corev1.ConditionFalse,
118+
})
119+
}
120+
121+
// Don't requeue immediately if the deployment is not ready, but rather wait 5 seconds.
122+
if conditions.IsFalse(typedProvider, clusterv1.ReadyCondition) {
123+
result = ctrl.Result{RequeueAfter: 5 * time.Second}
124+
}
125+
126+
options := patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{clusterv1.ReadyCondition}}
127+
128+
return result, patchHelper.Patch(ctx, typedProvider.GetObject(), options)
129+
}
130+
131+
func (r *ProviderHealthCheckReconciler) getGenericProvider(ctx context.Context, providerKind, providerName, providerNamespace string) (genericprovider.GenericProvider, error) {
132+
switch providerKind {
133+
case "CoreProvider":
134+
provider := &operatorv1.CoreProvider{}
135+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
136+
return nil, err
137+
}
138+
139+
return &genericprovider.CoreProviderWrapper{CoreProvider: provider}, nil
140+
case "BootstrapProvider":
141+
provider := &operatorv1.BootstrapProvider{}
142+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
143+
return nil, err
144+
}
145+
146+
return &genericprovider.BootstrapProviderWrapper{BootstrapProvider: provider}, nil
147+
case "ControlPlaneProvider":
148+
provider := &operatorv1.ControlPlaneProvider{}
149+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
150+
return nil, err
151+
}
152+
153+
return &genericprovider.ControlPlaneProviderWrapper{ControlPlaneProvider: provider}, nil
154+
case "InfrastructureProvider":
155+
provider := &operatorv1.InfrastructureProvider{}
156+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
157+
return nil, err
158+
}
159+
160+
return &genericprovider.InfrastructureProviderWrapper{InfrastructureProvider: provider}, nil
161+
case "AddonProvider":
162+
provider := &operatorv1.AddonProvider{}
163+
if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil {
164+
return nil, err
165+
}
166+
167+
return &genericprovider.AddonProviderWrapper{AddonProvider: provider}, nil
168+
default:
169+
return nil, fmt.Errorf("failed to cast interface for type: %s", providerKind)
170+
}
171+
}
172+
173+
// getDeploymentCondition returns the deployment condition with the provided type.
174+
func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
175+
for i := range status.Conditions {
176+
c := status.Conditions[i]
177+
if c.Type == condType {
178+
return &c
179+
}
180+
}
181+
182+
return nil
183+
}
184+
185+
func providerDeploymentPredicates() predicate.Funcs {
186+
isProviderDeployment := func(obj runtime.Object) bool {
187+
clusterOperator, ok := obj.(*appsv1.Deployment)
188+
if !ok {
189+
panic("expected to get an of object of type appsv1.Deployment")
190+
}
191+
192+
_, found := clusterOperator.GetLabels()[providerLabelKey]
193+
194+
return found
195+
}
196+
197+
return predicate.Funcs{
198+
CreateFunc: func(e event.CreateEvent) bool { return false },
199+
UpdateFunc: func(e event.UpdateEvent) bool { return isProviderDeployment(e.ObjectNew) },
200+
GenericFunc: func(e event.GenericEvent) bool { return false },
201+
DeleteFunc: func(e event.DeleteEvent) bool { return false },
202+
}
203+
}

0 commit comments

Comments
 (0)