Skip to content

Commit

Permalink
[node-agent] Introduce Node controller (gardener#8632)
Browse files Browse the repository at this point in the history
* Documentation

* Controller + Reconciler incl. business logic

* Integration test

* Address PR review feedback
  • Loading branch information
rfranzke authored Oct 13, 2023
1 parent 9337df7 commit 46d8d6b
Show file tree
Hide file tree
Showing 11 changed files with 646 additions and 7 deletions.
38 changes: 38 additions & 0 deletions cmd/gardener-node-agent/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/spf13/pflag"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest"
Expand Down Expand Up @@ -138,6 +139,12 @@ func run(ctx context.Context, log logr.Logger, cfg *config.NodeAgentConfiguratio
}
}

log.Info("Fetching node name based on hostname")
nodeName, err := getNodeName(ctx, log, restConfig)
if err != nil {
return err
}

log.Info("Setting up manager")
mgr, err := manager.New(restConfig, manager.Options{
Logger: log,
Expand All @@ -155,6 +162,9 @@ func run(ctx context.Context, log logr.Logger, cfg *config.NodeAgentConfiguratio
&corev1.Secret{}: {
Namespaces: map[string]cache.Config{metav1.NamespaceSystem: {}},
},
&corev1.Node{}: {
Field: fields.SelectorFromSet(fields.Set{metav1.ObjectNameField: nodeName}),
},
},
},
LeaderElection: false,
Expand Down Expand Up @@ -224,3 +234,31 @@ func fetchAccessTokenViaBootstrapToken(ctx context.Context, log logr.Logger, res

return nil
}

// getNodeName determines the name of the Node object belonging to the machine this process runs on.
//
// It reads the machine's hostname via os.Hostname, creates a dedicated client from the given rest config, and lists
// the metadata of all nodes carrying the hostname label (corev1.LabelHostname) with that hostname as value. The name
// of the node is returned only if exactly one node matches. An error is returned if the hostname cannot be read, the
// client cannot be created, the list call fails, or if zero or more than one node match.
//
// NOTE(review): the hostname is used exactly as reported by the OS. If the label value is normalized on the node
// object (e.g. lower-cased), a hostname with different casing would not match — confirm.
func getNodeName(ctx context.Context, log logr.Logger, restConfig *rest.Config) (string, error) {
	hostname, err := os.Hostname()
	if err != nil {
		return "", fmt.Errorf("failed fetching hostname: %w", err)
	}

	cl, err := client.New(restConfig, client.Options{})
	if err != nil {
		return "", fmt.Errorf("unable to create client: %w", err)
	}

	// Only the metadata is needed to learn the node's name, hence a PartialObjectMetadataList is used.
	nodeList := &metav1.PartialObjectMetadataList{}
	nodeList.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("NodeList"))
	if err := cl.List(ctx, nodeList, client.MatchingLabels{corev1.LabelHostname: hostname}); err != nil {
		return "", fmt.Errorf("failed listing nodes with label %s=%s: %w", corev1.LabelHostname, hostname, err)
	}

	switch len(nodeList.Items) {
	case 0:
		return "", fmt.Errorf("could not find any node with label %s=%s", corev1.LabelHostname, hostname)
	case 1:
		log.Info("Found node name based on hostname", "hostname", hostname, "nodeName", nodeList.Items[0].Name)
		return nodeList.Items[0].Name, nil
	default:
		// The match must be unambiguous since the result is used to restrict the manager's cache to a single node.
		return "", fmt.Errorf("found more than one node with label %s=%s", corev1.LabelHostname, hostname)
	}
}
10 changes: 10 additions & 0 deletions docs/concepts/node-agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ In a bootstrapping phase, the `gardener-node-agent` sets itself up as a systemd

This section describes the controllers in more details.

### [`Node` Controller](../../pkg/nodeagent/controller/node)

This controller watches the `Node` object for the machine it runs on.
The correct `Node` is identified based on the hostname of the machine (`Node`s have the `kubernetes.io/hostname` label).
Whenever the `worker.gardener.cloud/restart-systemd-services` annotation is set or changes, the controller restarts the systemd units listed in its value (comma-separated).
See also [this document](../usage/shoot_operations.md#restart-systemd-services-on-particular-worker-nodes) for more information.
After restarting all units, the annotation is removed.

> ℹ️ When the `gardener-node-agent` systemd service itself is requested to be restarted, the annotation is removed first to ensure it does not restart itself indefinitely.
### [Token Controller](../../pkg/nodeagent/controller/token)

This controller watches the access token `Secret` in the `kube-system` namespace whose name is provided via the `gardener-node-agent`'s component configuration (`.accessTokenSecret` field).
Expand Down
5 changes: 5 additions & 0 deletions pkg/nodeagent/controller/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/gardener/gardener/pkg/nodeagent/apis/config"
"github.com/gardener/gardener/pkg/nodeagent/controller/node"
"github.com/gardener/gardener/pkg/nodeagent/controller/token"
)

// AddToManager adds all controllers to the given manager.
func AddToManager(mgr manager.Manager, cfg *config.NodeAgentConfiguration) error {
if err := (&node.Reconciler{}).AddToManager(mgr); err != nil {
return fmt.Errorf("failed adding node controller: %w", err)
}

if err := (&token.Reconciler{
AccessTokenSecretName: cfg.AccessTokenSecretName,
}).AddToManager(mgr); err != nil {
Expand Down
69 changes: 69 additions & 0 deletions pkg/nodeagent/controller/node/add.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2023 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/gardener/gardener/pkg/nodeagent/dbus"
)

// ControllerName is the name of this controller.
const ControllerName = "node"

// AddToManager registers the Reconciler as controller with the given manager.
// Fields which have not been set beforehand are defaulted: the client and the event recorder are taken from the
// manager, and a new DBus instance is created.
func (r *Reconciler) AddToManager(mgr manager.Manager) error {
	if r.Client == nil {
		r.Client = mgr.GetClient()
	}
	if r.Recorder == nil {
		r.Recorder = mgr.GetEventRecorderFor(ControllerName)
	}
	if r.DBus == nil {
		r.DBus = dbus.New()
	}

	// The controller only works with the metadata of Node objects.
	nodeMetadata := &metav1.PartialObjectMetadata{}
	nodeMetadata.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Node"))

	ctrlBuilder := builder.ControllerManagedBy(mgr).Named(ControllerName)
	ctrlBuilder = ctrlBuilder.For(nodeMetadata, builder.WithPredicates(r.NodePredicate()))
	ctrlBuilder = ctrlBuilder.WithOptions(controller.Options{MaxConcurrentReconciles: 1})

	return ctrlBuilder.Complete(r)
}

// NodePredicate returns a predicate which lets only those events pass that carry a non-empty value for the annotation
// describing which systemd services should be restarted:
//   - Create events pass when the annotation has a non-empty value.
//   - Update events pass when the new value is non-empty and differs from the old value.
//   - Delete and Generic events never pass.
func (r *Reconciler) NodePredicate() predicate.Predicate {
	funcs := predicate.Funcs{}

	funcs.CreateFunc = func(e event.CreateEvent) bool {
		value := e.Object.GetAnnotations()[annotationRestartSystemdServices]
		return len(value) > 0
	}

	funcs.UpdateFunc = func(e event.UpdateEvent) bool {
		oldValue := e.ObjectOld.GetAnnotations()[annotationRestartSystemdServices]
		newValue := e.ObjectNew.GetAnnotations()[annotationRestartSystemdServices]
		if newValue == "" {
			// Annotation was removed (or is empty), nothing to restart.
			return false
		}
		return oldValue != newValue
	}

	funcs.DeleteFunc = func(_ event.DeleteEvent) bool { return false }
	funcs.GenericFunc = func(_ event.GenericEvent) bool { return false }

	return funcs
}
88 changes: 88 additions & 0 deletions pkg/nodeagent/controller/node/add_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright 2023 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node_test

import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"

. "github.com/gardener/gardener/pkg/nodeagent/controller/node"
)

// Specs for the event filtering performed by the node controller's predicate.
var _ = Describe("Add", func() {
	Describe("#NodePredicate", func() {
		// Key of the annotation the predicate reacts on.
		const restartAnnotation = "worker.gardener.cloud/restart-systemd-services"

		var (
			nodePredicate predicate.Predicate
			node          *corev1.Node
		)

		BeforeEach(func() {
			node = &corev1.Node{}
			nodePredicate = (&Reconciler{}).NodePredicate()
		})

		Describe("#Create", func() {
			It("should return false because annotation is not present", func() {
				createEvent := event.CreateEvent{Object: node}
				Expect(nodePredicate.Create(createEvent)).To(BeFalse())
			})

			It("should return true because annotation is present", func() {
				metav1.SetMetaDataAnnotation(&node.ObjectMeta, restartAnnotation, "foo")

				createEvent := event.CreateEvent{Object: node}
				Expect(nodePredicate.Create(createEvent)).To(BeTrue())
			})
		})

		Describe("#Update", func() {
			It("should return false because annotation is not present", func() {
				updateEvent := event.UpdateEvent{ObjectOld: node, ObjectNew: node}
				Expect(nodePredicate.Update(updateEvent)).To(BeFalse())
			})

			It("should return true because annotation got set", func() {
				// Old object: no annotation. New object: annotation set.
				previous := node.DeepCopy()
				metav1.SetMetaDataAnnotation(&node.ObjectMeta, restartAnnotation, "foo")

				updateEvent := event.UpdateEvent{ObjectOld: previous, ObjectNew: node}
				Expect(nodePredicate.Update(updateEvent)).To(BeTrue())
			})

			It("should return true because annotation got changed", func() {
				// Old object: value "foo". New object: value "bar".
				metav1.SetMetaDataAnnotation(&node.ObjectMeta, restartAnnotation, "foo")
				previous := node.DeepCopy()
				metav1.SetMetaDataAnnotation(&node.ObjectMeta, restartAnnotation, "bar")

				updateEvent := event.UpdateEvent{ObjectOld: previous, ObjectNew: node}
				Expect(nodePredicate.Update(updateEvent)).To(BeTrue())
			})

			It("should return false because annotation got removed", func() {
				// Old object: annotation set. New object: no annotation.
				previous := node.DeepCopy()
				metav1.SetMetaDataAnnotation(&previous.ObjectMeta, restartAnnotation, "foo")

				updateEvent := event.UpdateEvent{ObjectOld: previous, ObjectNew: node}
				Expect(nodePredicate.Update(updateEvent)).To(BeFalse())
			})
		})

		Describe("#Delete", func() {
			It("should return false", func() {
				deleteEvent := event.DeleteEvent{}
				Expect(nodePredicate.Delete(deleteEvent)).To(BeFalse())
			})
		})

		Describe("#Generic", func() {
			It("should return false", func() {
				genericEvent := event.GenericEvent{}
				Expect(nodePredicate.Generic(genericEvent)).To(BeFalse())
			})
		})
	})
})
27 changes: 27 additions & 0 deletions pkg/nodeagent/controller/node/node_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2023 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestNode is the `go test` entry point which hooks Gomega's failure handling into Ginkgo and runs the specs of this
// package.
func TestNode(t *testing.T) {
	const suiteDescription = "NodeAgent Controller Node Suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}
100 changes: 100 additions & 0 deletions pkg/nodeagent/controller/node/reconciler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2023 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node

import (
"context"
"fmt"
"strings"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/gardener/gardener/pkg/controllerutils"
nodeagentv1alpha1 "github.com/gardener/gardener/pkg/nodeagent/apis/config/v1alpha1"
"github.com/gardener/gardener/pkg/nodeagent/dbus"
)

// annotationRestartSystemdServices is the key of the Node annotation whose value is a comma-separated list of systemd
// services that should be restarted.
const annotationRestartSystemdServices = "worker.gardener.cloud/restart-systemd-services"

// Reconciler checks for node annotation changes and restarts the specified systemd services.
type Reconciler struct {
	// Client is used to read the Node's metadata and to patch the Node when removing the restart annotation.
	Client client.Client
	// Recorder is passed to DBus when restarting a service.
	Recorder record.EventRecorder
	// DBus is used to restart systemd services.
	DBus dbus.DBus
}

// Reconcile checks for node annotation changes and restarts the specified systemd services.
//
// The value of the restart annotation is interpreted as a comma-separated list of service names. Surrounding
// whitespace of each entry is ignored and empty entries are skipped. All services except the gardener-node-agent are
// restarted first, then the annotation is removed from the node, and only afterwards the gardener-node-agent itself
// is restarted (if requested).
//
// Failed restarts do not lead to an error (see restartService). An error is returned only if the node cannot be read
// or the annotation cannot be removed.
func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
	log := logf.FromContext(ctx)

	ctx, cancel := controllerutils.GetMainReconciliationContext(ctx, controllerutils.DefaultReconciliationTimeout)
	defer cancel()

	node := &metav1.PartialObjectMetadata{}
	node.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Node"))
	if err := r.Client.Get(ctx, request.NamespacedName, node); err != nil {
		if apierrors.IsNotFound(err) {
			log.V(1).Info("Object is gone, stop reconciling")
			return reconcile.Result{}, nil
		}
		return reconcile.Result{}, fmt.Errorf("error retrieving object from store: %w", err)
	}

	services, ok := node.Annotations[annotationRestartSystemdServices]
	if !ok {
		return reconcile.Result{}, nil
	}

	var restartGardenerNodeAgent bool
	for _, entry := range strings.Split(services, ",") {
		// Tolerate values like "foo, bar" or "foo,," - a name with surrounding whitespace or an empty name can
		// never refer to an existing systemd unit.
		serviceName := strings.TrimSpace(entry)
		if serviceName == "" {
			continue
		}

		// If the gardener-node-agent itself should be restarted, we have to first remove the annotation from the node.
		// Otherwise, the annotation is never removed and it restarts itself indefinitely.
		if strings.HasPrefix(serviceName, "gardener-node-agent") {
			restartGardenerNodeAgent = true
			continue
		}
		r.restartService(ctx, log, node, serviceName)
	}

	log.Info("Removing annotation from node", "annotation", annotationRestartSystemdServices)
	patch := client.MergeFrom(node.DeepCopy())
	delete(node.Annotations, annotationRestartSystemdServices)
	if err := r.Client.Patch(ctx, node, patch); err != nil {
		return reconcile.Result{}, fmt.Errorf("failed removing annotation %q from node: %w", annotationRestartSystemdServices, err)
	}

	if restartGardenerNodeAgent {
		r.restartService(ctx, log, node, nodeagentv1alpha1.UnitName)
	}

	return reconcile.Result{}, nil
}

// restartService restarts the systemd service with the given name via DBus. A failed restart is only logged and never
// propagated to the caller.
func (r *Reconciler) restartService(ctx context.Context, log logr.Logger, node client.Object, serviceName string) {
	log.Info("Restarting systemd service", "serviceName", serviceName)

	err := r.DBus.Restart(ctx, r.Recorder, node, serviceName)
	if err == nil {
		return
	}

	// The error is deliberately not returned since we don't want to repeatedly try to restart services again and
	// again. In both cases (success or failure), an event will be recorded on the Node so that users can check
	// whether the restart worked.
	log.Error(err, "Failed restarting systemd service", "serviceName", serviceName)
}
Loading

0 comments on commit 46d8d6b

Please sign in to comment.