Skip to content

Commit

Permalink
Add NRT garbage collector
Browse files Browse the repository at this point in the history
Signed-off-by: PiotrProkop <pprokop@nvidia.com>
  • Loading branch information
PiotrProkop committed Jan 5, 2023
1 parent 0159ab0 commit 08346ac
Show file tree
Hide file tree
Showing 12 changed files with 665 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cmd/nfd-topology-updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
args := &topology.Args{}
resourcemonitorArgs := &resourcemonitor.Args{}

flagset.BoolVar(&args.GCEnabled, "gc-enabled", false,
"Run NodeResourceTopology Garbage Collector")
flagset.DurationVar(&args.GCPeriod, "gc-interval", time.Duration(10)*time.Minute,
"Interval between which Garbage Collector will try to cleanup any missed but already obsolete NodeResourceTopology. [Default: 10m]")
flagset.BoolVar(&args.Oneshot, "oneshot", false,
"Update once and exit")
flagset.BoolVar(&args.NoPublish, "no-publish", false,
Expand Down
9 changes: 9 additions & 0 deletions deployment/base/rbac-topologyupdater-gc/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Kustomize base for the RBAC objects of the NodeResourceTopology garbage
# collector: it lists the ClusterRole, ClusterRoleBinding and ServiceAccount
# manifests and sets node-feature-discovery as the kustomization namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
- topologyupdater-gc-clusterrole.yaml
- topologyupdater-gc-clusterrolebinding.yaml
- topologyupdater-gc-serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# ClusterRole for the NodeResourceTopology garbage collector (nfd-topology-gc).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: nfd-topology-gc
rules:
# Core API group: list and watch Node objects.
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
# Core API group: get on the nodes/proxy subresource.
# NOTE(review): nothing visible in this change shows the garbage collector
# calling the node proxy endpoint — confirm this rule is actually required.
- apiGroups:
- ""
resources:
- nodes/proxy
verbs:
- get
# topology.node.k8s.io: list and delete NodeResourceTopology objects.
- apiGroups:
- topology.node.k8s.io
resources:
- noderesourcetopologies
verbs:
- delete
- list
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Binds the nfd-topology-gc ClusterRole to the nfd-topology-gc ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: nfd-topology-gc
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: nfd-topology-gc
subjects:
- kind: ServiceAccount
name: nfd-topology-gc
# NOTE(review): the subject namespace is "default" while the kustomizations in
# this change set "namespace: node-feature-discovery" — presumably kustomize
# rewrites this value at build time; confirm.
namespace: default
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# ServiceAccount named nfd-topology-gc; the same name is used as the subject of
# the nfd-topology-gc ClusterRoleBinding and by the nfd-topology-gc Deployment.
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfd-topology-gc
7 changes: 7 additions & 0 deletions deployment/base/topologyupdater-gc/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Kustomize base for the NodeResourceTopology garbage collector workload: it
# includes topologyupdater-gc.yaml and sets node-feature-discovery as the
# kustomization namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
- topologyupdater-gc.yaml
25 changes: 25 additions & 0 deletions deployment/base/topologyupdater-gc/topologyupdater-gc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Deployment that runs the nfd-topology-updater binary with the
# NodeResourceTopology garbage collector enabled (-gc-enabled).
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: nfd
  name: nfd-topology-gc
spec:
  selector:
    matchLabels:
      app: nfd-topology-gc
  template:
    metadata:
      labels:
        app: nfd-topology-gc
    spec:
      # NOTE(review): hostNetwork is not set on this pod, so the
      # ClusterFirstWithHostNet policy looks unnecessary here — confirm.
      dnsPolicy: ClusterFirstWithHostNet
      # serviceAccountName replaces the deprecated "serviceAccount" alias.
      serviceAccountName: nfd-topology-gc
      containers:
        - name: nfd-topology-gc
          image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
          imagePullPolicy: Always
          command:
            - "nfd-topology-updater"
            - "-gc-enabled"
            # NOTE(review): the flag's documented default is 10m; 10s is far
            # more aggressive — confirm this value is intended for deployments.
            - "-gc-interval=10s"
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ namespace: node-feature-discovery
bases:
- ../../base/rbac
- ../../base/rbac-topologyupdater
- ../../base/rbac-topologyupdater-gc
- ../../base/nfd-crds
- ../../base/master
- ../../base/worker-daemonset
- ../../base/noderesourcetopologies-crd
- ../../base/topologyupdater-daemonset
- ../../base/topologyupdater-gc

resources:
- namespace.yaml
Expand Down
154 changes: 154 additions & 0 deletions pkg/nfd-master/nfd-nrt-gc_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nfdmaster

import (
"context"
"testing"
"time"

nrtapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
faketopologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
fakek8sclientset "k8s.io/client-go/kubernetes/fake"

. "github.com/smartystreets/goconvey/convey"
)

func TestInformer(t *testing.T) {
Convey("When theres is old NRT ", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset()

fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
})

stopChan := make(chan struct{}, 1)

nrtGC := &nrtGarbageCollector{
k8sClient: k8sClient,
topoClient: fakeClient,
stopChan: stopChan,
}

err := nrtGC.start()
So(err, ShouldBeNil)

nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 0)
})
Convey("When theres is one old NRT and one up to date", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
})

fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)

stopChan := make(chan struct{}, 1)

nrtGC := &nrtGarbageCollector{
k8sClient: k8sClient,
topoClient: fakeClient,
stopChan: stopChan,
}

err := nrtGC.start()
So(err, ShouldBeNil)

nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 1)
So(nrts.Items[0].GetName(), ShouldEqual, "node1")

})
Convey("Should react to delete event", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)

fakeClient := faketopologyv1alpha1.NewSimpleClientset(
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)

stopChan := make(chan struct{}, 1)

nrtGC := &nrtGarbageCollector{
k8sClient: k8sClient,
topoClient: fakeClient,
stopChan: stopChan,
}

err := nrtGC.start()
So(err, ShouldBeNil)

nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)

So(nrts.Items, ShouldHaveLength, 2)

err = k8sClient.CoreV1().Nodes().Delete(context.TODO(), "node1", metav1.DeleteOptions{})
So(err, ShouldBeNil)
// simple sleep with retry loop to make sure indexer will pick up event and trigger deleteNode Function
deleted := false
for i := 0; i < 5; i++ {
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)

if len(nrts.Items) == 1 {
deleted = true
break
}
time.Sleep(time.Second)
}
So(deleted, ShouldBeTrue)
})

}
Loading

0 comments on commit 08346ac

Please sign in to comment.