Skip to content

chore: Use watch APIs to list k8s resources #716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions pkg/cache/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"sort"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/go-logr/logr"
Expand All @@ -30,6 +31,7 @@ import (
"k8s.io/client-go/tools/pager"
watchutil "k8s.io/client-go/tools/watch"
"k8s.io/client-go/util/retry"
"k8s.io/client-go/util/watchlist"
"k8s.io/klog/v2/textlogger"
"k8s.io/kubectl/pkg/util/openapi"

Expand Down Expand Up @@ -248,7 +250,9 @@ type clusterCache struct {
openAPISchema openapi.Resources
gvkParser *managedfields.GvkParser

respectRBAC int
respectRBAC int
listResourcesUsingWatchAPI atomic.Int32
listResourcesUsingRegularAPI atomic.Int32
Comment on lines +254 to +255
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think maybe we shouldn't do this, because listResources gets called a lot over the lifetime of a controller run (i.e. every time the cache expires or there's a retry due to an error). I don't think it's worth it just for the test. And maybe we can find a different way to test.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed that it's not ideal; I'll think about how to test this better.

}

type clusterCacheSync struct {
Expand Down Expand Up @@ -600,10 +604,26 @@ func (c *clusterCache) listResources(ctx context.Context, resClient dynamic.Reso
listRetry = retry.DefaultRetry
}

if opts.ResourceVersion == "" {
opts.ResourceVersion = "0"
}
Comment on lines +607 to +609
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this necessary?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required by the list-options construction function (`watchlist.PrepareWatchListOptionsFromListOptions`); otherwise it would return `false` for `success`.


watchListOpts, success, watchListErr := watchlist.PrepareWatchListOptionsFromListOptions(opts)
var listOpts metav1.ListOptions
if success {
listOpts = watchListOpts
c.listResourcesUsingWatchAPI.Add(1)
c.log.Info("Would try to use watch list options to list resources.")
} else {
listOpts = opts
c.listResourcesUsingRegularAPI.Add(1)
c.log.Info(fmt.Sprintf("Would use regular options to list resources. Watch list options couldn't be prepared. Optional error: %v", watchListErr))
}

listRetry.Steps = int(c.listRetryLimit)
err := retry.OnError(listRetry, c.listRetryFunc, func() error {
var ierr error
res, ierr = resClient.List(ctx, opts)
res, ierr = resClient.List(ctx, listOpts)
if ierr != nil {
// Log out a retry
if c.listRetryLimit > 1 && c.listRetryFunc(ierr) {
Expand Down
94 changes: 67 additions & 27 deletions pkg/cache/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic/fake"
clientfeatures "k8s.io/client-go/features"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
testcore "k8s.io/client-go/testing"
Expand Down Expand Up @@ -189,41 +190,80 @@ func Benchmark_sync(t *testing.B) {
}
}

// AlwaysEnabledGates is a stateless feature-gate stub used by the tests in
// this file: it reports every client-go feature as enabled.
type AlwaysEnabledGates struct{}

// Enabled ignores the requested feature and unconditionally returns true, so
// installing this type turns on every gate that is queried, not only the
// watch-list one.
func (AlwaysEnabledGates) Enabled(clientfeatures.Feature) bool {
return true
}

func TestEnsureSynced(t *testing.T) {
obj1 := &appsv1.Deployment{
TypeMeta: metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: "Deployment",
},
ObjectMeta: metav1.ObjectMeta{
Name: "helm-guestbook1",
Namespace: "default1",
},
}
obj2 := &appsv1.Deployment{
TypeMeta: metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: "Deployment",
tests := []struct {
name string
listUsingWatchAPIs bool
}{
{
name: "WatchAPIUsed",
listUsingWatchAPIs: true,
},
ObjectMeta: metav1.ObjectMeta{
Name: "helm-guestbook2",
Namespace: "default2",
{
name: "WatchAPINotUsed",
listUsingWatchAPIs: false,
},
}

cluster := newCluster(t, obj1, obj2)
err := cluster.EnsureSynced()
require.NoError(t, err)
originalFeatureGates := clientfeatures.FeatureGates()

cluster.lock.Lock()
defer cluster.lock.Unlock()
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if tc.listUsingWatchAPIs {
// Enable WatchListClient in particular. Setting via env variable here is too late.
clientfeatures.ReplaceFeatureGates(AlwaysEnabledGates{})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this enables every feature gate that can be enabled. Can we instead enable just the watch-list gate?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit more complicated, and it's just for this test, so I'd rather keep this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although it shouldn't be too hard to construct the gates more precisely.

}
obj1 := &appsv1.Deployment{
TypeMeta: metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: "Deployment",
},
ObjectMeta: metav1.ObjectMeta{
Name: "helm-guestbook1",
Namespace: "default1",
},
}
obj2 := &appsv1.Deployment{
TypeMeta: metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: "Deployment",
},
ObjectMeta: metav1.ObjectMeta{
Name: "helm-guestbook2",
Namespace: "default2",
},
}

assert.Len(t, cluster.resources, 2)
var names []string
for k := range cluster.resources {
names = append(names, k.Name)
cluster := newCluster(t, obj1, obj2)
err := cluster.EnsureSynced()
require.NoError(t, err)

cluster.lock.Lock()
defer cluster.lock.Unlock()

assert.Len(t, cluster.resources, 2)
var names []string
for k := range cluster.resources {
names = append(names, k.Name)
}
assert.ElementsMatch(t, []string{"helm-guestbook1", "helm-guestbook2"}, names)

if tc.listUsingWatchAPIs {
assert.Positive(t, cluster.listResourcesUsingWatchAPI.Load())
assert.Equal(t, int32(0), cluster.listResourcesUsingRegularAPI.Load())
} else {
assert.Equal(t, int32(0), cluster.listResourcesUsingWatchAPI.Load())
assert.Positive(t, cluster.listResourcesUsingRegularAPI.Load())
}
clientfeatures.ReplaceFeatureGates(originalFeatureGates)
})
}
assert.ElementsMatch(t, []string{"helm-guestbook1", "helm-guestbook2"}, names)
}

func TestStatefulSetOwnershipInferred(t *testing.T) {
Expand Down
Loading