diff --git a/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml b/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml index 9dc164f..da37872 100644 --- a/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml +++ b/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml @@ -44,6 +44,8 @@ spec: of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' type: string + clusterName: + type: string kind: description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client @@ -101,6 +103,13 @@ spec: - Drain - Wait type: string + strictValidation: + description: StrictValidation is a boolean which determines whether + named nodes selected in a CNR must exist and be valid nodes + before cycling can begin. If set to true when invalid nodes + are selected the CNR will be transitioned to the "Failed" phase + before cycling can begin again. + type: boolean required: - method type: object diff --git a/deploy/crds/atlassian.com_cyclenodestatuses_crd.yaml b/deploy/crds/atlassian.com_cyclenodestatuses_crd.yaml index f53d3e4..358ea72 100644 --- a/deploy/crds/atlassian.com_cyclenodestatuses_crd.yaml +++ b/deploy/crds/atlassian.com_cyclenodestatuses_crd.yaml @@ -101,6 +101,13 @@ spec: - Drain - Wait type: string + strictValidation: + description: StrictValidation is a boolean which determines whether + named nodes selected in a CNR must exist and be valid nodes + before cycling can begin. If set to true when invalid nodes + are selected the CNR will be transitioned to the "Failed" phase + before cycling can begin again. + type: boolean required: - method type: object diff --git a/deploy/crds/atlassian.com_nodegroups_crd.yaml b/deploy/crds/atlassian.com_nodegroups_crd.yaml index d75ee73..08b822c 100644 --- a/deploy/crds/atlassian.com_nodegroups_crd.yaml +++ b/deploy/crds/atlassian.com_nodegroups_crd.yaml @@ -93,6 +93,13 @@ spec: - Drain - Wait type: string + strictValidation: + description: StrictValidation is a boolean which determines whether + named nodes selected in a CNR must exist and be valid nodes + before cycling can begin. If set to true when invalid nodes + are selected the CNR will be transitioned to the "Failed" phase + before cycling can begin again. + type: boolean required: - method type: object diff --git a/pkg/apis/atlassian/v1/common_types.go b/pkg/apis/atlassian/v1/common_types.go index 09236dc..98e71f6 100644 --- a/pkg/apis/atlassian/v1/common_types.go +++ b/pkg/apis/atlassian/v1/common_types.go @@ -46,6 +46,11 @@ type CycleSettings struct { // in-progress CNS request timeout from the time it's worked on by the controller. // If no cyclingTimeout is provided, CNS will use the default controller CNS cyclingTimeout. CyclingTimeout *metav1.Duration `json:"cyclingTimeout,omitempty"` + + // StrictValidation is a boolean which determines whether named nodes selected in a CNR must + // exist and be valid nodes before cycling can begin. If set to true when invalid nodes are + // selected the CNR will be transitioned to the "Failed" phase before cycling can begin again. + StrictValidation bool `json:"strictValidation,omitempty"` } // HealthCheck defines the health check configuration for the NodeGroup diff --git a/pkg/apis/atlassian/v1/zz_generated.deepcopy.go b/pkg/apis/atlassian/v1/zz_generated.deepcopy.go index 04671d9..5df3ee0 100644 --- a/pkg/apis/atlassian/v1/zz_generated.deepcopy.go +++ b/pkg/apis/atlassian/v1/zz_generated.deepcopy.go @@ -1,3 +1,4 @@ +//go:build !ignore_autogenerated // +build !ignore_autogenerated // Code generated by operator-sdk. DO NOT EDIT. diff --git a/pkg/controller/cyclenoderequest/transitioner/node.go b/pkg/controller/cyclenoderequest/transitioner/node.go index b22f119..603dbd6 100644 --- a/pkg/controller/cyclenoderequest/transitioner/node.go +++ b/pkg/controller/cyclenoderequest/transitioner/node.go @@ -105,7 +105,7 @@ func (t *CycleNodeRequestTransitioner) getNodesToTerminate(numNodes int64) (node // addNamedNodesToTerminate adds the named nodes for this CycleNodeRequest to the list of nodes to terminate. // Skips any named node that does not exist in the node group for this CycleNodeRequest. -func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[string]corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) { +func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[string]corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) error { kubeNodesMap := make(map[string]corev1.Node) for _, node := range kubeNodes { @@ -117,6 +117,11 @@ func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[st if !found { t.rm.Logger.Info("could not find node by name, skipping", "nodeName", namedNode) + + if t.cycleNodeRequest.Spec.CycleSettings.StrictValidation { + return fmt.Errorf("could not find node by name: %v", namedNode) + } + continue } @@ -130,6 +135,8 @@ func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[st newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()), ) } + + return nil } // newCycleNodeRequestNode converts a corev1.Node to a v1.CycleNodeRequestNode. This is done multiple diff --git a/pkg/controller/cyclenoderequest/transitioner/transitions.go b/pkg/controller/cyclenoderequest/transitioner/transitions.go index f16bde8..c122b89 100644 --- a/pkg/controller/cyclenoderequest/transitioner/transitions.go +++ b/pkg/controller/cyclenoderequest/transitioner/transitions.go @@ -70,6 +70,10 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er return t.transitionToHealing(err) } + if len(kubeNodes) == 0 { + return t.transitionToHealing(fmt.Errorf("no nodes matched selector")) + } + // Only retain nodes which still exist inside cloud provider var nodeProviderIDs []string @@ -176,7 +180,10 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er if len(t.cycleNodeRequest.Spec.NodeNames) > 0 { // If specific node names are provided, check they actually exist in the node group t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Adding named nodes to NodesToTerminate") - t.addNamedNodesToTerminate(kubeNodes, nodeGroupInstances) + err := t.addNamedNodesToTerminate(kubeNodes, nodeGroupInstances) + if err != nil { + return t.transitionToHealing(err) + } } else { // Otherwise just add all the nodes in the node group t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Adding all node group nodes to NodesToTerminate") diff --git a/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go b/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go index 9c85f63..a5cccdd 100644 --- a/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go +++ b/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go @@ -59,7 +59,9 @@ func TestPendingSimpleCase(t *testing.T) { assert.Equal(t, cnr.Status.NumNodesCycled, 0) } -// Test to ensure the Pending phase will accept a CNR with a correct named node. +// Test to ensure the Pending phase will reject a CNR with a named node that +// does not match any of the nodes matching the node selector if strict +// validation is enabled. It should error out immediately. func TestPendingWithNamedNode(t *testing.T) { nodegroup, err := mock.NewNodegroup("ng-1", 2) if err != nil { @@ -108,9 +110,10 @@ func TestPendingWithNamedNode(t *testing.T) { assert.Equal(t, cnr.Status.NumNodesCycled, 0) } -// Test to ensure the Pending phase will reject a CNR with a named node that -// does not match any of the nodes matching the node selector. It should error -// out immediately. +// Test to ensure the Pending phase will accept a CNR with a named node that +// does not match any of the nodes matching the node selector if strict +// validation is not enabled. It will just select the select the nodes that +// exist. func TestPendingWrongNamedNode(t *testing.T) { nodegroup, err := mock.NewNodegroup("ng-1", 2) if err != nil { @@ -148,6 +151,59 @@ func TestPendingWrongNamedNode(t *testing.T) { WithCloudProviderInstances(nodegroup), ) + result, err := fakeTransitioner.Run() + assert.NoError(t, err) + assert.True(t, result.Requeue) + + // It should move to the Initialised phase and set up the status of the CNR + // in a predictable manner + assert.Equal(t, v1.CycleNodeRequestInitialised, cnr.Status.Phase) + assert.Len(t, cnr.Status.NodesToTerminate, 1) + assert.Equal(t, cnr.Status.ActiveChildren, int64(0)) + assert.Equal(t, cnr.Status.NumNodesCycled, 0) +} + +// Test to ensure the Pending phase will reject a CNR with a named node that +// does not match any of the nodes matching the node selector if strict +// validation is enabled. It should error out immediately. +func TestPendingWrongNamedNodeStrictValidation(t *testing.T) { + nodegroup, err := mock.NewNodegroup("ng-1", 2) + if err != nil { + assert.NoError(t, err) + } + + cnr := &v1.CycleNodeRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cnr-1", + Namespace: "kube-system", + }, + Spec: v1.CycleNodeRequestSpec{ + NodeGroupsList: []string{"ng-1"}, + CycleSettings: v1.CycleSettings{ + Concurrency: 1, + Method: v1.CycleNodeRequestMethodDrain, + StrictValidation: true, + }, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "customer": "kitt", + }, + }, + NodeNames: []string{ + "ng-1-node-0", + "ng-1-node-2", + }, + }, + Status: v1.CycleNodeRequestStatus{ + Phase: v1.CycleNodeRequestPending, + }, + } + + fakeTransitioner := NewFakeTransitioner(cnr, + WithKubeNodes(nodegroup), + WithCloudProviderInstances(nodegroup), + ) + _, err = fakeTransitioner.Run() assert.Error(t, err) assert.Equal(t, v1.CycleNodeRequestHealing, cnr.Status.Phase)