diff --git a/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml b/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml
index 9dc164f..d6225e5 100644
--- a/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml
+++ b/deploy/crds/atlassian.com_cyclenoderequests_crd.yaml
@@ -44,6 +44,8 @@ spec:
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
          type: string
+        clusterName:
+          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
@@ -327,6 +329,19 @@ spec:
            description: SkipPreTerminationChecks is an optional flag to skip pre-termination
              checks during cycling
            type: boolean
+          validationOptions:
+            description: ValidationOptions stores the settings to use for validating
+              state of nodegroups in kube and the cloud provider for cycling the
+              nodes.
+            properties:
+              skipMissingNamedNodes:
+                description: SkipMissingNamedNodes is a boolean which determines
+                  whether named nodes selected in a CNR must exist and be valid
+                  nodes before cycling can begin. If set to true, named nodes which
+                  don't exist will be ignored rather than transitioning the CNR
+                  to the failed phase.
+                type: boolean
+            type: object
          required:
          - cycleSettings
          - nodeGroupName
diff --git a/deploy/crds/atlassian.com_nodegroups_crd.yaml b/deploy/crds/atlassian.com_nodegroups_crd.yaml
index d75ee73..b537504 100644
--- a/deploy/crds/atlassian.com_nodegroups_crd.yaml
+++ b/deploy/crds/atlassian.com_nodegroups_crd.yaml
@@ -311,6 +311,19 @@ spec:
            description: SkipPreTerminationChecks is an optional flag to skip pre-termination
              checks during cycling
            type: boolean
+          validationOptions:
+            description: ValidationOptions stores the settings to use for validating
+              state of nodegroups in kube and the cloud provider for cycling the
+              nodes.
+            properties:
+              skipMissingNamedNodes:
+                description: SkipMissingNamedNodes is a boolean which determines
+                  whether named nodes selected in a CNR must exist and be valid
+                  nodes before cycling can begin. If set to true, named nodes which
+                  don't exist will be ignored rather than transitioning the CNR
+                  to the failed phase.
+                type: boolean
+            type: object
          required:
          - cycleSettings
          - nodeGroupName
diff --git a/docs/automation/README.md b/docs/automation/README.md
index 292ca21..b793f8b 100644
--- a/docs/automation/README.md
+++ b/docs/automation/README.md
@@ -34,6 +34,8 @@ spec:
   cycleSettings:
     method: Drain
     concurrency: 1
+  validationOptions:
+    skipMissingNamedNodes: true
   healthChecks:
   - endpoint: http://{{ .NodeIP }}:8080/ready
     regexMatch: Ready
diff --git a/docs/cycling/README.md b/docs/cycling/README.md
index e17cfcf..4f374e9 100644
--- a/docs/cycling/README.md
+++ b/docs/cycling/README.md
@@ -106,6 +106,11 @@ spec:
   - "node-name-A"
   - "node-name-B"
 
+  # Optional section - collection of validation options to define stricter or more lenient validation during cycling.
+  validationOptions:
+    # Optional field - Skip node names defined in the CNR that do not match any existing nodes in the Kubernetes API.
+    skipMissingNamedNodes: true|false
+
   cycleNodeSettings:
     # Method can be "Wait" or "Drain", defaults to "Drain" if not provided
     # "Wait" will wait for pods on the node to complete, while "Drain" will forcefully drain them off the node
diff --git a/pkg/apis/atlassian/v1/common_types.go b/pkg/apis/atlassian/v1/common_types.go
index 09236dc..181cea6 100644
--- a/pkg/apis/atlassian/v1/common_types.go
+++ b/pkg/apis/atlassian/v1/common_types.go
@@ -104,3 +104,12 @@ type TLSConfig struct {
 	// sent as part of the request to the upstream host for mTLS.
 	Key string `json:"key,omitempty"`
 }
+
+// ValidationOptions stores the settings to use for validating state of nodegroups
+// in kube and the cloud provider for cycling the nodes.
+type ValidationOptions struct {
+	// SkipMissingNamedNodes is a boolean which determines whether named nodes selected in a CNR must
+	// exist and be valid nodes before cycling can begin. If set to true, named nodes which don't exist
+	// will be ignored rather than transitioning the CNR to the failed phase.
+	SkipMissingNamedNodes bool `json:"skipMissingNamedNodes,omitempty"`
+}
diff --git a/pkg/apis/atlassian/v1/cyclenoderequest_types.go b/pkg/apis/atlassian/v1/cyclenoderequest_types.go
index c6009af..867fabd 100644
--- a/pkg/apis/atlassian/v1/cyclenoderequest_types.go
+++ b/pkg/apis/atlassian/v1/cyclenoderequest_types.go
@@ -26,6 +26,10 @@ type CycleNodeRequestSpec struct {
 	// CycleSettings stores the settings to use for cycling the nodes.
 	CycleSettings CycleSettings `json:"cycleSettings"`
 
+	// ValidationOptions stores the settings to use for validating state of nodegroups
+	// in kube and the cloud provider for cycling the nodes.
+	ValidationOptions ValidationOptions `json:"validationOptions,omitempty"`
+
 	// HealthChecks stores the settings to configure instance custom health checks
 	HealthChecks []HealthCheck `json:"healthChecks,omitempty"`
 
diff --git a/pkg/apis/atlassian/v1/nodegroup_types.go b/pkg/apis/atlassian/v1/nodegroup_types.go
index 52d4c69..9d0e0e0 100644
--- a/pkg/apis/atlassian/v1/nodegroup_types.go
+++ b/pkg/apis/atlassian/v1/nodegroup_types.go
@@ -19,6 +19,10 @@ type NodeGroupSpec struct {
 	// CycleSettings stores the settings to use for cycling the nodes.
 	CycleSettings CycleSettings `json:"cycleSettings"`
 
+	// ValidationOptions stores the settings to use for validating state of nodegroups
+	// in kube and the cloud provider for cycling the nodes.
+	ValidationOptions ValidationOptions `json:"validationOptions,omitempty"`
+
 	// Healthchecks stores the settings to configure instance custom health checks
 	HealthChecks []HealthCheck `json:"healthChecks,omitempty"`
 
diff --git a/pkg/apis/atlassian/v1/zz_generated.deepcopy.go b/pkg/apis/atlassian/v1/zz_generated.deepcopy.go
index 04671d9..dbc0e9d 100644
--- a/pkg/apis/atlassian/v1/zz_generated.deepcopy.go
+++ b/pkg/apis/atlassian/v1/zz_generated.deepcopy.go
@@ -1,3 +1,4 @@
+//go:build !ignore_autogenerated
 // +build !ignore_autogenerated
 
 // Code generated by operator-sdk. DO NOT EDIT.
@@ -101,6 +102,7 @@ func (in *CycleNodeRequestSpec) DeepCopyInto(out *CycleNodeRequestSpec) {
 		copy(*out, *in)
 	}
 	in.CycleSettings.DeepCopyInto(&out.CycleSettings)
+	out.ValidationOptions = in.ValidationOptions
 	if in.HealthChecks != nil {
 		in, out := &in.HealthChecks, &out.HealthChecks
 		*out = make([]HealthCheck, len(*in))
@@ -460,6 +462,7 @@ func (in *NodeGroupSpec) DeepCopyInto(out *NodeGroupSpec) {
 	}
 	in.NodeSelector.DeepCopyInto(&out.NodeSelector)
 	in.CycleSettings.DeepCopyInto(&out.CycleSettings)
+	out.ValidationOptions = in.ValidationOptions
 	if in.HealthChecks != nil {
 		in, out := &in.HealthChecks, &out.HealthChecks
 		*out = make([]HealthCheck, len(*in))
@@ -584,3 +587,19 @@ func (in *TLSConfig) DeepCopy() *TLSConfig {
 	in.DeepCopyInto(out)
 	return out
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ValidationOptions) DeepCopyInto(out *ValidationOptions) {
+	*out = *in
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ValidationOptions.
+func (in *ValidationOptions) DeepCopy() *ValidationOptions {
+	if in == nil {
+		return nil
+	}
+	out := new(ValidationOptions)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/pkg/controller/cyclenoderequest/transitioner/checks.go b/pkg/controller/cyclenoderequest/transitioner/checks.go
index e2e303a..a4df2bc 100644
--- a/pkg/controller/cyclenoderequest/transitioner/checks.go
+++ b/pkg/controller/cyclenoderequest/transitioner/checks.go
@@ -189,7 +189,7 @@ func (t *CycleNodeRequestTransitioner) performHealthCheck(node v1.CycleNodeReque
 
 // performInitialHealthChecks on the nodes selected to be terminated before cycling begin. If any health
 // check fails return an error to prevent cycling from starting
-func (t *CycleNodeRequestTransitioner) performInitialHealthChecks(kubeNodes []corev1.Node) error {
+func (t *CycleNodeRequestTransitioner) performInitialHealthChecks(kubeNodes map[string]corev1.Node) error {
 	// Build a set of ready nodes from which to check below
 	readyNodesSet := make(map[string]v1.CycleNodeRequestNode)
 
@@ -241,7 +241,7 @@
 
 // performCyclingHealthChecks before terminating an instance selected for termination. Cycling pauses
 // until all health checks pass for the new instance before terminating the old one
-func (t *CycleNodeRequestTransitioner) performCyclingHealthChecks(kubeNodes []corev1.Node) (bool, error) {
+func (t *CycleNodeRequestTransitioner) performCyclingHealthChecks(kubeNodes map[string]corev1.Node) (bool, error) {
 	var allHealthChecksPassed bool = true
 
 	// Find new instsances attached to the nodegroup and perform health checks on them
diff --git a/pkg/controller/cyclenoderequest/transitioner/node.go b/pkg/controller/cyclenoderequest/transitioner/node.go
index 046148a..d6b7b53 100644
--- a/pkg/controller/cyclenoderequest/transitioner/node.go
+++ b/pkg/controller/cyclenoderequest/transitioner/node.go
@@ -11,12 +11,15 @@ import (
 
 // listReadyNodes lists nodes that are "ready". By default lists nodes that have also not been touched by Cyclops.
 // A label is used to determine whether nodes have been touched by this CycleNodeRequest.
-func (t *CycleNodeRequestTransitioner) listReadyNodes(includeInProgress bool) (nodes []corev1.Node, err error) {
+func (t *CycleNodeRequestTransitioner) listReadyNodes(includeInProgress bool) (map[string]corev1.Node, error) {
+	nodes := make(map[string]corev1.Node)
+
 	// Get the nodes
 	selector, err := t.cycleNodeRequest.NodeLabelSelector()
 	if err != nil {
 		return nodes, err
 	}
+
 	nodeList, err := t.rm.ListNodes(selector)
 	if err != nil {
 		return nodes, err
@@ -30,14 +33,16 @@ func (t *CycleNodeRequestTransitioner) listReadyNodes(includeInProgress bool) (n
 				continue
 			}
 		}
+
 		// Only add "Ready" nodes
 		for _, cond := range node.Status.Conditions {
 			if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
-				nodes = append(nodes, node)
+				nodes[node.Spec.ProviderID] = node
 				break
 			}
 		}
 	}
+
 	return nodes, nil
 }
 
@@ -56,29 +61,34 @@ func (t *CycleNodeRequestTransitioner) getNodesToTerminate(numNodes int64) (node
 	}
 
 	for _, kubeNode := range kubeNodes {
-		// Skip nodes that are already being worked on so we don't duplicate our work
 		if value, ok := kubeNode.Labels[cycleNodeLabel]; ok && value == t.cycleNodeRequest.Name {
 			numNodesInProgress++
+		}
+	}
+
+	for _, nodeToTerminate := range t.cycleNodeRequest.Status.NodesToTerminate {
+		kubeNode, found := kubeNodes[nodeToTerminate.ProviderID]
+
+		if !found {
 			continue
 		}
 
-		for _, nodeToTerminate := range t.cycleNodeRequest.Status.NodesToTerminate {
-			// Add nodes that need to be terminated but have not yet been actioned
-			if kubeNode.Name == nodeToTerminate.Name && kubeNode.Spec.ProviderID == nodeToTerminate.ProviderID {
-				nodes = append(nodes, &kubeNode)
+		// Skip nodes that are already being worked on so we don't duplicate our work
+		if value, ok := kubeNode.Labels[cycleNodeLabel]; ok && value == t.cycleNodeRequest.Name {
+			continue
+		}
 
-				for i := 0; i < len(t.cycleNodeRequest.Status.NodesAvailable); i++ {
-					if kubeNode.Name == t.cycleNodeRequest.Status.NodesAvailable[i].Name {
-						// Remove nodes from available if they are also scheduled for termination
-						// Slice syntax removes this node at `i` from the array
-						t.cycleNodeRequest.Status.NodesAvailable = append(
-							t.cycleNodeRequest.Status.NodesAvailable[:i],
-							t.cycleNodeRequest.Status.NodesAvailable[i+1:]...,
-						)
+		// Add nodes that need to be terminated but have not yet been actioned
+		nodes = append(nodes, &kubeNode)
 
-						break
-					}
-				}
+		for i := 0; i < len(t.cycleNodeRequest.Status.NodesAvailable); i++ {
+			if kubeNode.Name == t.cycleNodeRequest.Status.NodesAvailable[i].Name {
+				// Remove nodes from available if they are also scheduled for termination
+				// Slice syntax removes this node at `i` from the array
+				t.cycleNodeRequest.Status.NodesAvailable = append(
+					t.cycleNodeRequest.Status.NodesAvailable[:i],
+					t.cycleNodeRequest.Status.NodesAvailable[i+1:]...,
+				)
 				break
 			}
@@ -94,32 +104,39 @@ func (t *CycleNodeRequestTransitioner) getNodesToTerminate(numNodes int64) (node
 }
 
 // addNamedNodesToTerminate adds the named nodes for this CycleNodeRequest to the list of nodes to terminate.
-// Returns an error if any named node does not exist in the node group for this CycleNodeRequest.
-func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes []corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) error {
-	for _, namedNode := range t.cycleNodeRequest.Spec.NodeNames {
-		foundNode := false
-		for _, kubeNode := range kubeNodes {
-			if kubeNode.Name == namedNode {
-				foundNode = true
+// Skips any named node that does not exist in the node group for this CycleNodeRequest when SkipMissingNamedNodes is set, otherwise returns an error for missing named nodes.
+func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[string]corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) error {
+	nodeLookupByName := make(map[string]corev1.Node)
 
-				t.cycleNodeRequest.Status.NodesAvailable = append(
-					t.cycleNodeRequest.Status.NodesAvailable,
-					newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()),
-				)
+	for _, node := range kubeNodes {
+		nodeLookupByName[node.Name] = node
+	}
 
-				t.cycleNodeRequest.Status.NodesToTerminate = append(
-					t.cycleNodeRequest.Status.NodesToTerminate,
-					newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()),
-				)
+	for _, namedNode := range t.cycleNodeRequest.Spec.NodeNames {
+		kubeNode, found := nodeLookupByName[namedNode]
 
-				break
+		if !found {
+			t.rm.Logger.Info("could not find node by name, skipping", "nodeName", namedNode)
+
+			if !t.cycleNodeRequest.Spec.ValidationOptions.SkipMissingNamedNodes {
+				return fmt.Errorf("could not find node by name: %v", namedNode)
 			}
-		}
 
-		if !foundNode {
-			return fmt.Errorf("could not find node by name: %v", namedNode)
+			t.rm.LogEvent(t.cycleNodeRequest, "SkipMissingNamedNode", "Named node %s not found", namedNode)
+			continue
 		}
+
+		t.cycleNodeRequest.Status.NodesAvailable = append(
+			t.cycleNodeRequest.Status.NodesAvailable,
+			newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()),
+		)
+
+		t.cycleNodeRequest.Status.NodesToTerminate = append(
+			t.cycleNodeRequest.Status.NodesToTerminate,
+			newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()),
+		)
 	}
+
 	return nil
 }
diff --git a/pkg/controller/cyclenoderequest/transitioner/transitions.go b/pkg/controller/cyclenoderequest/transitioner/transitions.go
index 0cc5d78..c122b89 100644
--- a/pkg/controller/cyclenoderequest/transitioner/transitions.go
+++ b/pkg/controller/cyclenoderequest/transitioner/transitions.go
@@ -64,10 +64,12 @@ func (t *CycleNodeRequestTransitioner) transitionUndefined() (reconcile.Result,
 func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, error) {
 	// Fetch the node names for the cycleNodeRequest, using the label selector provided
 	t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Selecting nodes with label selector")
+
 	kubeNodes, err := t.listReadyNodes(true)
 	if err != nil {
 		return t.transitionToHealing(err)
 	}
+
 	if len(kubeNodes) == 0 {
 		return t.transitionToHealing(fmt.Errorf("no nodes matched selector"))
 	}
@@ -83,14 +85,12 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
 	if err != nil {
 		return t.transitionToHealing(errors.Wrap(err, "failed to check instances that exist from cloud provider"))
 	}
-	var existingKubeNodes []corev1.Node
-	for _, node := range kubeNodes {
-		for _, validProviderID := range existingProviderIDs {
-			if node.Spec.ProviderID == validProviderID {
-				existingKubeNodes = append(existingKubeNodes, node)
-				break
-			}
+	existingKubeNodes := make(map[string]corev1.Node)
+
+	for _, validProviderID := range existingProviderIDs {
+		if node, found := kubeNodes[validProviderID]; found {
+			existingKubeNodes[node.Spec.ProviderID] = node
 		}
 	}
 
@@ -120,19 +120,40 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
 	// Do some sanity checking before we start filtering things
 	// Check the instance count of the node group matches the number of nodes found in Kubernetes
 	if len(kubeNodes) != len(nodeGroupInstances) {
-		nodesNotInCPNodeGroup, nodesNotInKube := findOffendingNodes(kubeNodes, nodeGroupInstances)
 		var offendingNodesInfo string
+
+		nodesNotInCPNodeGroup, nodesNotInKube := findOffendingNodes(kubeNodes, nodeGroupInstances)
+
 		if len(nodesNotInCPNodeGroup) > 0 {
+			providerIDs := make([]string, 0)
+
+			for providerID := range nodesNotInCPNodeGroup {
+				providerIDs = append(providerIDs,
+					fmt.Sprintf("id %q", providerID),
+				)
+			}
+
 			offendingNodesInfo += "nodes not in node group: "
-			offendingNodesInfo += strings.Join(nodesNotInCPNodeGroup, ",")
+			offendingNodesInfo += strings.Join(providerIDs, ",")
 		}
+
 		if len(nodesNotInKube) > 0 {
 			if offendingNodesInfo != "" {
 				offendingNodesInfo += ";"
 			}
+
+			providerIDs := make([]string, 0)
+
+			for providerID, node := range nodesNotInKube {
+				providerIDs = append(providerIDs,
+					fmt.Sprintf("id %q in %q", providerID, node.NodeGroupName()),
+				)
+			}
+
 			offendingNodesInfo += "nodes not inside cluster: "
-			offendingNodesInfo += strings.Join(nodesNotInKube, ",")
+			offendingNodesInfo += strings.Join(providerIDs, ",")
 		}
+
 		t.rm.LogEvent(t.cycleNodeRequest, "NodeCountMismatch",
 			"node group: %v, kube: %v. %v",
 			len(nodeGroupInstances), len(kubeNodes), offendingNodesInfo)
@@ -142,12 +163,16 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
 		if err != nil {
 			return t.transitionToHealing(err)
 		}
+
 		if timedOut {
 			err := fmt.Errorf(
-				"node count mismatch, number of kubernetes of nodes does not match number of cloud provider instances after %v",
-				nodeEquilibriumWaitLimit)
+				"node count mismatch, number of kubernetes nodes does not match number of cloud provider instances after %v",
+				nodeEquilibriumWaitLimit,
+			)
+
 			return t.transitionToHealing(err)
 		}
+
 		return reconcile.Result{Requeue: true, RequeueAfter: requeueDuration}, nil
 	}
 
@@ -162,6 +187,7 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
 	} else {
 		// Otherwise just add all the nodes in the node group
 		t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Adding all node group nodes to NodesToTerminate")
+
 		for _, kubeNode := range kubeNodes {
 			// Check to ensure the kubeNode object maps to an existing node in the ASG
 			// If this isn't the case, this is a phantom node. Fail the cnr to be safe.
@@ -213,7 +239,9 @@ func (t *CycleNodeRequestTransitioner) transitionInitialised() (reconcile.Result
 	// The maximum nodes we can select are bounded by our concurrency. We take into account the number
 	// of nodes we are already working on, and only introduce up to our concurrency cap more nodes in this step.
 	maxNodesToSelect := t.cycleNodeRequest.Spec.CycleSettings.Concurrency - t.cycleNodeRequest.Status.ActiveChildren
 
+	t.rm.Logger.Info("Selecting nodes to terminate", "numNodes", maxNodesToSelect)
+
 	nodes, numNodesInProgress, err := t.getNodesToTerminate(maxNodesToSelect)
 	if err != nil {
 		return t.transitionToHealing(err)
diff --git a/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go b/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go
index 9c85f63..c95ccaa 100644
--- a/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go
+++ b/pkg/controller/cyclenoderequest/transitioner/transitions_pending_test.go
@@ -59,7 +59,9 @@ func TestPendingSimpleCase(t *testing.T) {
 	assert.Equal(t, cnr.Status.NumNodesCycled, 0)
 }
 
-// Test to ensure the Pending phase will accept a CNR with a correct named node.
+// Test to ensure the Pending phase will accept a CNR with a named node that
+// matches one of the nodes matching the node selector. The named node should
+// be selected for cycling and the CNR should proceed as normal.
 func TestPendingWithNamedNode(t *testing.T) {
 	nodegroup, err := mock.NewNodegroup("ng-1", 2)
 	if err != nil {
 		assert.NoError(t, err)
 	}
@@ -108,9 +110,65 @@ func TestPendingWithNamedNode(t *testing.T) {
 	assert.Equal(t, cnr.Status.NumNodesCycled, 0)
 }
 
+// Test to ensure the Pending phase will accept a CNR with a named node that
+// does not match any of the nodes matching the node selector if strict
+// validation is not enabled. It will just select the nodes that
+// exist.
+func TestPendingWrongNamedNodeSkipMissingNamedNodes(t *testing.T) {
+	nodegroup, err := mock.NewNodegroup("ng-1", 2)
+	if err != nil {
+		assert.NoError(t, err)
+	}
+
+	cnr := &v1.CycleNodeRequest{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "cnr-1",
+			Namespace: "kube-system",
+		},
+		Spec: v1.CycleNodeRequestSpec{
+			NodeGroupsList: []string{"ng-1"},
+			CycleSettings: v1.CycleSettings{
+				Concurrency: 1,
+				Method:      v1.CycleNodeRequestMethodDrain,
+			},
+			ValidationOptions: v1.ValidationOptions{
+				SkipMissingNamedNodes: true,
+			},
+			Selector: metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"customer": "kitt",
+				},
+			},
+			NodeNames: []string{
+				"ng-1-node-0",
+				"ng-1-node-2",
+			},
+		},
+		Status: v1.CycleNodeRequestStatus{
+			Phase: v1.CycleNodeRequestPending,
+		},
+	}
+
+	fakeTransitioner := NewFakeTransitioner(cnr,
+		WithKubeNodes(nodegroup),
+		WithCloudProviderInstances(nodegroup),
+	)
+
+	result, err := fakeTransitioner.Run()
+	assert.NoError(t, err)
+	assert.True(t, result.Requeue)
+
+	// It should move to the Initialised phase and set up the status of the CNR
+	// in a predictable manner
+	assert.Equal(t, v1.CycleNodeRequestInitialised, cnr.Status.Phase)
+	assert.Len(t, cnr.Status.NodesToTerminate, 1)
+	assert.Equal(t, cnr.Status.ActiveChildren, int64(0))
+	assert.Equal(t, cnr.Status.NumNodesCycled, 0)
+}
+
 // Test to ensure the Pending phase will reject a CNR with a named node that
-// does not match any of the nodes matching the node selector. It should error
-// out immediately.
+// does not match any of the nodes matching the node selector if strict
+// validation is enabled. It should error out immediately.
 func TestPendingWrongNamedNode(t *testing.T) {
 	nodegroup, err := mock.NewNodegroup("ng-1", 2)
 	if err != nil {
 		assert.NoError(t, err)
 	}
diff --git a/pkg/controller/cyclenoderequest/transitioner/util.go b/pkg/controller/cyclenoderequest/transitioner/util.go
index aef2815..8bade32 100644
--- a/pkg/controller/cyclenoderequest/transitioner/util.go
+++ b/pkg/controller/cyclenoderequest/transitioner/util.go
@@ -242,6 +242,7 @@ func (t *CycleNodeRequestTransitioner) checkIfTransitioning(numNodesToCycle, num
 		transition, err := t.transitionObject(v1.CycleNodeRequestWaitingTermination)
 		return true, transition, err
 	}
+
 	// otherwise, we have finished everything, so transition to Successful
 	transition, err := t.transitionToSuccessful()
 	return true, transition, err
@@ -252,21 +253,19 @@ func (t *CycleNodeRequestTransitioner) checkIfTransitioning(numNodesToCycle, num
 
 // findOffendingNodes finds the offending nodes information which cause number of nodes mismatch between
 // cloud provider node group and nodes inside kubernetes cluster using label selector
-func findOffendingNodes(kubeNodes []corev1.Node, cloudProviderNodes map[string]cloudprovider.Instance) ([]string, []string) {
-	kubeNodesMap := make(map[string]corev1.Node)
-	var nodesNotInCPNodeGroup []string
-	var nodesNotInKube []string
+func findOffendingNodes(kubeNodes map[string]corev1.Node, cloudProviderNodes map[string]cloudprovider.Instance) (map[string]corev1.Node, map[string]cloudprovider.Instance) {
+	var nodesNotInCPNodeGroup = make(map[string]corev1.Node)
+	var nodesNotInKube = make(map[string]cloudprovider.Instance)
+
 	for _, kubeNode := range kubeNodes {
-		kubeNodesMap[kubeNode.Spec.ProviderID] = kubeNode
 		if _, ok := cloudProviderNodes[kubeNode.Spec.ProviderID]; !ok {
-			nodesNotInCPNodeGroup = append(nodesNotInCPNodeGroup, fmt.Sprintf("id %q", kubeNode.Spec.ProviderID))
+			nodesNotInCPNodeGroup[kubeNode.Spec.ProviderID] = kubeNode
 		}
 	}
-	for cpNode := range cloudProviderNodes {
-		if _, ok := kubeNodesMap[cpNode]; !ok {
-			nodesNotInKube = append(nodesNotInKube, fmt.Sprintf("id %q in %q",
-				cpNode,
-				cloudProviderNodes[cpNode].NodeGroupName()))
+
+	for providerID, cpNode := range cloudProviderNodes {
+		if _, ok := kubeNodes[providerID]; !ok {
+			nodesNotInKube[providerID] = cpNode
 		}
 	}
diff --git a/pkg/controller/cyclenoderequest/transitioner/util_test.go b/pkg/controller/cyclenoderequest/transitioner/util_test.go
index 97e2671..22c326e 100644
--- a/pkg/controller/cyclenoderequest/transitioner/util_test.go
+++ b/pkg/controller/cyclenoderequest/transitioner/util_test.go
@@ -1,6 +1,7 @@
 package transitioner
 
 import (
+	"reflect"
 	"testing"
 
 	"github.com/atlassian-labs/cyclops/pkg/cloudprovider"
@@ -45,88 +46,98 @@ func TestFindOffendingNodes(t *testing.T) {
 	tests := []struct {
 		name                   string
-		knodes                 []corev1.Node
+		knodes                 map[string]corev1.Node
 		cnodes                 map[string]cloudprovider.Instance
-		expectNotInCPNodeGroup []string
-		expectNotInKube        []string
+		expectNotInCPNodeGroup map[string]corev1.Node
+		expectNotInKube        map[string]cloudprovider.Instance
 	}{
 		{
 			"kube nodes match cloud provider nodes",
-			[]corev1.Node{
-				buildNode(dummyInstanceA),
-				buildNode(dummyInstanceB),
-				buildNode(dummyInstanceC),
+			map[string]corev1.Node{
+				dummyInstanceA.providerID: buildNode(dummyInstanceA),
+				dummyInstanceB.providerID: buildNode(dummyInstanceB),
+				dummyInstanceC.providerID: buildNode(dummyInstanceC),
 			},
 			map[string]cloudprovider.Instance{
 				dummyInstanceA.providerID: &dummyInstanceA,
 				dummyInstanceB.providerID: &dummyInstanceB,
 				dummyInstanceC.providerID: &dummyInstanceC,
 			},
-			[]string{},
-			[]string{},
+			make(map[string]corev1.Node),
+			make(map[string]cloudprovider.Instance),
 		},
 		{
 			"more nodes in kube than cloud provider",
-			[]corev1.Node{
-				buildNode(dummyInstanceA),
-				buildNode(dummyInstanceB),
-				buildNode(dummyInstanceC),
+			map[string]corev1.Node{
+				dummyInstanceA.providerID: buildNode(dummyInstanceA),
+				dummyInstanceB.providerID: buildNode(dummyInstanceB),
+				dummyInstanceC.providerID: buildNode(dummyInstanceC),
 			},
 			map[string]cloudprovider.Instance{
 				dummyInstanceA.providerID: &dummyInstanceA,
 				dummyInstanceB.providerID: &dummyInstanceB,
 			},
-			[]string{"id \"aws:///us-east-1c/i-cbcdefghijk\""},
-			[]string{},
+			map[string]corev1.Node{
+				dummyInstanceC.providerID: buildNode(dummyInstanceC),
+			},
+			make(map[string]cloudprovider.Instance),
 		},
 		{
 			"more nodes in cloud provider than kube",
-			[]corev1.Node{
-				buildNode(dummyInstanceA),
-				buildNode(dummyInstanceB),
+			map[string]corev1.Node{
+				dummyInstanceA.providerID: buildNode(dummyInstanceA),
+				dummyInstanceB.providerID: buildNode(dummyInstanceB),
 			},
 			map[string]cloudprovider.Instance{
 				dummyInstanceA.providerID: &dummyInstanceA,
 				dummyInstanceB.providerID: &dummyInstanceB,
 				dummyInstanceC.providerID: &dummyInstanceC,
 			},
-			[]string{},
-			[]string{"id \"aws:///us-east-1c/i-cbcdefghijk\" in \"GroupC\""},
+			make(map[string]corev1.Node),
+			map[string]cloudprovider.Instance{
+				dummyInstanceC.providerID: &dummyInstanceC,
+			},
 		},
 		{
 			"no nodes in cloud provider",
-			[]corev1.Node{
-				buildNode(dummyInstanceA),
-				buildNode(dummyInstanceB),
+			map[string]corev1.Node{
+				dummyInstanceA.providerID: buildNode(dummyInstanceA),
+				dummyInstanceB.providerID: buildNode(dummyInstanceB),
+			},
+			make(map[string]cloudprovider.Instance),
+			map[string]corev1.Node{
+				dummyInstanceA.providerID: buildNode(dummyInstanceA),
+				dummyInstanceB.providerID: buildNode(dummyInstanceB),
 			},
-			map[string]cloudprovider.Instance{},
-			[]string{"id \"aws:///us-east-1a/i-abcdefghijk\"", "id \"aws:///us-east-1b/i-bbcdefghijk\""},
-			[]string{},
+			make(map[string]cloudprovider.Instance),
 		},
 		{
 			"no nodes in kube",
-			[]corev1.Node{},
+			make(map[string]corev1.Node),
+			map[string]cloudprovider.Instance{
+				dummyInstanceA.providerID: &dummyInstanceA,
+				dummyInstanceB.providerID: &dummyInstanceB,
+			},
+			make(map[string]corev1.Node),
 			map[string]cloudprovider.Instance{
 				dummyInstanceA.providerID: &dummyInstanceA,
 				dummyInstanceB.providerID: &dummyInstanceB,
 			},
-			[]string{},
-			[]string{"id \"aws:///us-east-1a/i-abcdefghijk\" in \"GroupA\"", "id \"aws:///us-east-1b/i-bbcdefghijk\" in \"GroupB\""},
 		},
 		{
 			"both cloud provider and kube nodes are empty",
-			[]corev1.Node{},
-			map[string]cloudprovider.Instance{},
-			[]string{},
-			[]string{},
+			make(map[string]corev1.Node),
+			make(map[string]cloudprovider.Instance),
+			make(map[string]corev1.Node),
+			make(map[string]cloudprovider.Instance),
 		},
 	}
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			nodesNotInCPNodeGroup, nodesNotInKube := findOffendingNodes(test.knodes, test.cnodes)
-			assert.ElementsMatch(t, test.expectNotInCPNodeGroup, nodesNotInCPNodeGroup)
-			assert.ElementsMatch(t, test.expectNotInKube, nodesNotInKube)
+			assert.Equal(t, true, reflect.DeepEqual(test.expectNotInCPNodeGroup, nodesNotInCPNodeGroup))
+			assert.Equal(t, true, reflect.DeepEqual(test.expectNotInKube, nodesNotInKube))
 		})
 	}
 }
diff --git a/pkg/generation/cnr.go b/pkg/generation/cnr.go
index 9808b93..db5615d 100644
--- a/pkg/generation/cnr.go
+++ b/pkg/generation/cnr.go
@@ -3,9 +3,10 @@ package generation
 import (
 	"context"
 	"fmt"
"github.com/atlassian-labs/cyclops/pkg/controller/cyclenoderequest" "strings" + "github.com/atlassian-labs/cyclops/pkg/controller/cyclenoderequest" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/validation" "sigs.k8s.io/controller-runtime/pkg/client" @@ -73,8 +74,9 @@ func GiveReason(cnr *atlassianv1.CycleNodeRequest, reason string) { } cnr.Annotations[cnrReasonAnnotationKey] = reason } + // SetAPIVersion adds apiVersion annotation to the cnr -func SetAPIVersion(cnr *atlassianv1.CycleNodeRequest, clientVersion string){ +func SetAPIVersion(cnr *atlassianv1.CycleNodeRequest, clientVersion string) { if cnr.Annotations == nil { cnr.Annotations = map[string]string{} } @@ -110,6 +112,7 @@ func GenerateCNR(nodeGroup atlassianv1.NodeGroup, nodes []string, name, namespac PreTerminationChecks: nodeGroup.Spec.PreTerminationChecks, SkipInitialHealthChecks: nodeGroup.Spec.SkipInitialHealthChecks, SkipPreTerminationChecks: nodeGroup.Spec.SkipPreTerminationChecks, + ValidationOptions: nodeGroup.Spec.ValidationOptions, }, } }