Skip to content

Commit

Permalink
Reduce lenient validation to just missing node names and add strict v…
Browse files Browse the repository at this point in the history
…alidation option
  • Loading branch information
vincentportella committed Aug 13, 2024
1 parent 834f0d0 commit 5e8160c
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 6 deletions.
9 changes: 9 additions & 0 deletions deploy/crds/atlassian.com_cyclenoderequests_crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ spec:
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
clusterName:
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
Expand Down Expand Up @@ -101,6 +103,13 @@ spec:
- Drain
- Wait
type: string
strictValidation:
description: StrictValidation is a boolean which determines whether
named nodes selected in a CNR must exist and be valid nodes
before cycling can begin. If set to true when invalid nodes
are selected the CNR will be transitioned to the "Failed" phase
before cycling can begin again.
type: boolean
required:
- method
type: object
Expand Down
7 changes: 7 additions & 0 deletions deploy/crds/atlassian.com_cyclenodestatuses_crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ spec:
- Drain
- Wait
type: string
strictValidation:
description: StrictValidation is a boolean which determines whether
named nodes selected in a CNR must exist and be valid nodes
before cycling can begin. If set to true when invalid nodes
are selected the CNR will be transitioned to the "Failed" phase
before cycling can begin again.
type: boolean
required:
- method
type: object
Expand Down
7 changes: 7 additions & 0 deletions deploy/crds/atlassian.com_nodegroups_crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ spec:
- Drain
- Wait
type: string
strictValidation:
description: StrictValidation is a boolean which determines whether
named nodes selected in a CNR must exist and be valid nodes
before cycling can begin. If set to true when invalid nodes
are selected the CNR will be transitioned to the "Failed" phase
before cycling can begin again.
type: boolean
required:
- method
type: object
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/atlassian/v1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ type CycleSettings struct {
// in-progress CNS request timeout from the time it's worked on by the controller.
// If no cyclingTimeout is provided, CNS will use the default controller CNS cyclingTimeout.
CyclingTimeout *metav1.Duration `json:"cyclingTimeout,omitempty"`

// StrictValidation is a boolean which determines whether named nodes selected in a CNR must
// exist and be valid nodes before cycling can begin. If set to true when invalid nodes are
// selected the CNR will be transitioned to the "Failed" phase before cycling can begin again.
StrictValidation bool `json:"strictValidation,omitempty"`
}

// HealthCheck defines the health check configuration for the NodeGroup
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/atlassian/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion pkg/controller/cyclenoderequest/transitioner/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func (t *CycleNodeRequestTransitioner) getNodesToTerminate(numNodes int64) (node

// addNamedNodesToTerminate adds the named nodes for this CycleNodeRequest to the list of nodes to terminate.
// Skips any named node that does not exist in the node group for this CycleNodeRequest.
func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[string]corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) {
func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[string]corev1.Node, nodeGroupInstances map[string]cloudprovider.Instance) error {
kubeNodesMap := make(map[string]corev1.Node)

for _, node := range kubeNodes {
Expand All @@ -117,6 +117,11 @@ func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[st

if !found {
t.rm.Logger.Info("could not find node by name, skipping", "nodeName", namedNode)

if t.cycleNodeRequest.Spec.CycleSettings.StrictValidation {
return fmt.Errorf("could not find node by name: %v", namedNode)
}

continue
}

Expand All @@ -130,6 +135,8 @@ func (t *CycleNodeRequestTransitioner) addNamedNodesToTerminate(kubeNodes map[st
newCycleNodeRequestNode(&kubeNode, nodeGroupInstances[kubeNode.Spec.ProviderID].NodeGroupName()),
)
}

return nil
}

// newCycleNodeRequestNode converts a corev1.Node to a v1.CycleNodeRequestNode. This is done multiple
Expand Down
9 changes: 8 additions & 1 deletion pkg/controller/cyclenoderequest/transitioner/transitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
return t.transitionToHealing(err)
}

if len(kubeNodes) == 0 {
return t.transitionToHealing(fmt.Errorf("no nodes matched selector"))
}

// Only retain nodes which still exist inside cloud provider
var nodeProviderIDs []string

Expand Down Expand Up @@ -176,7 +180,10 @@ func (t *CycleNodeRequestTransitioner) transitionPending() (reconcile.Result, er
if len(t.cycleNodeRequest.Spec.NodeNames) > 0 {
// If specific node names are provided, check they actually exist in the node group
t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Adding named nodes to NodesToTerminate")
t.addNamedNodesToTerminate(kubeNodes, nodeGroupInstances)
err := t.addNamedNodesToTerminate(kubeNodes, nodeGroupInstances)
if err != nil {
return t.transitionToHealing(err)
}
} else {
// Otherwise just add all the nodes in the node group
t.rm.LogEvent(t.cycleNodeRequest, "SelectingNodes", "Adding all node group nodes to NodesToTerminate")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ func TestPendingSimpleCase(t *testing.T) {
assert.Equal(t, cnr.Status.NumNodesCycled, 0)
}

// Test to ensure the Pending phase will accept a CNR with a correct named node.
// Test to ensure the Pending phase will reject a CNR with a named node that
// does not match any of the nodes matching the node selector if strict
// validation is enabled. It should error out immediately.
func TestPendingWithNamedNode(t *testing.T) {
nodegroup, err := mock.NewNodegroup("ng-1", 2)
if err != nil {
Expand Down Expand Up @@ -108,9 +110,10 @@ func TestPendingWithNamedNode(t *testing.T) {
assert.Equal(t, cnr.Status.NumNodesCycled, 0)
}

// Test to ensure the Pending phase will reject a CNR with a named node that
// does not match any of the nodes matching the node selector. It should error
// out immediately.
// Test to ensure the Pending phase will accept a CNR with a named node that
// does not match any of the nodes matching the node selector if strict
// validation is not enabled. It will just select the select the nodes that
// exist.
func TestPendingWrongNamedNode(t *testing.T) {
nodegroup, err := mock.NewNodegroup("ng-1", 2)
if err != nil {
Expand Down Expand Up @@ -148,6 +151,59 @@ func TestPendingWrongNamedNode(t *testing.T) {
WithCloudProviderInstances(nodegroup),
)

result, err := fakeTransitioner.Run()
assert.NoError(t, err)
assert.True(t, result.Requeue)

// It should move to the Initialised phase and set up the status of the CNR
// in a predictable manner
assert.Equal(t, v1.CycleNodeRequestInitialised, cnr.Status.Phase)
assert.Len(t, cnr.Status.NodesToTerminate, 1)
assert.Equal(t, cnr.Status.ActiveChildren, int64(0))
assert.Equal(t, cnr.Status.NumNodesCycled, 0)
}

// Test to ensure the Pending phase will reject a CNR with a named node that
// does not match any of the nodes matching the node selector if strict
// validation is enabled. It should error out immediately.
func TestPendingWrongNamedNodeStrictValidation(t *testing.T) {
nodegroup, err := mock.NewNodegroup("ng-1", 2)
if err != nil {
assert.NoError(t, err)
}

cnr := &v1.CycleNodeRequest{
ObjectMeta: metav1.ObjectMeta{
Name: "cnr-1",
Namespace: "kube-system",
},
Spec: v1.CycleNodeRequestSpec{
NodeGroupsList: []string{"ng-1"},
CycleSettings: v1.CycleSettings{
Concurrency: 1,
Method: v1.CycleNodeRequestMethodDrain,
StrictValidation: true,
},
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
"customer": "kitt",
},
},
NodeNames: []string{
"ng-1-node-0",
"ng-1-node-2",
},
},
Status: v1.CycleNodeRequestStatus{
Phase: v1.CycleNodeRequestPending,
},
}

fakeTransitioner := NewFakeTransitioner(cnr,
WithKubeNodes(nodegroup),
WithCloudProviderInstances(nodegroup),
)

_, err = fakeTransitioner.Run()
assert.Error(t, err)
assert.Equal(t, v1.CycleNodeRequestHealing, cnr.Status.Phase)
Expand Down

0 comments on commit 5e8160c

Please sign in to comment.