Skip to content

Commit

Permalink
Match unreachable taint key without comparing effect
Browse files Browse the repository at this point in the history
Problem: Up to k8s 1.18.10, the k8s node lifecycle controller sets these two taints on an unreachable node
(or a node with Ready=Unknown): `node.kubernetes.io/unreachable:NoSchedule` and `node.kubernetes.io/unreachable:NoExecute`.
In k8s 1.19.3, the node controller sets only the taint with the NoSchedule effect for such a node.
The nodepool controller checks for the presence of a taint with key `node.kubernetes.io/unreachable` and effect `NoExecute`
when determining if a node is unreachable and needs to be replaced. Since the node in 1.19 does not get this taint, the nodepool
controller does not consider it unreachable.

Solution: Modify the check in the nodepool controller to consider a node unreachable even if only the taint key matches
`node.kubernetes.io/unreachable`. The taint effect should not impact how the nodepool controller processes this node.
  • Loading branch information
mrajashree committed Oct 28, 2020
1 parent a501bf8 commit b46c0ec
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions pkg/controllers/management/nodepool/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,7 @@ import (
)

var (
nameRegexp = regexp.MustCompile("^(.*?)([0-9]+)$")
unReachableTaint = v1.Taint{
Key: "node.kubernetes.io/unreachable",
Effect: "NoExecute",
}
falseValue = false
nameRegexp = regexp.MustCompile("^(.*?)([0-9]+)$")
)

type Controller struct {
Expand Down Expand Up @@ -221,7 +216,7 @@ func (c *Controller) createOrCheckNodes(nodePool *v3.NodePool, simulate bool) (b
}
}
// remove unreachable node with the unreachable taint & status of Ready being Unknown
q := getTaint(node.Spec.InternalNodeSpec.Taints, &unReachableTaint)
q := getUnreachableTaint(node.Spec.InternalNodeSpec.Taints)
if q != nil && deleteNotReadyAfter > 0 {
changed = true
if isNodeReadyUnknown(node) && !simulate {
Expand Down Expand Up @@ -363,14 +358,14 @@ func (c *Controller) updateNodeRoles(existing *v3.Node, nodePool *v3.NodePool, s
// requeue checks every 5 seconds if the node is still unreachable with one goroutine per node
func (c *Controller) requeue(timeout time.Duration, np *v3.NodePool, node *v3.Node) {

t := getTaint(node.Spec.InternalNodeSpec.Taints, &unReachableTaint)
t := getUnreachableTaint(node.Spec.InternalNodeSpec.Taints)
for t != nil {
time.Sleep(5 * time.Second)
exist, err := c.NodeLister.Get(node.Namespace, node.Name)
if err != nil {
break
}
t = getTaint(exist.Spec.InternalNodeSpec.Taints, &unReachableTaint)
t = getUnreachableTaint(exist.Spec.InternalNodeSpec.Taints)
if t != nil && time.Since(t.TimeAdded.Time) > timeout {
logrus.Debugf("Enqueue nodepool controller: %s %s", np.Namespace, np.Name)
c.NodePoolController.Enqueue(np.Namespace, np.Name)
Expand All @@ -382,10 +377,9 @@ func (c *Controller) requeue(timeout time.Duration, np *v3.NodePool, node *v3.No
c.mutex.Unlock()
}

// getTaint returns the taint that matches the given request
func getTaint(taints []v1.Taint, taintToFind *v1.Taint) *v1.Taint {
func getUnreachableTaint(taints []v1.Taint) *v1.Taint {
for _, taint := range taints {
if taint.MatchTaint(taintToFind) {
if taint.Key == v1.TaintNodeUnreachable {
return &taint
}
}
Expand Down

0 comments on commit b46c0ec

Please sign in to comment.