@@ -17,6 +17,7 @@ limitations under the License.
1717package actuation
1818
1919import (
20+ default_context "context"
2021 "strings"
2122 "time"
2223
@@ -43,11 +44,14 @@ import (
4344 "k8s.io/autoscaler/cluster-autoscaler/utils/expiring"
4445 kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
4546 "k8s.io/autoscaler/cluster-autoscaler/utils/taints"
47+
48+ "k8s.io/client-go/util/workqueue"
4649 "k8s.io/klog/v2"
4750)
4851
4952const (
50- pastLatencyExpireDuration = time .Hour
53+ pastLatencyExpireDuration = time .Hour
// maxConcurrentNodesTainting is handed to workqueue.ParallelizeUntil in
// taintNodesSync as its worker-count argument, bounding how many taintNode
// calls are in flight at the same time.
54+ maxConcurrentNodesTainting = 5
5155)
5256
5357// Actuator is responsible for draining and deleting nodes.
@@ -179,33 +183,55 @@ func (a *Actuator) deleteAsyncEmpty(NodeGroupViews []*budgets.NodeGroupView, nod
179183// taintNodesSync synchronously taints all provided nodes with NoSchedule. If tainting fails for any of the nodes, already
180184// applied taints are cleaned up.
181185func (a * Actuator ) taintNodesSync (NodeGroupViews []* budgets.NodeGroupView ) (time.Duration , errors.AutoscalerError ) {
182- var taintedNodes []* apiv1.Node
186+ nodesToTaint := make ( []* apiv1.Node , 0 )
183187 var updateLatencyTracker * UpdateLatencyTracker
184188 nodeDeleteDelayAfterTaint := a .nodeDeleteDelayAfterTaint
185189 if a .ctx .AutoscalingOptions .DynamicNodeDeleteDelayAfterTaintEnabled {
186190 updateLatencyTracker = NewUpdateLatencyTracker (a .ctx .AutoscalingKubeClients .ListerRegistry .AllNodeLister ())
187191 go updateLatencyTracker .Start ()
188192 }
193+
189194 for _ , bucket := range NodeGroupViews {
190195 for _ , node := range bucket .Nodes {
191196 if a .ctx .AutoscalingOptions .DynamicNodeDeleteDelayAfterTaintEnabled {
192197 updateLatencyTracker .StartTimeChan <- nodeTaintStartTime {node .Name , time .Now ()}
193198 }
194- err := a .taintNode (node )
195- if err != nil {
196- a .ctx .Recorder .Eventf (node , apiv1 .EventTypeWarning , "ScaleDownFailed" , "failed to mark the node as toBeDeleted/unschedulable: %v" , err )
197- // Clean up already applied taints in case of issues.
198- for _ , taintedNode := range taintedNodes {
199- _ , _ = taints .CleanToBeDeleted (taintedNode , a .ctx .ClientSet , a .ctx .CordonNodeBeforeTerminate )
200- }
201- if a .ctx .AutoscalingOptions .DynamicNodeDeleteDelayAfterTaintEnabled {
202- close (updateLatencyTracker .AwaitOrStopChan )
203- }
204- return nodeDeleteDelayAfterTaint , errors .NewAutoscalerError (errors .ApiCallError , "couldn't taint node %q with ToBeDeleted" , node )
205- }
206- taintedNodes = append (taintedNodes , node )
199+ nodesToTaint = append (nodesToTaint , node )
200+ }
201+ }
202+ failedTaintedNodes := make (chan struct {
203+ node * apiv1.Node
204+ err error
205+ }, len (nodesToTaint ))
206+ taintedNodes := make (chan * apiv1.Node , len (nodesToTaint ))
207+ workqueue .ParallelizeUntil (default_context .Background (), maxConcurrentNodesTainting , len (nodesToTaint ), func (piece int ) {
208+ node := nodesToTaint [piece ]
209+ err := a .taintNode (node )
210+ if err != nil {
211+ failedTaintedNodes <- struct {
212+ node * apiv1.Node
213+ err error
214+ }{node : node , err : err }
215+ } else {
216+ taintedNodes <- node
217+ }
218+ })
219+ close (failedTaintedNodes )
220+ close (taintedNodes )
221+ if len (failedTaintedNodes ) > 0 {
222+ for nodeWithError := range failedTaintedNodes {
223+ a .ctx .Recorder .Eventf (nodeWithError .node , apiv1 .EventTypeWarning , "ScaleDownFailed" , "failed to mark the node as toBeDeleted/unschedulable: %v" , nodeWithError .err )
224+ }
225+ // Clean up already applied taints in case of issues.
226+ for taintedNode := range taintedNodes {
227+ _ , _ = taints .CleanToBeDeleted (taintedNode , a .ctx .ClientSet , a .ctx .CordonNodeBeforeTerminate )
207228 }
229+ if a .ctx .AutoscalingOptions .DynamicNodeDeleteDelayAfterTaintEnabled {
230+ close (updateLatencyTracker .AwaitOrStopChan )
231+ }
232+ return nodeDeleteDelayAfterTaint , errors .NewAutoscalerError (errors .ApiCallError , "couldn't taint %d nodes with ToBeDeleted" , len (failedTaintedNodes ))
208233 }
234+
209235 if a .ctx .AutoscalingOptions .DynamicNodeDeleteDelayAfterTaintEnabled {
210236 updateLatencyTracker .AwaitOrStopChan <- true
211237 latency , ok := <- updateLatencyTracker .ResultChan
0 commit comments