make MaxKubernetesEmptyNodeDeletionTime not configurable - just as upstream
gandhipr committed Feb 27, 2024
1 parent bfb24ba commit dabc007
Showing 6 changed files with 71 additions and 72 deletions.
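
In short, the commit stops reading the 3-minute Kubernetes empty-node deletion timeout from a flag-backed field on AutoscalingOptions and hardcodes it as a package-level constant, matching upstream. A minimal standalone Go sketch of that pattern, using a simplified placeholder struct rather than the real autoscaler types:

package main

import (
	"fmt"
	"time"
)

// Before: the timeout lived on the options struct and was populated from a flag.
type autoscalingOptions struct {
	MaxCloudProviderNodeDeletionTime time.Duration
	// MaxKubernetesEmptyNodeDeletionTime time.Duration  // removed: no longer configurable
}

// After: the timeout is a fixed package-level constant.
const maxKubernetesEmptyNodeDeletionTime = 3 * time.Minute

func main() {
	opts := autoscalingOptions{MaxCloudProviderNodeDeletionTime: 5 * time.Minute}
	fmt.Println("cloud provider node deletion timeout (flag-driven):", opts.MaxCloudProviderNodeDeletionTime)
	fmt.Println("kubernetes empty node deletion timeout (constant):", maxKubernetesEmptyNodeDeletionTime)
}
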
2 changes: 0 additions & 2 deletions cluster-autoscaler/config/autoscaling_options.go
@@ -134,8 +134,6 @@ type AutoscalingOptions struct {
DrainPriorityConfig []kubelet_config.ShutdownGracePeriodByPodPriority
// MaxCloudProviderNodeDeletionTime is the maximum time needed by cloud provider to delete a node
MaxCloudProviderNodeDeletionTime time.Duration
- // MaxKubernetesEmptyNodeDeletionTime is the maximum time needed by Kubernetes to delete an empty node
- MaxKubernetesEmptyNodeDeletionTime time.Duration
// Maximum time CA waits for node to be provisioned
// MaxNodeProvisionTime defines maximum time CA waits for node to be provisioned
MaxNodeProvisionTime time.Duration
@@ -37,6 +37,11 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

+ const (
+ // MaxKubernetesEmptyNodeDeletionTime is the maximum time needed by Kubernetes to delete an empty node.
+ MaxKubernetesEmptyNodeDeletionTime = 3 * time.Minute
+ )
+
// NodeDeletionBatcher batch scale down candidates for one node group and remove them.
type NodeDeletionBatcher struct {
sync.Mutex
@@ -168,7 +173,7 @@ func nodeScaleDownReason(node *apiv1.Node, drain bool) metrics.NodeScaleDownReas
// IsNodeBeingDeleted returns true iff a given node is being deleted.
func IsNodeBeingDeleted(ac *context.AutoscalingContext, node *apiv1.Node, timestamp time.Time) bool {
deleteTime, _ := taints.GetToBeDeletedTime(node)
- return deleteTime != nil && (timestamp.Sub(*deleteTime) < ac.MaxCloudProviderNodeDeletionTime || timestamp.Sub(*deleteTime) < ac.MaxKubernetesEmptyNodeDeletionTime)
+ return deleteTime != nil && (timestamp.Sub(*deleteTime) < ac.MaxCloudProviderNodeDeletionTime || timestamp.Sub(*deleteTime) < MaxKubernetesEmptyNodeDeletionTime)
}

// CleanUpAndRecordFailedScaleDownEvent record failed scale down event and log an error.
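
For context, the check above treats a node as "being deleted" while its to-be-deleted taint timestamp is younger than either the configurable cloud-provider timeout or the now-fixed 3-minute constant. A simplified, self-contained sketch of that logic; the function below only mirrors its shape and does not use the real AutoscalingContext or taint helpers:

package main

import (
	"fmt"
	"time"
)

const maxKubernetesEmptyNodeDeletionTime = 3 * time.Minute

// isNodeBeingDeleted mirrors the updated check: deleteTime stands in for the
// timestamp parsed from the ToBeDeleted taint (nil if the taint is absent), and
// maxCloudProviderNodeDeletionTime is still taken from configuration.
func isNodeBeingDeleted(deleteTime *time.Time, timestamp time.Time, maxCloudProviderNodeDeletionTime time.Duration) bool {
	return deleteTime != nil &&
		(timestamp.Sub(*deleteTime) < maxCloudProviderNodeDeletionTime ||
			timestamp.Sub(*deleteTime) < maxKubernetesEmptyNodeDeletionTime)
}

func main() {
	now := time.Now()
	taintedTwoMinutesAgo := now.Add(-2 * time.Minute)
	taintedTenMinutesAgo := now.Add(-10 * time.Minute)

	fmt.Println(isNodeBeingDeleted(&taintedTwoMinutesAgo, now, 5*time.Minute)) // true: inside both windows
	fmt.Println(isNodeBeingDeleted(&taintedTenMinutesAgo, now, 5*time.Minute)) // false: both windows elapsed
	fmt.Println(isNodeBeingDeleted(nil, now, 5*time.Minute))                   // false: no deletion taint
}
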
@@ -164,8 +164,7 @@ func TestFilterOutUnremovable(t *testing.T) {
ScaleDownUnreadyTime: config.DefaultScaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: tc.ignoreDaemonSetsUtilization,
},
- MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
- MaxKubernetesEmptyNodeDeletionTime: 3 * time.Minute,
+ MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
}
s := nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults)
c := NewChecker(s)
5 changes: 2 additions & 3 deletions cluster-autoscaler/core/scaledown/legacy/legacy_test.go
@@ -143,9 +143,8 @@ func TestFindUnneededNodes(t *testing.T) {
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: 0.35,
},
- UnremovableNodeRecheckTimeout: 5 * time.Minute,
- MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
- MaxKubernetesEmptyNodeDeletionTime: 3 * time.Minute,
+ UnremovableNodeRecheckTimeout: 5 * time.Minute,
+ MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
}
context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, registry, provider, nil, nil)
assert.NoError(t, err)
6 changes: 3 additions & 3 deletions cluster-autoscaler/core/scaledown/resource/limits_test.go
@@ -18,10 +18,11 @@ package resource

import (
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/config"
"testing"
"time"

"k8s.io/autoscaler/cluster-autoscaler/config"

. "k8s.io/autoscaler/cluster-autoscaler/core/test"
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
@@ -57,8 +58,7 @@ func TestCalculateCoresAndMemoryTotal(t *testing.T) {
}

options := config.AutoscalingOptions{
- MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
- MaxKubernetesEmptyNodeDeletionTime: 3 * time.Minute,
+ MaxCloudProviderNodeDeletionTime: 5 * time.Minute,
}
context, err := NewScaleTestAutoscalingContext(options, nil, nil, nil, nil, nil)
assert.NoError(t, err)
120 changes: 59 additions & 61 deletions cluster-autoscaler/main.go
@@ -160,15 +160,14 @@ var (
maxEmptyBulkDeleteFlag = flag.Int("max-empty-bulk-delete", 10, "Maximum number of empty nodes that can be deleted at the same time.")
maxGracefulTerminationFlag = flag.Int("max-graceful-termination-sec", 10*60, "Maximum number of seconds CA waits for pod termination when trying to scale down a node. "+
"This flag is mutually exclusion with drain-priority-config flag which allows more configuration options.")
- maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 45, "Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations")
- okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
- scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there are 0 ready nodes.")
- parallelScaleUp = flag.Bool("parallel-scale-up", false, "Whether to allow parallel node groups scale up. Experimental: may not work on some cloud providers, enable at your own risk.")
- maxCloudProviderNodeDeletionTime = flag.Duration("max-cloud-provider-node-deletion-time", 5*time.Minute, "Maximum time needed by cloud provider to delete a node")
- maxKubernetesEmptyNodeDeletionTime = flag.Duration("max-kubernetes-empty-node-deletion-time", 3*time.Minute, "Maximum time needed by cloud provider to delete a node")
- maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "The default maximum time CA waits for node to be provisioned - the value can be overridden per node group")
- maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
- nodeGroupsFlag = multiStringFlag(
+ maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 45, "Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations")
+ okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
+ scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there are 0 ready nodes.")
+ parallelScaleUp = flag.Bool("parallel-scale-up", false, "Whether to allow parallel node groups scale up. Experimental: may not work on some cloud providers, enable at your own risk.")
+ maxCloudProviderNodeDeletionTime = flag.Duration("max-cloud-provider-node-deletion-time", 5*time.Minute, "Maximum time needed by cloud provider to delete a node")
+ maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "The default maximum time CA waits for node to be provisioned - the value can be overridden per node group")
+ maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
+ nodeGroupsFlag = multiStringFlag(
"nodes",
"sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: <min>:<max>:<other...>")
nodeGroupAutoDiscoveryFlag = multiStringFlag(
@@ -342,58 +341,57 @@ func createAutoscalingOptions() config.AutoscalingOptions {
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
MaxNodeProvisionTime: *maxNodeProvisionTime,
},
- CloudConfig: *cloudConfig,
- CloudProviderName: *cloudProviderFlag,
- NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
- MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
- OkTotalUnreadyCount: *okTotalUnreadyCount,
- ScaleUpFromZero: *scaleUpFromZero,
- ParallelScaleUp: *parallelScaleUp,
- EstimatorName: *estimatorFlag,
- ExpanderNames: *expanderFlag,
- GRPCExpanderCert: *grpcExpanderCert,
- GRPCExpanderURL: *grpcExpanderURL,
- IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
- MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
- MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
- MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
- MaxCloudProviderNodeDeletionTime: *maxCloudProviderNodeDeletionTime,
- MaxKubernetesEmptyNodeDeletionTime: *maxKubernetesEmptyNodeDeletionTime,
- MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
- MaxPodEvictionTime: *maxPodEvictionTime,
- MaxNodesTotal: *maxNodesTotal,
- MaxCoresTotal: maxCoresTotal,
- MinCoresTotal: minCoresTotal,
- MaxMemoryTotal: maxMemoryTotal,
- MinMemoryTotal: minMemoryTotal,
- GpuTotal: parsedGpuTotal,
- NodeGroups: *nodeGroupsFlag,
- EnforceNodeGroupMinSize: *enforceNodeGroupMinSize,
- ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
- ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
- ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
- ScaleDownEnabled: *scaleDownEnabled,
- ScaleDownUnreadyEnabled: *scaleDownUnreadyEnabled,
- ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
- ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
- ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
- DrainPriorityConfig: drainPriorityConfigMap,
- SchedulerConfig: parsedSchedConfig,
- WriteStatusConfigMap: *writeStatusConfigMapFlag,
- StatusConfigMapName: *statusConfigMapName,
- BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
- ConfigNamespace: *namespace,
- ClusterName: *clusterName,
- NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
- MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
- UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
- ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
- Regional: *regional,
- NewPodScaleUpDelay: *newPodScaleUpDelay,
- StartupTaints: append(*ignoreTaintsFlag, *startupTaintsFlag...),
- StatusTaints: *statusTaintsFlag,
- BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
- BalancingLabels: *balancingLabelsFlag,
+ CloudConfig: *cloudConfig,
+ CloudProviderName: *cloudProviderFlag,
+ NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
+ MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
+ OkTotalUnreadyCount: *okTotalUnreadyCount,
+ ScaleUpFromZero: *scaleUpFromZero,
+ ParallelScaleUp: *parallelScaleUp,
+ EstimatorName: *estimatorFlag,
+ ExpanderNames: *expanderFlag,
+ GRPCExpanderCert: *grpcExpanderCert,
+ GRPCExpanderURL: *grpcExpanderURL,
+ IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
+ MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
+ MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
+ MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
+ MaxCloudProviderNodeDeletionTime: *maxCloudProviderNodeDeletionTime,
+ MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
+ MaxPodEvictionTime: *maxPodEvictionTime,
+ MaxNodesTotal: *maxNodesTotal,
+ MaxCoresTotal: maxCoresTotal,
+ MinCoresTotal: minCoresTotal,
+ MaxMemoryTotal: maxMemoryTotal,
+ MinMemoryTotal: minMemoryTotal,
+ GpuTotal: parsedGpuTotal,
+ NodeGroups: *nodeGroupsFlag,
+ EnforceNodeGroupMinSize: *enforceNodeGroupMinSize,
+ ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
+ ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
+ ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
+ ScaleDownEnabled: *scaleDownEnabled,
+ ScaleDownUnreadyEnabled: *scaleDownUnreadyEnabled,
+ ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
+ ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
+ ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
+ DrainPriorityConfig: drainPriorityConfigMap,
+ SchedulerConfig: parsedSchedConfig,
+ WriteStatusConfigMap: *writeStatusConfigMapFlag,
+ StatusConfigMapName: *statusConfigMapName,
+ BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
+ ConfigNamespace: *namespace,
+ ClusterName: *clusterName,
+ NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
+ MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
+ UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
+ ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
+ Regional: *regional,
+ NewPodScaleUpDelay: *newPodScaleUpDelay,
+ StartupTaints: append(*ignoreTaintsFlag, *startupTaintsFlag...),
+ StatusTaints: *statusTaintsFlag,
+ BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
+ BalancingLabels: *balancingLabelsFlag,
KubeClientOpts: config.KubeClientOptions{
Master: *kubernetes,
KubeConfigPath: *kubeConfigFile,
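
One operational consequence of dropping the max-kubernetes-empty-node-deletion-time flag definition in main.go above is that any deployment still passing it will fail flag parsing at startup. A hedged illustration using Go's standard flag package; the flag-set name and argument list here are invented for the demo, while the real binary parses os.Args:

package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Only a flag that remains after this commit is registered here.
	fs := flag.NewFlagSet("cluster-autoscaler", flag.ContinueOnError)
	fs.Duration("max-cloud-provider-node-deletion-time", 5*time.Minute, "Maximum time needed by cloud provider to delete a node")

	// Passing the removed flag now produces a parse error
	// ("flag provided but not defined").
	err := fs.Parse([]string{"--max-kubernetes-empty-node-deletion-time=3m"})
	fmt.Println(err)
}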
