diff --git a/config/config.go b/config/config.go index 2869e892b06b..9062b048c541 100644 --- a/config/config.go +++ b/config/config.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "path" + "time" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -106,12 +107,16 @@ type Config struct { // PodSpecLogStrategy enables the logging of podspec on controller log. PodSpecLogStrategy PodSpecLogStrategy `json:"podSpecLogStrategy,omitempty"` - // PodGCGracePeriodSeconds specifies the duration in seconds before the pods in the GC queue get deleted. - // Value must be non-negative integer. A zero value indicates that the pods will be deleted immediately - // as soon as they arrived in the pod GC queue. - // Defaults to 30 seconds. + // PodGCGracePeriodSeconds specifies the duration in seconds before a terminating pod is forcefully killed. + // Value must be a non-negative integer. A zero value indicates that the pod will be forcefully terminated immediately. + // Defaults to the Kubernetes default of 30 seconds. PodGCGracePeriodSeconds *int64 `json:"podGCGracePeriodSeconds,omitempty"` + // PodGCDeleteDelayDuration specifies the delay before the pods in the GC queue get deleted, given as a duration (for example "5s") rather than an integer number of seconds. + // Value must be non-negative. A zero value indicates that the pods will be deleted immediately. + // Defaults to 5 seconds. 
+ PodGCDeleteDelayDuration *metav1.Duration `json:"podGCDeleteDelayDuration,omitempty"` + // WorkflowRestrictions restricts the controller to executing Workflows that meet certain restrictions WorkflowRestrictions *WorkflowRestrictions `json:"workflowRestrictions,omitempty"` @@ -144,6 +149,14 @@ func (c Config) GetResourceRateLimit() ResourceRateLimit { } } +func (c Config) GetPodGCDeleteDelayDuration() time.Duration { + if c.PodGCDeleteDelayDuration == nil { + return 5 * time.Second + } + + return c.PodGCDeleteDelayDuration.Duration +} + // PodSpecLogStrategy contains the configuration for logging the pod spec in controller log for debugging purpose type PodSpecLogStrategy struct { FailedPod bool `json:"failedPod,omitempty"` diff --git a/test/e2e/pod_cleanup_test.go b/test/e2e/pod_cleanup_test.go index 9e9c957e6005..07f674dcb162 100644 --- a/test/e2e/pod_cleanup_test.go +++ b/test/e2e/pod_cleanup_test.go @@ -21,7 +21,7 @@ type PodCleanupSuite struct { fixtures.E2ESuite } -const enoughTimeForPodCleanup = 3 * time.Second +const enoughTimeForPodCleanup = 10 * time.Second func (s *PodCleanupSuite) TestNone() { s.Given(). 
diff --git a/workflow/controller/controller.go b/workflow/controller/controller.go index 7a6c9e4278f1..081870aa5848 100644 --- a/workflow/controller/controller.go +++ b/workflow/controller/controller.go @@ -768,7 +768,8 @@ func (wfc *WorkflowController) processNextItem(ctx context.Context) bool { } if doPodGC { for podName := range woc.completedPods { - woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod) + delay := woc.controller.Config.GetPodGCDeleteDelayDuration() + woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay) } } } diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 43dde9732dfe..6f0e6eb0cf11 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -596,10 +596,12 @@ func (woc *wfOperationCtx) persistUpdates(ctx context.Context) { switch woc.execWf.Spec.PodGC.Strategy { case wfv1.PodGCOnPodSuccess: if podPhase == apiv1.PodSucceeded { - woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod) + delay := woc.controller.Config.GetPodGCDeleteDelayDuration() + woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay) } case wfv1.PodGCOnPodCompletion: - woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod) + delay := woc.controller.Config.GetPodGCDeleteDelayDuration() + woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay) } } else { // label pods which will not be deleted