Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(controller): Delay pod GC deletion by a configurable amount (default 5s) #6168

Merged
merged 4 commits into from
Jun 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"math"
"path"
"time"

apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -106,12 +107,16 @@ type Config struct {
// PodSpecLogStrategy enables the logging of podspec on controller log.
PodSpecLogStrategy PodSpecLogStrategy `json:"podSpecLogStrategy,omitempty"`

// PodGCGracePeriodSeconds specifies the duration in seconds before the pods in the GC queue get deleted.
// Value must be non-negative integer. A zero value indicates that the pods will be deleted immediately
// as soon as they arrived in the pod GC queue.
// Defaults to 30 seconds.
// PodGCGracePeriodSeconds specifies the duration in seconds before a terminating pod is forcefully killed.
// Value must be non-negative integer. A zero value indicates that the pod will be forcefully terminated immediately.
// Defaults to the Kubernetes default of 30 seconds.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this comment to signify what this option does, as the new option below handles the actual deletion delay.

PodGCGracePeriodSeconds *int64 `json:"podGCGracePeriodSeconds,omitempty"`

// PodGCDeleteDelayDuration specifies the duration in seconds before the pods in the GC queue get deleted.
// Value must be non-negative integer. A zero value indicates that the pods will be deleted immediately.
// Defaults to 5 seconds.
PodGCDeleteDelayDuration *metav1.Duration `json:"podGCDeleteDelayDuration,omitempty"`

// WorkflowRestrictions restricts the controller to executing Workflows that meet certain restrictions
WorkflowRestrictions *WorkflowRestrictions `json:"workflowRestrictions,omitempty"`

Expand Down Expand Up @@ -144,6 +149,14 @@ func (c Config) GetResourceRateLimit() ResourceRateLimit {
}
}

func (c Config) GetPodGCDeleteDelayDuration() time.Duration {
if c.PodGCDeleteDelayDuration == nil {
return 5 * time.Second
}

return c.PodGCDeleteDelayDuration.Duration
}

// PodSpecLogStrategy contains the configuration for logging the pod spec in controller log for debugging purpose
type PodSpecLogStrategy struct {
FailedPod bool `json:"failedPod,omitempty"`
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/pod_cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type PodCleanupSuite struct {
fixtures.E2ESuite
}

const enoughTimeForPodCleanup = 3 * time.Second
const enoughTimeForPodCleanup = 10 * time.Second

func (s *PodCleanupSuite) TestNone() {
s.Given().
Expand Down
3 changes: 2 additions & 1 deletion workflow/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,8 @@ func (wfc *WorkflowController) processNextItem(ctx context.Context) bool {
}
if doPodGC {
for podName := range woc.completedPods {
woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod)
delay := woc.controller.Config.GetPodGCDeleteDelayDuration()
woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay)
}
}
}
Expand Down
6 changes: 4 additions & 2 deletions workflow/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,10 +596,12 @@ func (woc *wfOperationCtx) persistUpdates(ctx context.Context) {
switch woc.execWf.Spec.PodGC.Strategy {
case wfv1.PodGCOnPodSuccess:
if podPhase == apiv1.PodSucceeded {
woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod)
delay := woc.controller.Config.GetPodGCDeleteDelayDuration()
woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay)
}
case wfv1.PodGCOnPodCompletion:
woc.controller.queuePodForCleanup(woc.wf.Namespace, podName, deletePod)
delay := woc.controller.Config.GetPodGCDeleteDelayDuration()
woc.controller.queuePodForCleanupAfter(woc.wf.Namespace, podName, deletePod, delay)
}
} else {
// label pods which will not be deleted
Expand Down