-
Notifications
You must be signed in to change notification settings - Fork 217
Detect and shut down stuck server pods #2027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
operator/src/main/java/oracle/kubernetes/operator/StuckPodProcessing.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
// Copyright (c) 2020, Oracle Corporation and/or its affiliates. | ||
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. | ||
|
||
package oracle.kubernetes.operator; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.Optional; | ||
import javax.annotation.Nonnull; | ||
|
||
import io.kubernetes.client.openapi.models.V1ObjectMeta; | ||
import io.kubernetes.client.openapi.models.V1Pod; | ||
import io.kubernetes.client.openapi.models.V1PodList; | ||
import oracle.kubernetes.operator.calls.CallResponse; | ||
import oracle.kubernetes.operator.helpers.CallBuilder; | ||
import oracle.kubernetes.operator.helpers.PodHelper; | ||
import oracle.kubernetes.operator.logging.LoggingFacade; | ||
import oracle.kubernetes.operator.logging.LoggingFactory; | ||
import oracle.kubernetes.operator.steps.DefaultResponseStep; | ||
import oracle.kubernetes.operator.work.NextAction; | ||
import oracle.kubernetes.operator.work.Packet; | ||
import oracle.kubernetes.operator.work.Step; | ||
import oracle.kubernetes.utils.SystemClock; | ||
import org.joda.time.DateTime; | ||
|
||
import static oracle.kubernetes.operator.logging.MessageKeys.POD_FORCE_DELETED; | ||
|
||
/** | ||
* Under certain circumstances, when a Kubernetes node goes down, it may mark its pods as terminating, but never | ||
* actually remove them. This code detects such cases, deletes the pods and triggers the necessary make-right flows. | ||
*/ | ||
public class StuckPodProcessing { | ||
private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator"); | ||
|
||
private final MainDelegate mainDelegate; | ||
|
||
public StuckPodProcessing(MainDelegate mainDelegate) { | ||
this.mainDelegate = mainDelegate; | ||
} | ||
|
||
void checkStuckPods(String namespace) { | ||
Step step = new CallBuilder() | ||
.withLabelSelectors(LabelConstants.getCreatedbyOperatorSelector()) | ||
.listPodAsync(namespace, new PodListProcessing(namespace, SystemClock.now())); | ||
mainDelegate.runSteps(step); | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
private List<V1Pod> getStuckPodList(Packet packet) { | ||
return (List<V1Pod>) packet.computeIfAbsent("STUCK_PODS", k -> new ArrayList<>()); | ||
} | ||
|
||
class PodListProcessing extends DefaultResponseStep<V1PodList> { | ||
|
||
private final DateTime now; | ||
|
||
public PodListProcessing(String namespace, DateTime dateTime) { | ||
super(new PodActionsStep(namespace)); | ||
now = dateTime; | ||
} | ||
|
||
@Override | ||
public NextAction onSuccess(Packet packet, CallResponse<V1PodList> callResponse) { | ||
callResponse.getResult().getItems().stream() | ||
.filter(pod -> isStuck(pod, now)) | ||
.forEach(pod -> addStuckPodToPacket(packet, pod)); | ||
|
||
return doContinueListOrNext(callResponse, packet); | ||
} | ||
|
||
private boolean isStuck(V1Pod pod, DateTime now) { | ||
return getExpectedDeleteTime(pod).isBefore(now); | ||
} | ||
|
||
private DateTime getExpectedDeleteTime(V1Pod pod) { | ||
return getDeletionTimeStamp(pod).plusSeconds((int) getDeletionGracePeriodSeconds(pod)); | ||
} | ||
|
||
private long getDeletionGracePeriodSeconds(V1Pod pod) { | ||
return Optional.of(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getDeletionGracePeriodSeconds).orElse(1L); | ||
rjeberhard marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
private DateTime getDeletionTimeStamp(V1Pod pod) { | ||
return Optional.of(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getDeletionTimestamp).orElse(SystemClock.now()); | ||
} | ||
|
||
private void addStuckPodToPacket(Packet packet, V1Pod stuckPod) { | ||
getStuckPodList(packet).add(stuckPod); | ||
} | ||
} | ||
|
||
class PodActionsStep extends Step { | ||
|
||
private final String namespace; | ||
|
||
public PodActionsStep(String namespace) { | ||
this.namespace = namespace; | ||
} | ||
|
||
@Override | ||
public NextAction apply(Packet packet) { | ||
final List<V1Pod> stuckPodList = getStuckPodList(packet); | ||
if (stuckPodList.isEmpty()) { | ||
return doNext(packet); | ||
} else { | ||
Collection<StepAndPacket> startDetails = new ArrayList<>(); | ||
|
||
for (V1Pod pod : stuckPodList) { | ||
startDetails.add(new StepAndPacket(createForcedDeletePodStep(pod), packet.clone())); | ||
} | ||
return doForkJoin(readExistingNamespaces(), packet, startDetails); | ||
rjeberhard marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
@Nonnull | ||
private Step readExistingNamespaces() { | ||
return mainDelegate.getDomainNamespaces().readExistingResources(namespace, mainDelegate.getDomainProcessor()); | ||
} | ||
|
||
private Step createForcedDeletePodStep(V1Pod pod) { | ||
return new CallBuilder() | ||
.withGracePeriodSeconds(0) | ||
.deletePodAsync(getName(pod), getNamespace(pod), getDomainUid(pod), null, | ||
new ForcedDeleteResponseStep(getName(pod), getNamespace(pod))); | ||
} | ||
|
||
private String getName(V1Pod pod) { | ||
return Objects.requireNonNull(pod.getMetadata()).getName(); | ||
} | ||
|
||
private String getNamespace(V1Pod pod) { | ||
return Objects.requireNonNull(pod.getMetadata()).getNamespace(); | ||
} | ||
|
||
private String getDomainUid(V1Pod pod) { | ||
return PodHelper.getPodDomainUid(pod); | ||
} | ||
} | ||
|
||
static class ForcedDeleteResponseStep extends DefaultResponseStep<V1Pod> { | ||
|
||
private final String name; | ||
private final String namespace; | ||
|
||
public ForcedDeleteResponseStep(String name, String namespace) { | ||
this.name = name; | ||
this.namespace = namespace; | ||
} | ||
|
||
@Override | ||
public NextAction onSuccess(Packet packet, CallResponse<V1Pod> callResponse) { | ||
LOGGER.info(POD_FORCE_DELETED, name, namespace); | ||
return super.onSuccess(packet, callResponse); | ||
} | ||
} | ||
|
||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That seems a reasonable precaution - how much drift should we be concerned about?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tough question. Off the top my head, maybe account for a few seconds of drift -- I've seen much worse on occasion, but, in general, WL clusters themselves dislike a drift any higher than that in my experience. Plus perhaps account for a few extra seconds to give k8s time to handle the delete 'naturally'. So that would total 10 seconds? And perhaps have an associated configurable?
@rjeberhard Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I keep any accounting for drift very small; perhaps just 1 or 2 seconds. It's subjective, but when I tested the solution it still "felt" like I waited a long time before the terminated pod was removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, what's the harm? If we delete a pod that is going to be deleted anyway, it should still be fine, right?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's my understanding this change initiates a forced delete - so in-memory-replicated-state data loss is possible if the forced delete occurs too early for the pod to perform its graceful shutdown
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The harm is that instead of catching only a pod that is "stuck" in the Terminating state that we could also detect pods that are in the process of shutting down normally and delete them before they've had a chance to complete normal shut down. This should only happen if the operator's pod is badly skewed from the master and the operator thinks the time is later than the master does.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wish that we could find a way to know that time at the master, but I've not been able to find it.