Skip to content

Commit bc0f813

Browse files
authored
Backport fix for OWLS-96080 to 3.4 branch (#3109)
* backport changes from PR3066 to 3.4 branch * backport changes to FiberGate from PR2613 to 3.4 branch * backport doAfterCall support from main * backport unit test from PR3066 from main
1 parent 93a7b5b commit bc0f813

File tree

6 files changed

+157
-19
lines changed

6 files changed

+157
-19
lines changed

operator/src/main/java/oracle/kubernetes/operator/JobWatcher.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,14 +408,15 @@ public NextAction onSuccess(Packet packet, CallResponse<V1Job> callResponse) {
408408
}
409409
}
410410

411-
static class DeadlineExceededException extends Exception {
411+
public static class DeadlineExceededException extends Exception {
412412
final V1Job job;
413413

414-
DeadlineExceededException(V1Job job) {
414+
public DeadlineExceededException(V1Job job) {
415415
super();
416416
this.job = job;
417417
}
418418

419+
@Override
419420
public String toString() {
420421
return LOGGER.formatMessage(
421422
MessageKeys.JOB_DEADLINE_EXCEEDED_MESSAGE,

operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_DOMAIN_SPEC_GENERATION;
5959
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_STATE_LABEL;
6060
import static oracle.kubernetes.operator.ProcessingConstants.COMPATIBILITY_MODE;
61+
import static oracle.kubernetes.operator.ProcessingConstants.DOMAIN_INTROSPECTOR_JOB;
6162
import static oracle.kubernetes.operator.ProcessingConstants.DOMAIN_INTROSPECT_REQUESTED;
6263
import static oracle.kubernetes.operator.ProcessingConstants.JOB_POD_FLUENTD_CONTAINER_TERMINATED;
6364
import static oracle.kubernetes.operator.ProcessingConstants.JOB_POD_INTROSPECT_CONTAINER_TERMINATED;
@@ -368,8 +369,8 @@ static class ReplaceOrCreateStep extends DefaultResponseStep {
368369
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
369370
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
370371
V1Job job = (V1Job) callResponse.getResult();
371-
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
372-
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
372+
if ((job != null) && (packet.get(DOMAIN_INTROSPECTOR_JOB) == null)) {
373+
packet.put(DOMAIN_INTROSPECTOR_JOB, job);
373374
}
374375
return doNext(getIntrospectorPodStatus(info.getDomainUid(), info.getNamespace(), getNext()), packet);
375376
}
@@ -443,7 +444,7 @@ public NextAction onSuccess(Packet packet, CallResponse<V1PodList> callResponse)
443444
.orElseGet(Collections::emptyList)
444445
.forEach(pod -> recordJobPodNameAndStatus(packet, pod));
445446

446-
V1Job job = (V1Job) packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB);
447+
V1Job job = (V1Job) packet.get(DOMAIN_INTROSPECTOR_JOB);
447448
OffsetDateTime startTime = createNextSteps(nextSteps, packet, job, getNext());
448449
packet.putIfAbsent(START_TIME, startTime);
449450
return doContinueListOrNext(callResponse, packet, nextSteps.get(0));
@@ -571,7 +572,7 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
571572
}
572573

573574
V1Job domainIntrospectorJob =
574-
(V1Job) packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB);
575+
(V1Job) packet.get(DOMAIN_INTROSPECTOR_JOB);
575576
boolean jobPodContainerTerminated = JOB_POD_INTROSPECT_CONTAINER_TERMINATED_MARKER
576577
.equals(packet.get(JOB_POD_INTROSPECT_CONTAINER_TERMINATED));
577578

@@ -790,7 +791,7 @@ public static Step deleteDomainIntrospectorJobStep(Step next) {
790791

791792
static void logJobDeleted(String domainUid, String namespace, String jobName, Packet packet) {
792793
V1Job domainIntrospectorJob =
793-
(V1Job) packet.remove(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB);
794+
(V1Job) packet.remove(DOMAIN_INTROSPECTOR_JOB);
794795

795796
packet.remove(ProcessingConstants.INTROSPECTOR_JOB_FAILURE_LOGGED);
796797
if (domainIntrospectorJob != null
@@ -908,6 +909,9 @@ public NextAction onSuccess(Packet packet, CallResponse<V1PodList> callResponse)
908909
.orElse(null);
909910

910911
if (jobPod != null) {
912+
if (isJobPodTimedOut(jobPod)) {
913+
return onFailureNoRetry(packet, callResponse);
914+
}
911915
addContainerTerminatedMarkerToPacket(jobPod, getJobName(), packet);
912916
recordJobPodName(packet, getName(jobPod));
913917
}
@@ -922,6 +926,20 @@ private String getName(V1Pod pod) {
922926
return Optional.of(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getName).orElse("");
923927
}
924928

929+
private boolean isJobPodTimedOut(V1Pod jobPod) {
930+
return "DeadlineExceeded".equals(getJobPodStatusReason(jobPod));
931+
}
932+
933+
private String getJobPodStatusReason(V1Pod jobPod) {
934+
return Optional.ofNullable(jobPod.getStatus()).map(V1PodStatus::getReason).orElse(null);
935+
}
936+
937+
@Override
938+
protected Throwable createTerminationException(Packet packet,
939+
CallResponse<V1PodList> callResponse) {
940+
return new JobWatcher.DeadlineExceededException((V1Job) packet.get(DOMAIN_INTROSPECTOR_JOB));
941+
}
942+
925943
private void recordJobPodName(Packet packet, String podName) {
926944
packet.put(ProcessingConstants.JOB_POD_NAME, podName);
927945
}

operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,18 @@ public NextAction onFailure(Step conflictStep, Packet packet, CallResponse<T> ca
192192
}
193193

194194
protected NextAction onFailureNoRetry(Packet packet, CallResponse<T> callResponse) {
195-
return doTerminate(UnrecoverableErrorBuilder.createExceptionFromFailedCall(callResponse), packet);
195+
return doTerminate(createTerminationException(packet, callResponse), packet);
196+
}
197+
198+
/**
199+
* Create an exception to be passed to the doTerminate call.
200+
*
201+
* @param packet Packet for creating the exception
202+
* @param callResponse CallResponse for creating the exception
203+
* @return An Exception to be passed to the doTerminate call
204+
*/
205+
protected Throwable createTerminationException(Packet packet, CallResponse<T> callResponse) {
206+
return UnrecoverableErrorBuilder.createExceptionFromFailedCall(callResponse);
196207
}
197208

198209
protected boolean isNotAuthorizedOrForbidden(CallResponse<T> callResponse) {

operator/src/main/java/oracle/kubernetes/operator/work/FiberGate.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,20 @@ public synchronized Fiber startFiberIfLastFiberMatches(
109109
new CompletionCallback() {
110110
@Override
111111
public void onCompletion(Packet packet) {
112-
gateMap.remove(key, f);
113-
callback.onCompletion(packet);
112+
try {
113+
callback.onCompletion(packet);
114+
} finally {
115+
gateMap.remove(key, f);
116+
}
114117
}
115118

116119
@Override
117120
public void onThrowable(Packet packet, Throwable throwable) {
118-
gateMap.remove(key, f);
119-
callback.onThrowable(packet, throwable);
121+
try {
122+
callback.onThrowable(packet, throwable);
123+
} finally {
124+
gateMap.remove(key, f);
125+
}
120126
}
121127
});
122128
return f;

operator/src/test/java/oracle/kubernetes/operator/helpers/DomainIntrospectorJobTest.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import java.util.ArrayList;
77
import java.util.Arrays;
88
import java.util.Collections;
9+
import java.util.HashMap;
910
import java.util.List;
11+
import java.util.Map;
1012
import java.util.Optional;
1113
import java.util.logging.Level;
1214
import java.util.logging.LogRecord;
@@ -23,12 +25,16 @@
2325
import io.kubernetes.client.openapi.models.V1JobCondition;
2426
import io.kubernetes.client.openapi.models.V1JobStatus;
2527
import io.kubernetes.client.openapi.models.V1ObjectMeta;
28+
import io.kubernetes.client.openapi.models.V1Pod;
2629
import io.kubernetes.client.openapi.models.V1PodSpec;
30+
import io.kubernetes.client.openapi.models.V1PodStatus;
2731
import io.kubernetes.client.openapi.models.V1ResourceRequirements;
2832
import io.kubernetes.client.openapi.models.V1SecretReference;
2933
import io.kubernetes.client.openapi.models.V1Volume;
3034
import io.kubernetes.client.openapi.models.V1VolumeMount;
3135
import oracle.kubernetes.operator.JobAwaiterStepFactory;
36+
import oracle.kubernetes.operator.JobWatcher;
37+
import oracle.kubernetes.operator.LabelConstants;
3238
import oracle.kubernetes.operator.ProcessingConstants;
3339
import oracle.kubernetes.operator.TuningParameters;
3440
import oracle.kubernetes.operator.calls.unprocessable.UnrecoverableErrorBuilderImpl;
@@ -127,6 +133,7 @@ class DomainIntrospectorJobTest {
127133
private static final String FATAL_MESSAGE_1 = "@[SEVERE] " + FATAL_PROBLEM_1;
128134
public static final String TEST_VOLUME_NAME = "test";
129135
public static final String JOB_UID = "some-unique-id";
136+
private static final String JOB_NAME = UID + "-introspector";
130137
public static final String INFO_MESSAGE_1 = "informational message";
131138
private static final String INFO_MESSAGE = "@[INFO] just letting you know";
132139

@@ -878,4 +885,60 @@ private Cluster getCluster(String clusterName) {
878885
private String getDomainHome() {
879886
return "/shared/domains/" + UID;
880887
}
888+
889+
@Test
890+
void whenPreviousFailedJobWithDeadlineExceeded_terminateWithException() {
891+
ignoreIntrospectorFailureLogs();
892+
ignoreJobCreatedAndDeletedLogs();
893+
testSupport.addToPacket(DOMAIN_TOPOLOGY, createDomainConfig("cluster-1"));
894+
defineFailedIntrospectionPodWithDeadlineExceeded();
895+
testSupport.doOnCreate(JOB, this::recordJob);
896+
testSupport.doAfterCall(JOB, "deleteJob", this::replaceFailedJobPodWithSuccess);
897+
898+
testSupport.runSteps(JobHelper.createDomainIntrospectorJobStep(null));
899+
900+
testSupport.verifyCompletionThrowable(JobWatcher.DeadlineExceededException.class);
901+
}
902+
903+
private void defineFailedIntrospectionPodWithDeadlineExceeded() {
904+
testSupport.defineResources(asFailedJobPodWithDeadlineExceeded(createIntrospectorJobPod()));
905+
}
906+
907+
private V1Pod asFailedJobPodWithDeadlineExceeded(V1Pod introspectorJobPod) {
908+
return introspectorJobPod.status(new V1PodStatus().reason("DeadlineExceeded"));
909+
}
910+
911+
private void ignoreIntrospectorFailureLogs() {
912+
consoleHandlerMemento.ignoreMessage(getJobFailedMessageKey());
913+
consoleHandlerMemento.ignoreMessage(getJobFailedDetailMessageKey());
914+
}
915+
916+
private void ignoreJobCreatedAndDeletedLogs() {
917+
consoleHandlerMemento.ignoreMessage(getJobCreatedMessageKey());
918+
consoleHandlerMemento.ignoreMessage(getJobDeletedMessageKey());
919+
}
920+
921+
private V1Job affectedJob;
922+
923+
private void recordJob(Object job) {
924+
affectedJob = asCompletedJob((V1Job) job);
925+
}
926+
927+
private V1Job asCompletedJob(V1Job job) {
928+
job.setStatus(new V1JobStatus().addConditionsItem(
929+
new V1JobCondition().status("True").type("Complete")));
930+
return job;
931+
}
932+
933+
private void replaceFailedJobPodWithSuccess() {
934+
testSupport.deleteResources(createIntrospectorJobPod());
935+
testSupport.defineResources(createIntrospectorJobPod());
936+
}
937+
938+
private V1Pod createIntrospectorJobPod() {
939+
Map<String, String> labels = new HashMap<>();
940+
labels.put(LabelConstants.JOBNAME_LABEL, JOB_NAME);
941+
return new V1Pod().metadata(new V1ObjectMeta().name(JOB_NAME).labels(labels).namespace(NS));
942+
}
943+
881944
}

operator/src/test/java/oracle/kubernetes/operator/helpers/KubernetesTestSupport.java

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ public class KubernetesTestSupport extends FiberTestSupport {
133133
private final Map<String, DataRepository<?>> repositories = new HashMap<>();
134134
private final Map<Class<?>, String> dataTypes = new HashMap<>();
135135
private Failure failure;
136+
private AfterCallAction afterCallAction;
136137
private long resourceVersion;
137138
private int numCalls;
138139
private boolean addCreationTimestamp;
@@ -480,6 +481,16 @@ public void cancelFailures() {
480481
failure = null;
481482
}
482483

484+
/**
485+
* Specifies an action to perform after completing the next matching invocation.
486+
* @param resourceType the type of resource
487+
* @param call the call string
488+
* @param action the action to perform
489+
*/
490+
public void doAfterCall(@Nonnull String resourceType, @Nonnull String call, @Nonnull Runnable action) {
491+
afterCallAction = new AfterCallAction(resourceType, call, action);
492+
}
493+
483494
@SuppressWarnings("unused")
484495
private enum Operation {
485496
create {
@@ -591,6 +602,27 @@ HttpErrorException getException() {
591602
}
592603
}
593604

605+
static class AfterCallAction {
606+
private final String resourceType;
607+
private final String call;
608+
private final Runnable action;
609+
610+
AfterCallAction(@Nonnull String resourceType, @Nonnull String call, @Nonnull Runnable action) {
611+
this.resourceType = resourceType;
612+
this.call = call;
613+
this.action = action;
614+
}
615+
616+
boolean matches(String resourceType, RequestParams requestParams) {
617+
return this.resourceType.equals(resourceType)
618+
&& (this.call.equals(requestParams.call));
619+
}
620+
621+
void doAction() {
622+
action.run();
623+
}
624+
}
625+
594626
static class HttpErrorException extends RuntimeException {
595627
private final ApiException apiException;
596628

@@ -1174,15 +1206,22 @@ private int indexOfFirstCapital(String callName) {
11741206
}
11751207

11761208
private Object execute() {
1177-
if (failure != null && failure.matches(resourceType, requestParams, operation)) {
1178-
try {
1179-
throw failure.getException();
1180-
} finally {
1181-
failure = null;
1209+
try {
1210+
if (failure != null && failure.matches(resourceType, requestParams, operation)) {
1211+
try {
1212+
throw failure.getException();
1213+
} finally {
1214+
failure = null;
1215+
}
11821216
}
1183-
}
11841217

1185-
return operation.execute(this, selectRepository(resourceType));
1218+
return operation.execute(this, selectRepository(resourceType));
1219+
} finally {
1220+
if (afterCallAction != null && afterCallAction.matches(resourceType, requestParams)) {
1221+
afterCallAction.doAction();
1222+
afterCallAction = null;
1223+
}
1224+
}
11861225
}
11871226

11881227
@SuppressWarnings("unchecked")

0 commit comments

Comments
 (0)