Skip to content

Commit 53b8dd1

Browse files
ankedia and russgold authored
Fixes for introspector failure count increment issue and optimizations (#2564)
* Fixes for introspector failure count increment issue and optimizations
Co-authored-by: Russell Gold <russ@russgold.net>
1 parent 51ec5e1 commit 53b8dd1

File tree

11 files changed

+363
-94
lines changed

11 files changed

+363
-94
lines changed

documentation/domains/Domain.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,10 @@
541541
"type": "number",
542542
"minimum": 0
543543
},
544+
"lastIntrospectJobProcessedUid": {
545+
"description": "Unique id of the last introspector job that was processed for this domain.",
546+
"type": "string"
547+
},
544548
"startTime": {
545549
"description": "RFC 3339 date and time at which the operator started the domain. This will be when the operator begins processing and will precede when the various servers or clusters are available.",
546550
"$ref": "#/definitions/DateTime"

documentation/domains/Domain.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ The current status of the operation of the WebLogic domain. Updated automaticall
5656
| `clusters` | Array of [Cluster Status](#cluster-status) | Status of WebLogic clusters in this domain. |
5757
| `conditions` | Array of [Domain Condition](#domain-condition) | Current service state of the domain. |
5858
| `introspectJobFailureCount` | number | Non-zero if the introspector job fails for any reason. You can configure an introspector job retry limit for jobs that log script failures using the Operator tuning parameter 'domainPresenceFailureRetryMaxCount' (default 5). You cannot configure a limit for other types of failures, such as a Domain resource reference to an unknown secret name; in which case, the retries are unlimited. |
59+
| `lastIntrospectJobProcessedUid` | string | Unique id of the last introspector job that was processed for this domain. |
5960
| `message` | string | A human readable message indicating details about why the domain is in this condition. |
6061
| `reason` | string | A brief CamelCase message indicating details about why the domain is in this state. |
6162
| `replicas` | number | The number of running cluster member Managed Servers in the WebLogic cluster if there is exactly one cluster defined in the domain configuration and where the `replicas` field is set at the `spec` level rather than for the specific cluster under `clusters`. This field is provided to support use of Kubernetes scaling for this limited use case. |

documentation/domains/index.html

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,10 @@
14621462
"type": "number",
14631463
"minimum": 0.0
14641464
},
1465+
"lastIntrospectJobProcessedUid": {
1466+
"description": "Unique id of the last introspector job that was processed for this domain.",
1467+
"type": "string"
1468+
},
14651469
"startTime": {
14661470
"description": "RFC 3339 date and time at which the operator started the domain. This will be when the operator begins processing and will precede when the various servers or clusters are available.",
14671471
"$ref": "#/definitions/DateTime"

kubernetes/crd/domain-crd.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
55
kind: CustomResourceDefinition
66
metadata:
77
annotations:
8-
weblogic.sha256: 2ebb2170cf64a39d3db9d3e7a9635ff80ca760fc8e37bcfacc13b0dd8e92553e
8+
weblogic.sha256: 9a73b19676f5e704b99194a570051ca5ef0838a9113874a350cb51c61a2082a3
99
name: domains.weblogic.oracle
1010
spec:
1111
group: weblogic.oracle
@@ -11330,6 +11330,10 @@ spec:
1133011330
this limited use case.
1133111331
minimum: 0.0
1133211332
type: number
11333+
lastIntrospectJobProcessedUid:
11334+
description: Unique id of the last introspector job that was processed
11335+
for this domain.
11336+
type: string
1133311337
startTime:
1133411338
description: RFC 3339 date and time at which the operator started
1133511339
the domain. This will be when the operator begins processing and

operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java

Lines changed: 34 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import java.util.Collection;
88
import java.util.Collections;
99
import java.util.HashSet;
10-
import java.util.List;
1110
import java.util.Map;
1211
import java.util.Objects;
1312
import java.util.Optional;
@@ -405,56 +404,10 @@ DomainStatus getNewStatus() {
405404
newStatus.setMessage(
406405
Optional.ofNullable(info).map(DomainPresenceInfo::getValidationWarningsAsString).orElse(null));
407406
}
408-
if (shouldUpdateFailureCount(newStatus)) {
409-
newStatus.incrementIntrospectJobFailureCount();
410-
}
411407

412408
return newStatus;
413409
}
414410

415-
private String getExistingStatusMessage() {
416-
return Optional.ofNullable(info)
417-
.map(DomainPresenceInfo::getDomain)
418-
.map(Domain::getStatus)
419-
.map(DomainStatus::getMessage)
420-
.orElse(null);
421-
}
422-
423-
private DomainCondition getProgressingCondition() {
424-
return Optional.ofNullable(info)
425-
.map(DomainPresenceInfo::getDomain)
426-
.map(Domain::getStatus)
427-
.map(this::getProgressingCondition).orElse(null);
428-
}
429-
430-
private DomainCondition getProgressingCondition(DomainStatus status) {
431-
return Optional.ofNullable(status)
432-
.map(s -> s.getConditionWithType(Progressing)).orElse(null);
433-
}
434-
435-
private boolean shouldUpdateFailureCount(DomainStatus newStatus) {
436-
return transitFromProgressing(newStatus)
437-
&& getExistingStatusMessage() == null
438-
&& isBackoffLimitExceeded(newStatus);
439-
}
440-
441-
private boolean transitFromProgressing(DomainStatus newStatus) {
442-
return getProgressingCondition() != null && getProgressingCondition(newStatus) == null;
443-
}
444-
445-
private boolean isBackoffLimitExceeded(DomainStatus newStatus) {
446-
List<DomainCondition> domainConditions = Optional.of(newStatus)
447-
.map(DomainStatus::getConditions)
448-
.orElse(Collections.emptyList());
449-
450-
for (DomainCondition cond : domainConditions) {
451-
if ("BackoffLimitExceeded".equals(cond.getReason())) {
452-
return true;
453-
}
454-
}
455-
return false;
456-
}
457-
458411
String getDomainUid() {
459412
return getDomain().getDomainUid();
460413
}
@@ -803,6 +756,40 @@ private Integer getClusterSizeGoal(String clusterName) {
803756
}
804757
}
805758

759+
public static Step createFailureCountStep() {
760+
return new FailureCountStep();
761+
}
762+
763+
static class FailureCountStep extends DomainStatusUpdaterStep {
764+
765+
public FailureCountStep() {
766+
super(null);
767+
}
768+
769+
@Override
770+
void modifyStatus(DomainStatus domainStatus) {
771+
domainStatus.incrementIntrospectJobFailureCount();
772+
}
773+
}
774+
775+
public static Step recordLastIntrospectJobProcessedUid(String lastIntrospectJobProcessedId) {
776+
return new RecordLastIntrospectJobProcessedUidStep(lastIntrospectJobProcessedId);
777+
}
778+
779+
static class RecordLastIntrospectJobProcessedUidStep extends DomainStatusUpdaterStep {
780+
private final String lastIntrospectJobProcessedId;
781+
782+
public RecordLastIntrospectJobProcessedUidStep(String lastIntrospectJobProcessedId) {
783+
super(null);
784+
this.lastIntrospectJobProcessedId = lastIntrospectJobProcessedId;
785+
}
786+
787+
@Override
788+
void modifyStatus(DomainStatus domainStatus) {
789+
domainStatus.setLastIntrospectJobProcessedUid(lastIntrospectJobProcessedId);
790+
}
791+
}
792+
806793
public static class ProgressingStep extends DomainStatusUpdaterStep {
807794
private final String reason;
808795
private final boolean isPreserveAvailable;

operator/src/main/java/oracle/kubernetes/operator/helpers/JobHelper.java

Lines changed: 86 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import java.util.Objects;
1111
import java.util.Optional;
1212

13-
import io.kubernetes.client.openapi.models.V1Container;
1413
import io.kubernetes.client.openapi.models.V1DeleteOptions;
1514
import io.kubernetes.client.openapi.models.V1EnvVar;
1615
import io.kubernetes.client.openapi.models.V1Job;
@@ -19,7 +18,6 @@
1918
import io.kubernetes.client.openapi.models.V1ObjectMeta;
2019
import io.kubernetes.client.openapi.models.V1Pod;
2120
import io.kubernetes.client.openapi.models.V1PodList;
22-
import io.kubernetes.client.openapi.models.V1PodSpec;
2321
import io.kubernetes.client.openapi.models.V1Volume;
2422
import io.kubernetes.client.openapi.models.V1VolumeMount;
2523
import oracle.kubernetes.operator.DomainProcessorImpl;
@@ -31,6 +29,7 @@
3129
import oracle.kubernetes.operator.ProcessingConstants;
3230
import oracle.kubernetes.operator.TuningParameters;
3331
import oracle.kubernetes.operator.calls.CallResponse;
32+
import oracle.kubernetes.operator.calls.UnrecoverableErrorBuilder;
3433
import oracle.kubernetes.operator.logging.LoggingFacade;
3534
import oracle.kubernetes.operator.logging.LoggingFactory;
3635
import oracle.kubernetes.operator.logging.MessageKeys;
@@ -55,6 +54,7 @@
5554
import static oracle.kubernetes.operator.DomainSourceType.FromModel;
5655
import static oracle.kubernetes.operator.DomainStatusUpdater.INSPECTING_DOMAIN_PROGRESS_REASON;
5756
import static oracle.kubernetes.operator.DomainStatusUpdater.createProgressingStartedEventStep;
57+
import static oracle.kubernetes.operator.DomainStatusUpdater.recordLastIntrospectJobProcessedUid;
5858
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_DOMAIN_SPEC_GENERATION;
5959
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_STATE_LABEL;
6060
import static oracle.kubernetes.operator.ProcessingConstants.DOMAIN_INTROSPECT_REQUESTED;
@@ -123,7 +123,7 @@ private static boolean isGenerationChanged(Packet packet, DomainPresenceInfo inf
123123
}
124124

125125
private static String getIntrospectVersion(DomainPresenceInfo info) {
126-
return Optional.ofNullable(info.getDomain()).map(Domain::getSpec).map(s -> s.getIntrospectVersion())
126+
return Optional.ofNullable(info.getDomain()).map(Domain::getSpec).map(DomainSpec::getIntrospectVersion)
127127
.orElse("");
128128
}
129129

@@ -443,47 +443,69 @@ public NextAction apply(Packet packet) {
443443
return doNext(replaceOrCreateJob(packet, getNext()), packet);
444444
}
445445

446-
447446
private Step replaceOrCreateJob(Packet packet, Step next) {
448447
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
449448
return new CallBuilder().readJobAsync(JobHelper.createJobName(info.getDomain().getDomainUid()),
450449
info.getNamespace(), info.getDomain().getDomainUid(),
451450
new ReplaceOrCreateStep(next));
452451
}
452+
}
453+
454+
static class ReplaceOrCreateStep extends DefaultResponseStep {
453455

454-
private class ReplaceOrCreateStep extends DefaultResponseStep {
456+
ReplaceOrCreateStep(Step next) {
457+
super(next);
458+
}
455459

456-
ReplaceOrCreateStep(Step next) {
457-
super(next);
460+
@Override
461+
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
462+
List<Step> nextSteps = new ArrayList<>();
463+
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
464+
V1Job job = (V1Job) callResponse.getResult();
465+
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
466+
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
458467
}
459468

460-
@Override
461-
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
462-
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
463-
String namespace = info.getNamespace();
464-
V1Job job = (V1Job) callResponse.getResult();
465-
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
466-
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
467-
}
469+
OffsetDateTime startTime = createNextSteps(nextSteps, info, job, getNext());
470+
packet.putIfAbsent(START_TIME, startTime);
471+
return doNext(nextSteps.get(0), packet);
472+
}
468473

469-
if (job != null) {
470-
packet.putIfAbsent(START_TIME, Optional.ofNullable(job.getMetadata())
471-
.map(m -> m.getCreationTimestamp()).orElse(OffsetDateTime.now()));
472-
return doNext(Step.chain(
473-
createProgressingStartedEventStep(info, INSPECTING_DOMAIN_PROGRESS_REASON, true, null),
474-
readDomainIntrospectorPodLogStep(null),
474+
475+
static OffsetDateTime createNextSteps(List<Step> nextSteps, DomainPresenceInfo info,
476+
V1Job job, Step next) {
477+
OffsetDateTime jobStartTime;
478+
String namespace = info.getNamespace();
479+
if (job != null) {
480+
jobStartTime = Optional.ofNullable(job.getMetadata())
481+
.map(V1ObjectMeta::getCreationTimestamp).orElse(OffsetDateTime.now());
482+
String lastIntrospectJobProcessedId = getLastIntrospectJobProcessedId(info);
483+
if ((lastIntrospectJobProcessedId == null)
484+
|| (!lastIntrospectJobProcessedId.equals(job.getMetadata().getUid()))) {
485+
nextSteps.add(Step.chain(readDomainIntrospectorPodLogStep(null),
475486
deleteDomainIntrospectorJobStep(null),
476-
ConfigMapHelper.createIntrospectorConfigMapStep(null),
477-
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
478-
new DomainProcessorImpl.IntrospectionRequestStep(info),
479-
createDomainIntrospectorJobStep(getNext())), packet);
487+
ConfigMapHelper.createIntrospectorConfigMapStep(next)));
480488
} else {
481-
packet.putIfAbsent(START_TIME, OffsetDateTime.now());
482-
return doNext(Step.chain(
483-
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
484-
createDomainIntrospectorJobStep(getNext())), packet);
489+
nextSteps.add(Step.chain(createWatchDomainIntrospectorJobReadyStep(null),
490+
deleteDomainIntrospectorJobStep(null),
491+
new DomainProcessorImpl.IntrospectionRequestStep(info),
492+
createDomainIntrospectorJobStep(next)));
485493
}
494+
} else {
495+
jobStartTime = OffsetDateTime.now();
496+
nextSteps.add(Step.chain(
497+
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
498+
createDomainIntrospectorJobStep(next)));
486499
}
500+
return jobStartTime;
501+
}
502+
503+
private static String getLastIntrospectJobProcessedId(DomainPresenceInfo info) {
504+
return Optional.of(info)
505+
.map(DomainPresenceInfo::getDomain)
506+
.map(Domain::getStatus)
507+
.map(DomainStatus::getLastIntrospectJobProcessedUid)
508+
.orElse(null);
487509
}
488510
}
489511

@@ -567,6 +589,13 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
567589
nextStep = getNext();
568590
}
569591

592+
nextStep = Step.chain(recordLastIntrospectJobProcessedUid(
593+
getLastIntrospectJobProcessedId(domainIntrospectorJob)), nextStep);
594+
595+
if (!severeStatuses.isEmpty()) {
596+
nextStep = Step.chain(DomainStatusUpdater.createFailureCountStep(), nextStep);
597+
}
598+
570599
return doNext(
571600
DomainStatusUpdater.createFailureRelatedSteps(
572601
onSeparateLines(jobConditionsReason),
@@ -575,12 +604,20 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
575604
packet);
576605
}
577606

578-
return doNext(packet);
607+
Step nextSteps = Step.chain(recordLastIntrospectJobProcessedUid(
608+
getLastIntrospectJobProcessedId(domainIntrospectorJob)), getNext());
609+
return doNext(nextSteps, packet);
610+
611+
}
612+
613+
private String getLastIntrospectJobProcessedId(V1Job domainIntrospectorJob) {
614+
return Optional.ofNullable(domainIntrospectorJob).map(V1Job::getMetadata)
615+
.map(V1ObjectMeta::getUid).orElse(null);
579616
}
580617

581618
private OffsetDateTime getJobCreationTime(V1Job domainIntrospectorJob) {
582619
return Optional.ofNullable(domainIntrospectorJob.getMetadata())
583-
.map(m -> m.getCreationTimestamp()).orElse(OffsetDateTime.now());
620+
.map(V1ObjectMeta::getCreationTimestamp).orElse(OffsetDateTime.now());
584621
}
585622

586623
private boolean isNotComplete(V1Job domainIntrospectorJob) {
@@ -657,6 +694,23 @@ private void updateStatus(DomainPresenceInfo domainPresenceInfo) {
657694
private String onSeparateLines(List<String> lines) {
658695
return String.join(System.lineSeparator(), lines);
659696
}
697+
698+
@Override
699+
public NextAction onFailure(Packet packet, CallResponse<String> callResponse) {
700+
if (UnrecoverableErrorBuilder.isAsyncCallUnrecoverableFailure(callResponse)) {
701+
return updateDomainStatus(packet, callResponse);
702+
} else {
703+
return super.onFailure(packet, callResponse);
704+
}
705+
}
706+
707+
private NextAction updateDomainStatus(Packet packet, CallResponse<String> callResponse) {
708+
return doNext(
709+
Step.chain(
710+
DomainStatusUpdater.createFailureCountStep(),
711+
DomainStatusUpdater.createFailureRelatedSteps(callResponse, null)),
712+
packet);
713+
}
660714
}
661715

662716
private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospectorJob) {
@@ -667,7 +721,7 @@ private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospect
667721
LOGGER.info(INTROSPECTOR_JOB_FAILED,
668722
Objects.requireNonNull(domainIntrospectorJob.getMetadata()).getName(),
669723
domainIntrospectorJob.getMetadata().getNamespace(),
670-
domainIntrospectorJob.getStatus().toString(),
724+
domainIntrospectorJob.getStatus(),
671725
jobPodName);
672726
LOGGER.fine(INTROSPECTOR_JOB_FAILED_DETAIL,
673727
domainIntrospectorJob.getMetadata().getNamespace(),
@@ -783,18 +837,10 @@ private String getName(V1Pod pod) {
783837
return Optional.of(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getName).orElse("");
784838
}
785839

786-
private List<V1Container> getInitContainers(V1Pod pod) {
787-
return Optional.of(pod).map(V1Pod::getSpec).map(V1PodSpec::getInitContainers).orElse(Collections.emptyList());
788-
}
789-
790840
private boolean isJobPodName(String podName) {
791841
return podName.startsWith(createJobName(domainUid));
792842
}
793843

794-
private boolean isJobPod(V1Pod pod) {
795-
return pod.getMetadata().getName().startsWith(createJobName(domainUid));
796-
}
797-
798844
private void recordJobPodName(Packet packet, String podName) {
799845
packet.put(ProcessingConstants.JOB_POD_NAME, podName);
800846
}

0 commit comments

Comments
 (0)