Skip to content

Fixes for introspector failure count increment issue and optimizations #2564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions documentation/domains/Domain.json
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@
"type": "number",
"minimum": 0
},
"lastIntrospectJobProcessedUid": {
"description": "Unique id of the last introspector job that was processed for this domain.",
"type": "string"
},
"startTime": {
"description": "RFC 3339 date and time at which the operator started the domain. This will be when the operator begins processing and will precede when the various servers or clusters are available.",
"$ref": "#/definitions/DateTime"
Expand Down
1 change: 1 addition & 0 deletions documentation/domains/Domain.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ The current status of the operation of the WebLogic domain. Updated automaticall
| `clusters` | Array of [Cluster Status](#cluster-status) | Status of WebLogic clusters in this domain. |
| `conditions` | Array of [Domain Condition](#domain-condition) | Current service state of the domain. |
| `introspectJobFailureCount` | number | Non-zero if the introspector job fails for any reason. You can configure an introspector job retry limit for jobs that log script failures using the Operator tuning parameter 'domainPresenceFailureRetryMaxCount' (default 5). You cannot configure a limit for other types of failures, such as a Domain resource reference to an unknown secret name; in those cases, the retries are unlimited. |
| `lastIntrospectJobProcessedUid` | string | Unique id of the last introspector job that was processed for this domain. |
| `message` | string | A human readable message indicating details about why the domain is in this condition. |
| `reason` | string | A brief CamelCase message indicating details about why the domain is in this state. |
| `replicas` | number | The number of running cluster member Managed Servers in the WebLogic cluster if there is exactly one cluster defined in the domain configuration and where the `replicas` field is set at the `spec` level rather than for the specific cluster under `clusters`. This field is provided to support use of Kubernetes scaling for this limited use case. |
Expand Down
4 changes: 4 additions & 0 deletions documentation/domains/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,10 @@
"type": "number",
"minimum": 0.0
},
"lastIntrospectJobProcessedUid": {
"description": "Unique id of the last introspector job that was processed for this domain.",
"type": "string"
},
"startTime": {
"description": "RFC 3339 date and time at which the operator started the domain. This will be when the operator begins processing and will precede when the various servers or clusters are available.",
"$ref": "#/definitions/DateTime"
Expand Down
6 changes: 5 additions & 1 deletion kubernetes/crd/domain-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
weblogic.sha256: 2ebb2170cf64a39d3db9d3e7a9635ff80ca760fc8e37bcfacc13b0dd8e92553e
weblogic.sha256: 9a73b19676f5e704b99194a570051ca5ef0838a9113874a350cb51c61a2082a3
name: domains.weblogic.oracle
spec:
group: weblogic.oracle
Expand Down Expand Up @@ -11330,6 +11330,10 @@ spec:
this limited use case.
minimum: 0.0
type: number
lastIntrospectJobProcessedUid:
description: Unique id of the last introspector job that was processed
for this domain.
type: string
startTime:
description: RFC 3339 date and time at which the operator started
the domain. This will be when the operator begins processing and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
Expand Down Expand Up @@ -405,56 +404,10 @@ DomainStatus getNewStatus() {
newStatus.setMessage(
Optional.ofNullable(info).map(DomainPresenceInfo::getValidationWarningsAsString).orElse(null));
}
if (shouldUpdateFailureCount(newStatus)) {
newStatus.incrementIntrospectJobFailureCount();
}

return newStatus;
}

/**
 * Looks up the message stored in the status of the domain held by {@code info}.
 *
 * @return the status message, or null when the presence info, its domain,
 *     the domain status or the message itself is absent
 */
private String getExistingStatusMessage() {
  final DomainStatus currentStatus = Optional.ofNullable(info)
      .map(DomainPresenceInfo::getDomain)
      .map(Domain::getStatus)
      .orElse(null);
  return currentStatus == null ? null : currentStatus.getMessage();
}

/**
 * Finds the Progressing condition in the status of the domain held by {@code info}.
 *
 * @return the condition, or null when the presence info, its domain, the domain
 *     status or the condition is absent
 */
private DomainCondition getProgressingCondition() {
  final DomainStatus existingStatus = Optional.ofNullable(info)
      .map(DomainPresenceInfo::getDomain)
      .map(Domain::getStatus)
      .orElse(null);
  if (existingStatus == null) {
    return null;
  }
  return getProgressingCondition(existingStatus);
}

/**
 * Finds the Progressing condition in the given status.
 *
 * @param status the status to inspect; may be null
 * @return the result of asking the status for its condition of type Progressing,
 *     or null when the status is null
 */
private DomainCondition getProgressingCondition(DomainStatus status) {
  if (status == null) {
    return null;
  }
  return status.getConditionWithType(Progressing);
}

/**
 * Decides whether the failure count should be incremented for the new status.
 * All three checks must hold, and they are evaluated in this order: the domain
 * has moved out of Progressing, the existing status has no message, and the new
 * status carries a BackoffLimitExceeded condition.
 *
 * @param newStatus the status about to be recorded
 * @return true only if every check passes
 */
private boolean shouldUpdateFailureCount(DomainStatus newStatus) {
  if (!transitFromProgressing(newStatus)) {
    return false;
  }
  if (getExistingStatusMessage() != null) {
    return false;
  }
  return isBackoffLimitExceeded(newStatus);
}

/**
 * Reports whether the existing status has a Progressing condition while the
 * new status does not.
 *
 * @param newStatus the status about to be recorded
 * @return true if the Progressing condition is present now and absent in {@code newStatus}
 */
private boolean transitFromProgressing(DomainStatus newStatus) {
  final boolean currentlyProgressing = getProgressingCondition() != null;
  // The new status is only inspected when the existing one is Progressing.
  return currentlyProgressing && getProgressingCondition(newStatus) == null;
}

/**
 * Reports whether any condition of the given status has the reason
 * "BackoffLimitExceeded".
 *
 * <p>A null status, or a status without a condition list, is treated as having
 * no conditions, matching the null tolerance of
 * {@code getProgressingCondition(DomainStatus)}.
 *
 * @param newStatus the status to inspect; may be null
 * @return true if at least one condition carries the BackoffLimitExceeded reason
 */
private boolean isBackoffLimitExceeded(DomainStatus newStatus) {
  return Optional.ofNullable(newStatus)
      .map(DomainStatus::getConditions)
      .orElse(Collections.emptyList())
      .stream()
      .anyMatch(cond -> "BackoffLimitExceeded".equals(cond.getReason()));
}

/**
 * Returns the domain UID reported by the object that {@code getDomain()} returns.
 *
 * @return the value of {@code getDomain().getDomainUid()}
 */
String getDomainUid() {
return getDomain().getDomainUid();
}
Expand Down Expand Up @@ -803,6 +756,40 @@ private Integer getClusterSizeGoal(String clusterName) {
}
}

/**
 * Creates a step that increments the introspector job failure count in the
 * domain status.
 *
 * @return a new {@link FailureCountStep}
 */
public static Step createFailureCountStep() {
  final Step failureCountStep = new FailureCountStep();
  return failureCountStep;
}

/**
 * Status updater step whose only modification is to increment the introspect
 * job failure count of the domain status it is given.
 */
static class FailureCountStep extends DomainStatusUpdaterStep {

  /** Creates the step, passing null to the superclass constructor. */
  public FailureCountStep() {
    super(null);
  }

  /**
   * Increments the introspect job failure count.
   *
   * @param status the status to modify
   */
  @Override
  void modifyStatus(DomainStatus status) {
    status.incrementIntrospectJobFailureCount();
  }
}

/**
 * Creates a step that stores the given job UID in the domain status as the
 * last introspector job processed.
 *
 * @param lastIntrospectJobProcessedId the UID to record
 * @return a new {@link RecordLastIntrospectJobProcessedUidStep}
 */
public static Step recordLastIntrospectJobProcessedUid(String lastIntrospectJobProcessedId) {
  final Step recordStep = new RecordLastIntrospectJobProcessedUidStep(lastIntrospectJobProcessedId);
  return recordStep;
}

/**
 * Status updater step that writes a fixed job UID into the domain status as
 * the last introspect job processed.
 */
static class RecordLastIntrospectJobProcessedUidStep extends DomainStatusUpdaterStep {
  /** UID to write into the status when this step modifies it. */
  private final String jobUid;

  /**
   * Creates the step, passing null to the superclass constructor.
   *
   * @param lastIntrospectJobProcessedId the UID to record in the status
   */
  public RecordLastIntrospectJobProcessedUidStep(String lastIntrospectJobProcessedId) {
    super(null);
    jobUid = lastIntrospectJobProcessedId;
  }

  /**
   * Sets the last introspect job processed UID on the status.
   *
   * @param status the status to modify
   */
  @Override
  void modifyStatus(DomainStatus status) {
    status.setLastIntrospectJobProcessedUid(jobUid);
  }
}

public static class ProgressingStep extends DomainStatusUpdaterStep {
private final String reason;
private final boolean isPreserveAvailable;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import java.util.Objects;
import java.util.Optional;

import io.kubernetes.client.openapi.models.V1Container;
import io.kubernetes.client.openapi.models.V1DeleteOptions;
import io.kubernetes.client.openapi.models.V1EnvVar;
import io.kubernetes.client.openapi.models.V1Job;
Expand All @@ -19,7 +18,6 @@
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.openapi.models.V1Pod;
import io.kubernetes.client.openapi.models.V1PodList;
import io.kubernetes.client.openapi.models.V1PodSpec;
import io.kubernetes.client.openapi.models.V1Volume;
import io.kubernetes.client.openapi.models.V1VolumeMount;
import oracle.kubernetes.operator.DomainProcessorImpl;
Expand All @@ -31,6 +29,7 @@
import oracle.kubernetes.operator.ProcessingConstants;
import oracle.kubernetes.operator.TuningParameters;
import oracle.kubernetes.operator.calls.CallResponse;
import oracle.kubernetes.operator.calls.UnrecoverableErrorBuilder;
import oracle.kubernetes.operator.logging.LoggingFacade;
import oracle.kubernetes.operator.logging.LoggingFactory;
import oracle.kubernetes.operator.logging.MessageKeys;
Expand All @@ -55,6 +54,7 @@
import static oracle.kubernetes.operator.DomainSourceType.FromModel;
import static oracle.kubernetes.operator.DomainStatusUpdater.INSPECTING_DOMAIN_PROGRESS_REASON;
import static oracle.kubernetes.operator.DomainStatusUpdater.createProgressingStartedEventStep;
import static oracle.kubernetes.operator.DomainStatusUpdater.recordLastIntrospectJobProcessedUid;
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_DOMAIN_SPEC_GENERATION;
import static oracle.kubernetes.operator.LabelConstants.INTROSPECTION_STATE_LABEL;
import static oracle.kubernetes.operator.ProcessingConstants.DOMAIN_INTROSPECT_REQUESTED;
Expand Down Expand Up @@ -123,7 +123,7 @@ private static boolean isGenerationChanged(Packet packet, DomainPresenceInfo inf
}

private static String getIntrospectVersion(DomainPresenceInfo info) {
return Optional.ofNullable(info.getDomain()).map(Domain::getSpec).map(s -> s.getIntrospectVersion())
return Optional.ofNullable(info.getDomain()).map(Domain::getSpec).map(DomainSpec::getIntrospectVersion)
.orElse("");
}

Expand Down Expand Up @@ -443,47 +443,69 @@ public NextAction apply(Packet packet) {
return doNext(replaceOrCreateJob(packet, getNext()), packet);
}


/**
 * Builds the step that asynchronously reads the introspector job for the
 * domain found in the packet and hands the response to a
 * {@code ReplaceOrCreateStep}.
 *
 * @param packet the packet holding the {@code DomainPresenceInfo}
 * @param next the step given to the {@code ReplaceOrCreateStep}
 * @return the asynchronous job-read step
 */
private Step replaceOrCreateJob(Packet packet, Step next) {
  final DomainPresenceInfo presenceInfo = packet.getSpi(DomainPresenceInfo.class);
  final String domainUid = presenceInfo.getDomain().getDomainUid();
  return new CallBuilder().readJobAsync(
      JobHelper.createJobName(domainUid),
      presenceInfo.getNamespace(),
      domainUid,
      new ReplaceOrCreateStep(next));
}
}

static class ReplaceOrCreateStep extends DefaultResponseStep {

private class ReplaceOrCreateStep extends DefaultResponseStep {
ReplaceOrCreateStep(Step next) {
super(next);
}

ReplaceOrCreateStep(Step next) {
super(next);
@Override
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
List<Step> nextSteps = new ArrayList<>();
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
V1Job job = (V1Job) callResponse.getResult();
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
}

@Override
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
String namespace = info.getNamespace();
V1Job job = (V1Job) callResponse.getResult();
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
}
OffsetDateTime startTime = createNextSteps(nextSteps, info, job, getNext());
packet.putIfAbsent(START_TIME, startTime);
return doNext(nextSteps.get(0), packet);
}

if (job != null) {
packet.putIfAbsent(START_TIME, Optional.ofNullable(job.getMetadata())
.map(m -> m.getCreationTimestamp()).orElse(OffsetDateTime.now()));
return doNext(Step.chain(
createProgressingStartedEventStep(info, INSPECTING_DOMAIN_PROGRESS_REASON, true, null),
readDomainIntrospectorPodLogStep(null),

static OffsetDateTime createNextSteps(List<Step> nextSteps, DomainPresenceInfo info,
V1Job job, Step next) {
OffsetDateTime jobStartTime;
String namespace = info.getNamespace();
if (job != null) {
jobStartTime = Optional.ofNullable(job.getMetadata())
.map(V1ObjectMeta::getCreationTimestamp).orElse(OffsetDateTime.now());
String lastIntrospectJobProcessedId = getLastIntrospectJobProcessedId(info);
if ((lastIntrospectJobProcessedId == null)
|| (!lastIntrospectJobProcessedId.equals(job.getMetadata().getUid()))) {
nextSteps.add(Step.chain(readDomainIntrospectorPodLogStep(null),
deleteDomainIntrospectorJobStep(null),
ConfigMapHelper.createIntrospectorConfigMapStep(null),
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
new DomainProcessorImpl.IntrospectionRequestStep(info),
createDomainIntrospectorJobStep(getNext())), packet);
ConfigMapHelper.createIntrospectorConfigMapStep(next)));
} else {
packet.putIfAbsent(START_TIME, OffsetDateTime.now());
return doNext(Step.chain(
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
createDomainIntrospectorJobStep(getNext())), packet);
nextSteps.add(Step.chain(createWatchDomainIntrospectorJobReadyStep(null),
deleteDomainIntrospectorJobStep(null),
new DomainProcessorImpl.IntrospectionRequestStep(info),
createDomainIntrospectorJobStep(next)));
}
} else {
jobStartTime = OffsetDateTime.now();
nextSteps.add(Step.chain(
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
createDomainIntrospectorJobStep(next)));
}
return jobStartTime;
}

/**
 * Reads the UID of the last introspector job recorded as processed in the
 * status of the domain held by {@code info}.
 *
 * <p>Uses {@code Optional.ofNullable} so that a null {@code info} yields null
 * rather than a NullPointerException, as the {@code V1Job} overload of this
 * lookup already does.
 *
 * @param info the domain presence info; may be null
 * @return the recorded job UID, or null when the info, its domain, the domain
 *     status or the recorded UID is absent
 */
private static String getLastIntrospectJobProcessedId(DomainPresenceInfo info) {
  return Optional.ofNullable(info)
      .map(DomainPresenceInfo::getDomain)
      .map(Domain::getStatus)
      .map(DomainStatus::getLastIntrospectJobProcessedUid)
      .orElse(null);
}
}

Expand Down Expand Up @@ -567,6 +589,13 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
nextStep = getNext();
}

nextStep = Step.chain(recordLastIntrospectJobProcessedUid(
getLastIntrospectJobProcessedId(domainIntrospectorJob)), nextStep);

if (!severeStatuses.isEmpty()) {
nextStep = Step.chain(DomainStatusUpdater.createFailureCountStep(), nextStep);
}

return doNext(
DomainStatusUpdater.createFailureRelatedSteps(
onSeparateLines(jobConditionsReason),
Expand All @@ -575,12 +604,20 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
packet);
}

return doNext(packet);
Step nextSteps = Step.chain(recordLastIntrospectJobProcessedUid(
getLastIntrospectJobProcessedId(domainIntrospectorJob)), getNext());
return doNext(nextSteps, packet);

}

/**
 * Extracts the UID from the metadata of the given introspector job.
 *
 * @param domainIntrospectorJob the job to inspect; may be null
 * @return the job UID, or null when the job, its metadata or the UID is absent
 */
private String getLastIntrospectJobProcessedId(V1Job domainIntrospectorJob) {
  if (domainIntrospectorJob == null) {
    return null;
  }
  final V1ObjectMeta jobMetadata = domainIntrospectorJob.getMetadata();
  return jobMetadata == null ? null : jobMetadata.getUid();
}

private OffsetDateTime getJobCreationTime(V1Job domainIntrospectorJob) {
return Optional.ofNullable(domainIntrospectorJob.getMetadata())
.map(m -> m.getCreationTimestamp()).orElse(OffsetDateTime.now());
.map(V1ObjectMeta::getCreationTimestamp).orElse(OffsetDateTime.now());
}

private boolean isNotComplete(V1Job domainIntrospectorJob) {
Expand Down Expand Up @@ -657,6 +694,23 @@ private void updateStatus(DomainPresenceInfo domainPresenceInfo) {
private String onSeparateLines(List<String> lines) {
return String.join(System.lineSeparator(), lines);
}

/**
 * Handles a failed call. When {@code UnrecoverableErrorBuilder} classifies the
 * response as an unrecoverable async call failure, the domain status is
 * updated; every other failure is left to the superclass.
 *
 * @param packet the current packet
 * @param callResponse the failed call response
 * @return the next action to take
 */
@Override
public NextAction onFailure(Packet packet, CallResponse<String> callResponse) {
  if (!UnrecoverableErrorBuilder.isAsyncCallUnrecoverableFailure(callResponse)) {
    return super.onFailure(packet, callResponse);
  }
  return updateDomainStatus(packet, callResponse);
}

/**
 * Continues processing with a chain made of the failure count step followed by
 * the failure-related steps built from the call response.
 *
 * @param packet the current packet
 * @param callResponse the failed call response used to build the failure steps
 * @return the next action, running the chained steps
 */
private NextAction updateDomainStatus(Packet packet, CallResponse<String> callResponse) {
  final Step countFailure = DomainStatusUpdater.createFailureCountStep();
  final Step reportFailure = DomainStatusUpdater.createFailureRelatedSteps(callResponse, null);
  return doNext(Step.chain(countFailure, reportFailure), packet);
}
}

private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospectorJob) {
Expand All @@ -667,7 +721,7 @@ private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospect
LOGGER.info(INTROSPECTOR_JOB_FAILED,
Objects.requireNonNull(domainIntrospectorJob.getMetadata()).getName(),
domainIntrospectorJob.getMetadata().getNamespace(),
domainIntrospectorJob.getStatus().toString(),
domainIntrospectorJob.getStatus(),
jobPodName);
LOGGER.fine(INTROSPECTOR_JOB_FAILED_DETAIL,
domainIntrospectorJob.getMetadata().getNamespace(),
Expand Down Expand Up @@ -783,18 +837,10 @@ private String getName(V1Pod pod) {
return Optional.of(pod).map(V1Pod::getMetadata).map(V1ObjectMeta::getName).orElse("");
}

/**
 * Returns the init containers declared in the spec of the given pod.
 *
 * @param pod the pod to inspect; must not be null
 * @return the init containers, or an empty list when the pod has no spec or
 *     the spec has no init containers
 * @throws NullPointerException if {@code pod} is null
 */
private List<V1Container> getInitContainers(V1Pod pod) {
  final V1PodSpec podSpec = pod.getSpec();
  final List<V1Container> initContainers = podSpec == null ? null : podSpec.getInitContainers();
  if (initContainers == null) {
    return Collections.emptyList();
  }
  return initContainers;
}

/**
 * Checks whether a pod name begins with the job name created for {@code domainUid}.
 *
 * @param podName the pod name to test
 * @return true if {@code podName} starts with the created job name
 */
private boolean isJobPodName(String podName) {
  final String jobNamePrefix = createJobName(domainUid);
  return podName.startsWith(jobNamePrefix);
}

/**
 * Checks whether the name in the pod's metadata begins with the job name
 * created for {@code domainUid}.
 *
 * @param pod the pod to test; its metadata and name are dereferenced directly
 * @return true if the pod name starts with the created job name
 */
private boolean isJobPod(V1Pod pod) {
  final String podName = pod.getMetadata().getName();
  return podName.startsWith(createJobName(domainUid));
}

/**
 * Stores the given pod name in the packet under the
 * {@code ProcessingConstants.JOB_POD_NAME} key.
 *
 * @param packet the packet to update
 * @param podName the pod name to store
 */
private void recordJobPodName(Packet packet, String podName) {
packet.put(ProcessingConstants.JOB_POD_NAME, podName);
}
Expand Down
Loading