Skip to content

OWLS-90180 Optimize detection of when to run introspection #2430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 8, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,7 @@ private static Step domainIntrospectionSteps(DomainPresenceInfo info) {
return Step.chain(
ConfigMapHelper.readIntrospectionVersionStep(info.getNamespace(), info.getDomainUid()),
new IntrospectionRequestStep(info),
JobHelper.deleteDomainIntrospectorJobStep(null),
JobHelper.createDomainIntrospectorJobStep(null));
JobHelper.replaceOrCreateDomainIntrospectorJobStep(null));
}

private static class IntrospectionRequestStep extends Step {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,11 @@ public NextAction apply(Packet packet) {
}

private long timeSinceJobStart(Packet packet) {
return System.currentTimeMillis() - ((Long) packet.get(JobHelper.START_TIME));
return System.currentTimeMillis() - getStartTime(packet);
}

private Long getStartTime(Packet packet) {
return Optional.ofNullable((Long) packet.get(JobHelper.START_TIME)).orElse(0L);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably ok, but if START_TIME is not set, then the elapsed time will be very long.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check is for the operator restart use case where the introspector job is started before the operator is restarted. The START_TIME will not be set in the packet in this case. It seems the elapsed time is only used to log a FINE level log message and I'm not sure if it's worth storing the START_TIME in the introspector config map if it's only used for logging.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have removed this check by setting the START_TIME to job creation time for operator restart use-case.

}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,13 @@ static boolean creatingServers(DomainPresenceInfo info) {
}

/**
* Factory for {@link Step} that deletes WebLogic domain introspector job.
* Factory for {@link Step} that replaces or creates WebLogic domain introspector job.
*
* @param next Next processing step
* @return Step for deleting the domain introsepctor jod
* @return Step for replacing or creating the domain introsepctor jod
*/
public static Step deleteDomainIntrospectorJobStep(Step next) {
return new DeleteIntrospectorJobStep(next);
public static Step replaceOrCreateDomainIntrospectorJobStep(Step next) {
return new ReplaceOrCreateIntrospectorJobStep(next);
}

private static Step createWatchDomainIntrospectorJobReadyStep(Step next) {
Expand Down Expand Up @@ -392,50 +392,55 @@ public NextAction apply(Packet packet) {
}
}

private static class DeleteIntrospectorJobStep extends Step {
private static class ReplaceOrCreateIntrospectorJobStep extends Step {

static final int JOB_DELETE_TIMEOUT_SECONDS = 1;

DeleteIntrospectorJobStep(Step next) {
ReplaceOrCreateIntrospectorJobStep(Step next) {
super(next);
}

@Override
public NextAction apply(Packet packet) {
return doNext(deleteJob(packet, getNext()), packet);
return doNext(replaceOrCreateJob(packet, getNext()), packet);
}

String getJobDeletedMessageKey() {
return MessageKeys.JOB_DELETED;

private Step replaceOrCreateJob(Packet packet, Step next) {
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
return new CallBuilder().readJobAsync(JobHelper.createJobName(info.getDomain().getDomainUid()),
info.getNamespace(), info.getDomain().getDomainUid(),
new ReplaceOrCreateStep(next));
}

void logJobDeleted(String domainUid, String namespace, String jobName, Packet packet) {
V1Job domainIntrospectorJob =
(V1Job) packet.remove(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB);
private class ReplaceOrCreateStep extends DefaultResponseStep {

packet.remove(ProcessingConstants.INTROSPECTOR_JOB_FAILURE_LOGGED);
if (domainIntrospectorJob != null
&& !JobWatcher.isComplete(domainIntrospectorJob)) {
logIntrospectorFailure(packet, domainIntrospectorJob);
ReplaceOrCreateStep(Step next) {
super(next);
}
packet.remove(ProcessingConstants.JOB_POD_NAME);

LOGGER.fine(getJobDeletedMessageKey(), domainUid, namespace, jobName);
}
@Override
public NextAction onSuccess(Packet packet, CallResponse callResponse) {
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
String namespace = info.getNamespace();
V1Job job = (V1Job) callResponse.getResult();
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) {
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job);
}

private Step deleteJob(Packet packet, Step next) {
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
java.lang.String domainUid = info.getDomain().getDomainUid();
java.lang.String namespace = info.getNamespace();
String jobName = JobHelper.createJobName(domainUid);
logJobDeleted(domainUid, namespace, jobName, packet);
return new CallBuilder().withTimeoutSeconds(JOB_DELETE_TIMEOUT_SECONDS)
.deleteJobAsync(
jobName,
namespace,
domainUid,
new V1DeleteOptions().propagationPolicy("Foreground"),
new DefaultResponseStep<>(next));
if (job != null) {
return doNext(Step.chain(
createProgressingStartedEventStep(info, INSPECTING_DOMAIN_PROGRESS_REASON, true, null),
readDomainIntrospectorPodLogStep(null),
deleteDomainIntrospectorJobStep(null),
ConfigMapHelper.createIntrospectorConfigMapStep(getNext())), packet);
} else {
packet.putIfAbsent(START_TIME, System.currentTimeMillis());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

START_TIME could be an instance of OffsetDateTime.now(). There isn't a good reason to convert to a Long.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in fedcece .

return doNext(Step.chain(
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()),
createDomainIntrospectorJobStep(getNext())), packet);
}
}
}
}

Expand Down Expand Up @@ -507,17 +512,31 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) {
jobConditionsReason.add(DomainStatusUpdater.ERR_INTROSPECTOR);
}
//Introspector job is incomplete, update domain status and terminate processing
Step nextStep = null;
if (System.currentTimeMillis() > getJobLazyDeletionTime(domainIntrospectorJob)) {
//Introspector job is incomplete and current time is greater than the lazy deletion time for the job,
//update the domain status and execute the next step
nextStep = getNext();
}

return doNext(
DomainStatusUpdater.createFailureRelatedSteps(
onSeparateLines(jobConditionsReason),
onSeparateLines(severeStatuses),
null),
nextStep),
packet);
}

return doNext(packet);
}

private Long getJobLazyDeletionTime(V1Job domainIntrospectorJob) {
int retryIntervalSeconds = TuningParameters.getInstance().getMainTuning().domainPresenceRecheckIntervalSeconds;
return Optional.ofNullable(domainIntrospectorJob.getMetadata())
.map(m -> m.getCreationTimestamp()).map(t -> t.toInstant().toEpochMilli()).orElse(0L)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you converting these OffsetDateTime instances to epoch millis? Why not return an OffsetDateTime and use the isBefore() or isAfter() comparison methods. Similarly, the class has built-in support for adding seconds or millis, etc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, made the changes to use OffsetDateTime, isAfter() and plus() methods instead of converting it to epoch millis in fedcece .

+ (retryIntervalSeconds * 1000L);
}

private boolean isNotComplete(V1Job domainIntrospectorJob) {
return !JobWatcher.isComplete(domainIntrospectorJob);
}
Expand Down Expand Up @@ -611,6 +630,49 @@ private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospect
}
}

static class DeleteDomainIntrospectorJobStep extends Step {

DeleteDomainIntrospectorJobStep(Step next) {
super(next);
}

@Override
public NextAction apply(Packet packet) {
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
String jobName = JobHelper.createJobName(info.getDomainUid());
logJobDeleted(info.getDomainUid(), info.getNamespace(), jobName, packet);
return doNext(new CallBuilder().withTimeoutSeconds(ReplaceOrCreateIntrospectorJobStep.JOB_DELETE_TIMEOUT_SECONDS)
.deleteJobAsync(
jobName,
info.getNamespace(),
info.getDomainUid(),
new V1DeleteOptions().propagationPolicy("Foreground"),
new DefaultResponseStep<>(getNext())), packet);
}
}

public static Step deleteDomainIntrospectorJobStep(Step next) {
return new DeleteDomainIntrospectorJobStep(next);
}

static void logJobDeleted(String domainUid, String namespace, String jobName, Packet packet) {
V1Job domainIntrospectorJob =
(V1Job) packet.remove(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB);

packet.remove(ProcessingConstants.INTROSPECTOR_JOB_FAILURE_LOGGED);
if (domainIntrospectorJob != null
&& !JobWatcher.isComplete(domainIntrospectorJob)) {
logIntrospectorFailure(packet, domainIntrospectorJob);
}
packet.remove(ProcessingConstants.JOB_POD_NAME);

LOGGER.fine(getJobDeletedMessageKey(), domainUid, namespace, jobName);
}

static String getJobDeletedMessageKey() {
return MessageKeys.JOB_DELETED;
}

private static class ReadDomainIntrospectorPodStep extends Step {

ReadDomainIntrospectorPodStep(Step next) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public void useSequenceBeforeAdminServerStep() {
plan,
hasChainWithStepsInOrder(
"DomainPresenceStep",
"DomainIntrospectorJobStep",
"ReplaceOrCreateIntrospectorJobStep",
"BeforeAdminServiceStep",
"AdminPodStep",
"ForServerStep",
Expand Down