-
Notifications
You must be signed in to change notification settings - Fork 217
OWLS-90180 Optimize detection of when to run introspection #2430
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
8d9fd6a
21e87c2
3438caa
6a29a8b
462b5d6
db5cc7c
aec66ce
f30a87b
8fe6448
f07b126
fedcece
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -178,13 +178,13 @@ static boolean creatingServers(DomainPresenceInfo info) { | |
} | ||
|
||
/** | ||
* Factory for {@link Step} that deletes WebLogic domain introspector job. | ||
* Factory for {@link Step} that replaces or creates WebLogic domain introspector job. | ||
* | ||
* @param next Next processing step | ||
* @return Step for deleting the domain introsepctor jod | ||
* @return Step for replacing or creating the domain introsepctor jod | ||
*/ | ||
public static Step deleteDomainIntrospectorJobStep(Step next) { | ||
return new DeleteIntrospectorJobStep(next); | ||
public static Step replaceOrCreateDomainIntrospectorJobStep(Step next) { | ||
return new ReplaceOrCreateIntrospectorJobStep(next); | ||
} | ||
|
||
private static Step createWatchDomainIntrospectorJobReadyStep(Step next) { | ||
|
@@ -392,50 +392,55 @@ public NextAction apply(Packet packet) { | |
} | ||
} | ||
|
||
private static class DeleteIntrospectorJobStep extends Step { | ||
private static class ReplaceOrCreateIntrospectorJobStep extends Step { | ||
|
||
static final int JOB_DELETE_TIMEOUT_SECONDS = 1; | ||
|
||
DeleteIntrospectorJobStep(Step next) { | ||
ReplaceOrCreateIntrospectorJobStep(Step next) { | ||
super(next); | ||
} | ||
|
||
@Override | ||
public NextAction apply(Packet packet) { | ||
return doNext(deleteJob(packet, getNext()), packet); | ||
return doNext(replaceOrCreateJob(packet, getNext()), packet); | ||
} | ||
|
||
String getJobDeletedMessageKey() { | ||
return MessageKeys.JOB_DELETED; | ||
|
||
private Step replaceOrCreateJob(Packet packet, Step next) { | ||
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); | ||
return new CallBuilder().readJobAsync(JobHelper.createJobName(info.getDomain().getDomainUid()), | ||
info.getNamespace(), info.getDomain().getDomainUid(), | ||
new ReplaceOrCreateStep(next)); | ||
} | ||
|
||
void logJobDeleted(String domainUid, String namespace, String jobName, Packet packet) { | ||
V1Job domainIntrospectorJob = | ||
(V1Job) packet.remove(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB); | ||
private class ReplaceOrCreateStep extends DefaultResponseStep { | ||
|
||
packet.remove(ProcessingConstants.INTROSPECTOR_JOB_FAILURE_LOGGED); | ||
if (domainIntrospectorJob != null | ||
&& !JobWatcher.isComplete(domainIntrospectorJob)) { | ||
logIntrospectorFailure(packet, domainIntrospectorJob); | ||
ReplaceOrCreateStep(Step next) { | ||
super(next); | ||
} | ||
packet.remove(ProcessingConstants.JOB_POD_NAME); | ||
|
||
LOGGER.fine(getJobDeletedMessageKey(), domainUid, namespace, jobName); | ||
} | ||
@Override | ||
public NextAction onSuccess(Packet packet, CallResponse callResponse) { | ||
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); | ||
String namespace = info.getNamespace(); | ||
V1Job job = (V1Job) callResponse.getResult(); | ||
if ((job != null) && (packet.get(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB) == null)) { | ||
packet.put(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB, job); | ||
} | ||
|
||
private Step deleteJob(Packet packet, Step next) { | ||
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); | ||
java.lang.String domainUid = info.getDomain().getDomainUid(); | ||
java.lang.String namespace = info.getNamespace(); | ||
String jobName = JobHelper.createJobName(domainUid); | ||
logJobDeleted(domainUid, namespace, jobName, packet); | ||
return new CallBuilder().withTimeoutSeconds(JOB_DELETE_TIMEOUT_SECONDS) | ||
.deleteJobAsync( | ||
jobName, | ||
namespace, | ||
domainUid, | ||
new V1DeleteOptions().propagationPolicy("Foreground"), | ||
new DefaultResponseStep<>(next)); | ||
if (job != null) { | ||
return doNext(Step.chain( | ||
createProgressingStartedEventStep(info, INSPECTING_DOMAIN_PROGRESS_REASON, true, null), | ||
readDomainIntrospectorPodLogStep(null), | ||
deleteDomainIntrospectorJobStep(null), | ||
ConfigMapHelper.createIntrospectorConfigMapStep(getNext())), packet); | ||
} else { | ||
packet.putIfAbsent(START_TIME, System.currentTimeMillis()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. START_TIME could be an instance of OffsetDateTime.now(). There isn't a good reason to convert to a Long. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in fedcece . |
||
return doNext(Step.chain( | ||
ConfigMapHelper.readExistingIntrospectorConfigMap(namespace, info.getDomainUid()), | ||
createDomainIntrospectorJobStep(getNext())), packet); | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
@@ -507,17 +512,31 @@ public NextAction onSuccess(Packet packet, CallResponse<String> callResponse) { | |
jobConditionsReason.add(DomainStatusUpdater.ERR_INTROSPECTOR); | ||
} | ||
//Introspector job is incomplete, update domain status and terminate processing | ||
Step nextStep = null; | ||
if (System.currentTimeMillis() > getJobLazyDeletionTime(domainIntrospectorJob)) { | ||
//Introspector job is incomplete and current time is greater than the lazy deletion time for the job, | ||
//update the domain status and execute the next step | ||
nextStep = getNext(); | ||
} | ||
|
||
return doNext( | ||
DomainStatusUpdater.createFailureRelatedSteps( | ||
onSeparateLines(jobConditionsReason), | ||
onSeparateLines(severeStatuses), | ||
null), | ||
nextStep), | ||
packet); | ||
} | ||
|
||
return doNext(packet); | ||
} | ||
|
||
private Long getJobLazyDeletionTime(V1Job domainIntrospectorJob) { | ||
int retryIntervalSeconds = TuningParameters.getInstance().getMainTuning().domainPresenceRecheckIntervalSeconds; | ||
return Optional.ofNullable(domainIntrospectorJob.getMetadata()) | ||
.map(m -> m.getCreationTimestamp()).map(t -> t.toInstant().toEpochMilli()).orElse(0L) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you converting these OffsetDateTime instances to epoch millis? Why not return an OffsetDateTime and use the isBefore() or isAfter() comparison methods. Similarly, the class has built-in support for adding seconds or millis, etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, made the changes to use OffsetDateTime, isAfter() and plus() methods instead of converting it to epoch millis in fedcece . |
||
+ (retryIntervalSeconds * 1000L); | ||
} | ||
|
||
private boolean isNotComplete(V1Job domainIntrospectorJob) { | ||
return !JobWatcher.isComplete(domainIntrospectorJob); | ||
} | ||
|
@@ -611,6 +630,49 @@ private static void logIntrospectorFailure(Packet packet, V1Job domainIntrospect | |
} | ||
} | ||
|
||
static class DeleteDomainIntrospectorJobStep extends Step { | ||
|
||
DeleteDomainIntrospectorJobStep(Step next) { | ||
super(next); | ||
} | ||
|
||
@Override | ||
public NextAction apply(Packet packet) { | ||
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); | ||
String jobName = JobHelper.createJobName(info.getDomainUid()); | ||
logJobDeleted(info.getDomainUid(), info.getNamespace(), jobName, packet); | ||
return doNext(new CallBuilder().withTimeoutSeconds(ReplaceOrCreateIntrospectorJobStep.JOB_DELETE_TIMEOUT_SECONDS) | ||
.deleteJobAsync( | ||
jobName, | ||
info.getNamespace(), | ||
info.getDomainUid(), | ||
new V1DeleteOptions().propagationPolicy("Foreground"), | ||
new DefaultResponseStep<>(getNext())), packet); | ||
} | ||
} | ||
|
||
public static Step deleteDomainIntrospectorJobStep(Step next) { | ||
return new DeleteDomainIntrospectorJobStep(next); | ||
} | ||
|
||
static void logJobDeleted(String domainUid, String namespace, String jobName, Packet packet) { | ||
V1Job domainIntrospectorJob = | ||
(V1Job) packet.remove(ProcessingConstants.DOMAIN_INTROSPECTOR_JOB); | ||
|
||
packet.remove(ProcessingConstants.INTROSPECTOR_JOB_FAILURE_LOGGED); | ||
if (domainIntrospectorJob != null | ||
&& !JobWatcher.isComplete(domainIntrospectorJob)) { | ||
logIntrospectorFailure(packet, domainIntrospectorJob); | ||
} | ||
packet.remove(ProcessingConstants.JOB_POD_NAME); | ||
|
||
LOGGER.fine(getJobDeletedMessageKey(), domainUid, namespace, jobName); | ||
} | ||
|
||
static String getJobDeletedMessageKey() { | ||
return MessageKeys.JOB_DELETED; | ||
} | ||
|
||
private static class ReadDomainIntrospectorPodStep extends Step { | ||
|
||
ReadDomainIntrospectorPodStep(Step next) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It probably ok, but if START_TIME is not set, then the elapsed time will be very long.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This check is for the operator restart use case where the introspector job is started before the operator is restarted. The START_TIME will not be set in the packet in this case. It seems the elapsed time is only used to log a FINE level log message and I'm not sure if it's worth storing the START_TIME in the introspector config map if it's only used for logging.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have removed this check by setting the START_TIME to job creation time for operator restart use-case.