Skip to content

OWLS-72816 Generate event and status when operator can't scale cluster past maximum #2097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions docs-source/content/userguide/managing-domains/domain-events.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ The operator generates these event types, which indicate the following:
* `DomainProcessingRetrying`: The operator is going to retry the processing of a domain after it encountered a failure.
* `DomainProcessingCompleted`: The operator successfully completed the processing of a domain resource.
* `DomainProcessingAborted`: The operator stopped processing a domain when the operator encountered a fatal error or a failure that persisted after the specified maximum number of retries.
* `DomainValidationError`: A validation error or warning is found in a domain resource. Please refer to the event message for details.

#### Operator-generated event details

Expand Down Expand Up @@ -242,6 +243,39 @@ Source:
Type: Warning
Events: <none>

```
Example of a `DomainValidationError` event:

```none

Name: sample-domain1.DomainValidationError.1608160013145
Namespace: sample-domain1-ns
Labels: weblogic.createdByOperator=true
weblogic.domainUID=sample-domain1
Annotations: <none>
API Version: v1
Event Time: <nil>
First Timestamp: <nil>
Involved Object:
API Version: weblogic.oracle/v8
Kind: Domain
Name: sample-domain1
Namespace: sample-domain1-ns
Kind: Event
Last Timestamp: 2020-12-16T23:06:53Z
Message: Validation error in domain resource sample-domain1: Replica request of 5 exceeds the maximum dynamic server count of 2 configured for cluster cluster-1
Metadata:
Creation Timestamp: 2020-12-16T23:06:53Z
Resource Version: 11222690
Self Link: /api/v1/namespaces/sample-domain1-ns/events/sample-domain1.DomainValidationError.1608160013145
UID: cd4e6a2f-8ddb-4902-90a7-d993bceb567d
Reason: DomainValidationError
Reporting Component: weblogic.operator
Reporting Instance: weblogic-operator-67c75bc4bf-d4flw
Source:
Type: Warning
Events: <none>

```

Example of domain processing completed after failure and retries:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,11 @@ public boolean wasInspectionRun() {
return inspectionRun;
}

// Returns the current value of explicitRecheck.
// NOTE(review): where explicitRecheck is declared and assigned is not visible here — confirm.
@Override
public boolean isExplicitRecheck() {
return explicitRecheck;
}

private boolean shouldContinue() {
DomainPresenceInfo cachedInfo = getExistingDomainPresenceInfo(getNamespace(), getDomainUid());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public interface EventConstants {
String DOMAIN_PROCESSING_FAILED_EVENT = "DomainProcessingFailed";
String DOMAIN_PROCESSING_RETRYING_EVENT = "DomainProcessingRetrying";
String DOMAIN_PROCESSING_ABORTED_EVENT = "DomainProcessingAborted";
// Reason string reported on events created for domain validation errors or warnings.
String DOMAIN_VALIDATION_ERROR_EVENT = "DomainValidationError";
String EVENT_NORMAL = "Normal";
String EVENT_WARNING = "Warning";
String WEBLOGIC_OPERATOR_COMPONENT = "weblogic.operator";
Expand All @@ -31,4 +32,6 @@ public interface EventConstants {
= "Retrying the processing of domain resource %s after one or more failed attempts";
String DOMAIN_PROCESSING_ABORTED_PATTERN
= "Aborting the processing of domain resource %s permanently due to: %s";
// Message format for validation events: the first %s receives the domain UID,
// the second receives the validation message text.
String DOMAIN_VALIDATION_ERROR_PATTERN
= "Validation error in domain resource %s: %s";
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ static boolean isInspectionRequired(Packet packet) {
return domainRequiresIntrospectionInCurrentMakeRight(packet) && !wasInspectionRun(packet);
}

/**
 * Reports whether this make-right operation is an explicit recheck.
 * NOTE(review): the exact meaning of "explicit recheck" is defined by implementations — confirm.
 *
 * @return true if the operation is flagged as an explicit recheck
 */
boolean isExplicitRecheck();

/**
 * Reports whether the make-right operation carried by the packet is an explicit recheck.
 *
 * @param packet the packet to examine
 * @return true if the packet holds a make-right operation that reports an explicit recheck;
 *     false if it holds no such operation or the operation is not an explicit recheck
 */
static boolean isExplicitRecheck(Packet packet) {
  return fromPacket(packet)
      .filter(MakeRightDomainOperation::isExplicitRecheck)
      .isPresent();
}

/**
* Returns true if the packet contains info about a domain that requires introspection in a sequence of steps
* before server pods are created or modified.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

package oracle.kubernetes.operator.helpers;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

import io.kubernetes.client.openapi.models.V1ConfigMap;
import io.kubernetes.client.openapi.models.V1ConfigMapList;
Expand All @@ -14,8 +14,11 @@
import io.kubernetes.client.openapi.models.V1Secret;
import io.kubernetes.client.openapi.models.V1SecretList;
import oracle.kubernetes.operator.DomainStatusUpdater;
import oracle.kubernetes.operator.MakeRightDomainOperation;
import oracle.kubernetes.operator.ProcessingConstants;
import oracle.kubernetes.operator.calls.CallResponse;
import oracle.kubernetes.operator.helpers.EventHelper.EventData;
import oracle.kubernetes.operator.helpers.EventHelper.EventItem;
import oracle.kubernetes.operator.logging.LoggingFacade;
import oracle.kubernetes.operator.logging.LoggingFactory;
import oracle.kubernetes.operator.logging.MessageKeys;
Expand All @@ -24,13 +27,13 @@
import oracle.kubernetes.operator.work.NextAction;
import oracle.kubernetes.operator.work.Packet;
import oracle.kubernetes.operator.work.Step;
import oracle.kubernetes.weblogic.domain.model.Cluster;
import oracle.kubernetes.weblogic.domain.model.Domain;
import oracle.kubernetes.weblogic.domain.model.DomainSpec;
import oracle.kubernetes.weblogic.domain.model.KubernetesResourceLookup;
import oracle.kubernetes.weblogic.domain.model.ManagedServer;

import static java.lang.System.lineSeparator;
import static oracle.kubernetes.operator.DomainStatusUpdater.BAD_DOMAIN;
import static oracle.kubernetes.operator.helpers.EventHelper.createEventStep;
import static oracle.kubernetes.operator.logging.MessageKeys.DOMAIN_VALIDATION_FAILED;

public class DomainValidationSteps {
Expand Down Expand Up @@ -146,43 +149,57 @@ static class ValidateDomainTopologyStep extends Step {
}


private void logAndAddWarning(List<String> validationWarnings, String messageKey, Object... params) {
LOGGER.warning(messageKey, params);
validationWarnings.add(LOGGER.formatMessage(messageKey, params));
private void logAndAddValidationWarning(DomainPresenceInfo info, String msgId, Object... messageParams) {
LOGGER.warning(msgId, messageParams);
info.addValidationWarning(LOGGER.formatMessage(msgId, messageParams));
}

private void validate(DomainPresenceInfo info, WlsDomainConfig wlsDomainConfig) {
List<String> validationWarnings = new ArrayList<>();
DomainSpec domainSpec = info.getDomain().getSpec();

Domain domain = info.getDomain();
info.clearValidationWarnings();

// log warnings for clusters that are specified in domain resource but not configured
// log warnings for each cluster that is specified in domain resource but not configured
// in the WebLogic domain
for (Cluster cluster : domain.getSpec().getClusters()) {
if (!wlsDomainConfig.containsCluster(cluster.getClusterName())) {
logAndAddWarning(validationWarnings, MessageKeys.NO_CLUSTER_IN_DOMAIN, cluster.getClusterName());
}
}
// log warnings for managed servers that are specified in domain resource but not configured
domainSpec.getClusters().forEach(
c -> warnIfClusterDoesNotExist(wlsDomainConfig, c.getClusterName(), info));

// log warnings for each managed server that is specified in domain resource but not configured
// in the WebLogic domain
for (ManagedServer server : domain.getSpec().getManagedServers()) {
if (!wlsDomainConfig.containsServer(server.getServerName())) {
logAndAddWarning(validationWarnings, MessageKeys.NO_MANAGED_SERVER_IN_DOMAIN, server.getServerName());
}
domainSpec.getManagedServers().forEach(
s -> warnIfServerDoesNotExist(wlsDomainConfig, s.getServerName(), info));
}

private void warnIfClusterDoesNotExist(WlsDomainConfig domainConfig,
String clusterName, DomainPresenceInfo info) {
if (!domainConfig.containsCluster(clusterName)) {
logAndAddValidationWarning(info, MessageKeys.NO_CLUSTER_IN_DOMAIN, clusterName);
}
info.clearValidationWarnings();
for (String warning: validationWarnings) {
info.addValidationWarning(warning);
}

/**
 * Records a validation warning when the named managed server is absent from the WebLogic
 * domain configuration.
 *
 * @param domainConfig WebLogic domain configuration to look the server up in
 * @param serverName name of the managed server specified in the domain resource
 * @param info domain presence info on which the warning is recorded
 */
private void warnIfServerDoesNotExist(WlsDomainConfig domainConfig,
    String serverName, DomainPresenceInfo info) {
  if (domainConfig.containsServer(serverName)) {
    // The server is configured; nothing to report.
    return;
  }
  logAndAddValidationWarning(info, MessageKeys.NO_MANAGED_SERVER_IN_DOMAIN, serverName);
}

/**
 * Validates the domain resource against the topology stored in the packet under
 * {@code ProcessingConstants.DOMAIN_TOPOLOGY} and continues with the step chosen by
 * {@code getNextStep}: a validation event step when validation produced a message and the
 * operation is not an explicit recheck, otherwise the regular next step.
 *
 * @param packet the packet carrying the domain presence info and the domain topology
 * @return the next action to perform
 */
@Override
public NextAction apply(Packet packet) {
  DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
  boolean isExplicitRecheck = MakeRightDomainOperation.isExplicitRecheck(packet);
  WlsDomainConfig wlsDomainConfig = (WlsDomainConfig) packet.get(ProcessingConstants.DOMAIN_TOPOLOGY);
  validate(info, wlsDomainConfig);

  // A single exit point: an earlier unconditional return here would skip the event step entirely.
  return doNext(getNextStep(info.getValidationWarningsAsString(), isExplicitRecheck, getNext()), packet);
}

/**
 * Chooses the step that follows validation.
 *
 * @param message validation message to report, or null when there is nothing to report
 * @param skipCreateEvent true to bypass event creation regardless of the message
 * @param next the step that follows in the regular flow
 * @return {@code next} itself when event creation is skipped or there is no message; otherwise
 *     a validation-error event step that is followed by {@code next}
 */
private Step getNextStep(String message, boolean skipCreateEvent, Step next) {
  if (skipCreateEvent || message == null) {
    return next;
  }
  return createEventStep(new EventData(EventItem.DOMAIN_VALIDATION_ERROR, message), next);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_RETRYING_PATTERN;
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_STARTING_EVENT;
import static oracle.kubernetes.operator.EventConstants.DOMAIN_PROCESSING_STARTING_PATTERN;
import static oracle.kubernetes.operator.EventConstants.DOMAIN_VALIDATION_ERROR_EVENT;
import static oracle.kubernetes.operator.EventConstants.DOMAIN_VALIDATION_ERROR_PATTERN;
import static oracle.kubernetes.operator.EventConstants.EVENT_NORMAL;
import static oracle.kubernetes.operator.EventConstants.EVENT_WARNING;
import static oracle.kubernetes.operator.EventConstants.WEBLOGIC_OPERATOR_COMPONENT;
Expand All @@ -60,10 +62,27 @@ public static Step createEventStep(
return new CreateEventStep(eventData);
}

/**
 * Factory for a {@link Step} that asynchronously creates an event and is followed by the
 * given step.
 *
 * @param eventData data describing the event to create
 * @param next the step to run after the event step
 * @return Step for creating an event
 */
public static Step createEventStep(EventData eventData, Step next) {
  final Step eventStep = new CreateEventStep(eventData, next);
  return eventStep;
}

public static class CreateEventStep extends Step {
private final EventData eventData;

// Creates an event step with no following step; delegates to the two-argument
// constructor with a null next step.
CreateEventStep(EventData eventData) {
this(eventData, null);
}

// Creates an event step for the given event data; next is handed to the Step
// superclass constructor.
CreateEventStep(EventData eventData, Step next) {
super(next);
this.eventData = eventData;
}

Expand Down Expand Up @@ -254,6 +273,28 @@ public String getMessage(DomainPresenceInfo info, EventData eventData) {
}

},
// Event item for validation errors or warnings found in a domain resource; reported as a warning.
DOMAIN_VALIDATION_ERROR {
  @Override
  public String getType() {
    return EVENT_WARNING;
  }

  @Override
  public String getReason() {
    return DOMAIN_VALIDATION_ERROR_EVENT;
  }

  @Override
  public String getPattern() {
    return DOMAIN_VALIDATION_ERROR_PATTERN;
  }

  /**
   * Builds the event message from the domain UID and the validation message in the event data;
   * a null validation message is rendered as an empty string.
   */
  @Override
  public String getMessage(DomainPresenceInfo info, EventData eventData) {
    final String domainUid = info.getDomainUid();
    final String detail = eventData.message == null ? "" : eventData.message;
    return String.format(DOMAIN_VALIDATION_ERROR_PATTERN, domainUid, detail);
  }
},
EMPTY {
@Override
protected String getPattern() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ public NextAction apply(Packet packet) {
packet);
}

return doNext(DomainValidationSteps.createValidateDomainTopologyStep(getNext()), packet);
return doNext(packet);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@
import javax.annotation.Nonnull;

import oracle.kubernetes.operator.DomainStatusUpdater;
import oracle.kubernetes.operator.MakeRightDomainOperation;
import oracle.kubernetes.operator.ProcessingConstants;
import oracle.kubernetes.operator.helpers.DomainPresenceInfo;
import oracle.kubernetes.operator.helpers.DomainPresenceInfo.ServerShutdownInfo;
import oracle.kubernetes.operator.helpers.DomainPresenceInfo.ServerStartupInfo;
import oracle.kubernetes.operator.helpers.EventHelper.EventData;
import oracle.kubernetes.operator.helpers.EventHelper.EventItem;
import oracle.kubernetes.operator.helpers.PodHelper;
import oracle.kubernetes.operator.logging.LoggingFacade;
import oracle.kubernetes.operator.logging.LoggingFactory;
Expand All @@ -37,6 +40,7 @@
import static java.util.Comparator.comparing;
import static oracle.kubernetes.operator.DomainStatusUpdater.MANAGED_SERVERS_STARTING_PROGRESS_REASON;
import static oracle.kubernetes.operator.DomainStatusUpdater.createProgressingStartedEventStep;
import static oracle.kubernetes.operator.helpers.EventHelper.createEventStep;

public class ManagedServersUpStep extends Step {
static final String SERVERS_UP_MSG =
Expand Down Expand Up @@ -97,9 +101,11 @@ private static void insert(List<Step> steps, Step step) {
public NextAction apply(Packet packet) {
LOGGER.entering();
DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class);
boolean isExplicitRecheck = MakeRightDomainOperation.isExplicitRecheck(packet);
WlsDomainConfig config = (WlsDomainConfig) packet.get(ProcessingConstants.DOMAIN_TOPOLOGY);

ServersUpStepFactory factory = new ServersUpStepFactory(config, info.getDomain());
ServersUpStepFactory factory = new ServersUpStepFactory(config,
info.getDomain(), info, isExplicitRecheck);

if (LOGGER.isFineEnabled()) {
LOGGER.fine(SERVERS_UP_MSG, factory.domain.getDomainUid(), getRunningServers(info));
Expand Down Expand Up @@ -157,15 +163,21 @@ Step createServerStep(
static class ServersUpStepFactory {
final WlsDomainConfig domainTopology;
final Domain domain;
// Presence info on which validation warnings raised by this factory are recorded.
final DomainPresenceInfo info;
// When true, validation problems are still logged and recorded, but no event step is created.
final boolean skipEventCreation;
List<ServerStartupInfo> startupInfos;
List<ServerShutdownInfo> shutdownInfos = new ArrayList<>();
final Collection<String> servers = new ArrayList<>();
final Collection<String> preCreateServers = new ArrayList<>();
final Map<String, Integer> replicas = new HashMap<>();
// Event step created for a validation problem; stays null when none was created.
private Step eventStep;

/**
 * Creates a factory for the steps that bring managed servers up.
 *
 * @param domainTopology WebLogic domain configuration
 * @param domain the domain resource
 * @param info domain presence info on which validation warnings are recorded
 * @param skipEventCreation true to suppress creation of validation event steps
 */
ServersUpStepFactory(WlsDomainConfig domainTopology, Domain domain,
    DomainPresenceInfo info, boolean skipEventCreation) {
  this.domainTopology = domainTopology;
  this.domain = domain;
  this.info = info;
  this.skipEventCreation = skipEventCreation;
}

/**
Expand Down Expand Up @@ -223,11 +235,8 @@ boolean exceedsMaxConfiguredClusterSize(WlsClusterConfig clusterConfig) {
}

/**
 * Builds the step that follows this factory's work.
 *
 * @param next the step to run afterwards
 * @return {@code next} itself when no servers were collected, otherwise a
 *     {@code ManagedServerUpIteratorStep} followed by {@code next}; when a validation event
 *     step was created, that event step is chained in front of the result
 */
private Step createNextStep(Step next) {
  Step nextStep = servers.isEmpty() ? next : new ManagedServerUpIteratorStep(getStartupInfos(), next);
  // Put the pending validation event, if any, ahead of the server start-up work.
  return eventStep == null ? nextStep : Step.chain(eventStep, nextStep);
}

Collection<ServerStartupInfo> getStartupInfos() {
Expand Down Expand Up @@ -270,8 +279,7 @@ private Integer getReplicaCount(String clusterName) {
private void logIfReplicasExceedsClusterServersMax(WlsClusterConfig clusterConfig) {
if (exceedsMaxConfiguredClusterSize(clusterConfig)) {
String clusterName = clusterConfig.getClusterName();
LOGGER.warning(
MessageKeys.REPLICAS_EXCEEDS_TOTAL_CLUSTER_SERVER_COUNT,
addValidationErrorEventAndWarning(MessageKeys.REPLICAS_EXCEEDS_TOTAL_CLUSTER_SERVER_COUNT,
domain.getReplicaCount(clusterName),
clusterConfig.getMaxDynamicClusterSize(),
clusterName);
Expand All @@ -281,18 +289,26 @@ private void logIfReplicasExceedsClusterServersMax(WlsClusterConfig clusterConfi
/**
 * Reports a validation problem when the requested replica count for the cluster is below the
 * cluster's minimum dynamic cluster size, and raises the domain's replica count to that minimum.
 *
 * <p>The warning is logged once, inside {@code addValidationErrorEventAndWarning}; no separate
 * {@code LOGGER.warning} call is made here so the message is not written twice.
 *
 * @param clusterConfig configuration of the cluster to check
 */
private void logIfReplicasLessThanClusterServersMin(WlsClusterConfig clusterConfig) {
  if (lessThanMinConfiguredClusterSize(clusterConfig)) {
    String clusterName = clusterConfig.getClusterName();
    addValidationErrorEventAndWarning(MessageKeys.REPLICAS_LESS_THAN_TOTAL_CLUSTER_SERVER_COUNT,
        domain.getReplicaCount(clusterName),
        clusterConfig.getMinDynamicClusterSize(),
        clusterName);

    // Reset current replica count so we don't scale down less than minimum
    // dynamic cluster size
    domain.setReplicaCount(clusterName, clusterConfig.getMinDynamicClusterSize());
  }
}

/**
 * Logs a validation warning, prepares a validation-error event step carrying the formatted
 * message unless event creation is being skipped, and records the message on the domain
 * presence info.
 *
 * <p>NOTE(review): every call replaces {@code eventStep}, so when several validation problems
 * are reported only the most recent one gets an event step — confirm this is intended.
 *
 * @param msgId message key of the warning
 * @param messageParams parameters substituted into the message
 */
private void addValidationErrorEventAndWarning(String msgId, Object... messageParams) {
  LOGGER.warning(msgId, messageParams);
  final String warningText = LOGGER.formatMessage(msgId, messageParams);
  final boolean createEvent = !skipEventCreation;
  if (createEvent) {
    eventStep = createEventStep(new EventData(EventItem.DOMAIN_VALIDATION_ERROR, warningText));
  }
  info.addValidationWarning(warningText);
}

private boolean lessThanMinConfiguredClusterSize(WlsClusterConfig clusterConfig) {
if (clusterConfig != null) {
String clusterName = clusterConfig.getClusterName();
Expand Down
Loading