Skip to content

Log introspect pod on failure #1787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@
import javax.annotation.Nonnull;

import io.kubernetes.client.openapi.ApiException;
import io.kubernetes.client.openapi.models.V1ContainerState;
import io.kubernetes.client.openapi.models.V1ContainerStateTerminated;
import io.kubernetes.client.openapi.models.V1ContainerStateWaiting;
import io.kubernetes.client.openapi.models.V1ContainerStatus;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.openapi.models.V1Pod;
import io.kubernetes.client.openapi.models.V1PodStatus;
import io.kubernetes.client.util.Watch;
import oracle.kubernetes.operator.TuningParameters.WatchTuning;
import oracle.kubernetes.operator.builders.WatchBuilder;
Expand All @@ -30,12 +35,13 @@
import oracle.kubernetes.operator.watcher.WatchListener;
import oracle.kubernetes.operator.work.Step;

import static oracle.kubernetes.operator.helpers.LegalNames.DOMAIN_INTROSPECTOR_JOB_SUFFIX;

/**
* Watches for changes to pods.
*/
public class PodWatcher extends Watcher<V1Pod> implements WatchListener<V1Pod>, PodAwaiterStepFactory {
private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator");

private final String namespace;
private final WatchListener<V1Pod> listener;

Expand Down Expand Up @@ -141,6 +147,12 @@ public void receivedResponse(Watch.Response<V1Pod> item) {
switch (item.type) {
case "ADDED":
case "MODIFIED":
if (podName.contains(DOMAIN_INTROSPECTOR_JOB_SUFFIX) && isFailed(pod)) {
LOGGER.info(MessageKeys.INTROSPECTOR_POD_FAILED,
pod.getMetadata().getName(),
pod.getMetadata().getNamespace(),
pod.getStatus().toString());
}
copyOf(getOnModifiedCallbacks(podName)).forEach(c -> c.accept(pod));
break;
case "DELETED":
Expand All @@ -153,6 +165,52 @@ public void receivedResponse(Watch.Response<V1Pod> item) {
LOGGER.exiting();
}

/**
 * Decides whether a pod should be reported as failed.
 *
 * <p>A pod counts as failed when at least one of its container statuses is not ready and
 * either carries a waiting-state message or has a terminated-state reason containing
 * "Error".
 *
 * @param pod pod to examine; may be null
 * @return true if the pod is considered failed; false for a null pod, a pod without status,
 *     or a pod without container statuses
 */
private static boolean isFailed(V1Pod pod) {
  if (pod == null) {
    return false;
  }

  final V1PodStatus podStatus = pod.getStatus();
  LOGGER.fine(
      "PodWatcher.isFailed status of pod " + pod.getMetadata().getName() + ": " + podStatus);

  // Absent status or absent container statuses both fall through to "not failed".
  return Optional.ofNullable(podStatus)
      .map(V1PodStatus::getContainerStatuses)
      .map(containerStatuses -> containerStatuses.stream().anyMatch(containerStatus ->
          !isReady(containerStatus)
              && (getContainerStateWaitingMessage(containerStatus) != null
                  || getContainerStateTerminatedReason(containerStatus).contains("Error"))))
      .orElse(false);
}

/**
 * Reports whether a container status is flagged as ready.
 *
 * @param conStatus container status to inspect; may be null
 * @return the ready flag of the status, or false when the status or its flag is null
 */
private static boolean isReady(V1ContainerStatus conStatus) {
  if (conStatus == null) {
    return false;
  }
  final Boolean readyFlag = conStatus.getReady();
  return readyFlag != null && readyFlag;
}

/**
 * Returns the reason recorded in a container's terminated state.
 *
 * @param conStatus container status to inspect; may be null
 * @return the terminated-state reason, or an empty string when the status, its state, its
 *     terminated state, or the reason itself is absent
 */
private static String getContainerStateTerminatedReason(V1ContainerStatus conStatus) {
  // ofNullable rather than of: a null status must yield the default instead of a
  // NullPointerException, matching how isReady treats a null status.
  return Optional.ofNullable(conStatus)
      .map(V1ContainerStatus::getState)
      .map(V1ContainerState::getTerminated)
      .map(V1ContainerStateTerminated::getReason)
      .orElse("");
}

/**
 * Returns the message recorded in a container's waiting state.
 *
 * @param conStatus container status to inspect; may be null
 * @return the waiting-state message, or null when the status, its state, its waiting state,
 *     or the message itself is absent
 */
private static String getContainerStateWaitingMessage(V1ContainerStatus conStatus) {
  // ofNullable rather than of: a null status must yield null instead of a
  // NullPointerException, matching how isReady treats a null status.
  return Optional.ofNullable(conStatus)
      .map(V1ContainerStatus::getState)
      .map(V1ContainerState::getWaiting)
      .map(V1ContainerStateWaiting::getMessage)
      .orElse(null);
}

// make a copy to avoid concurrent modification
private <T> Collection<T> copyOf(Collection<T> collection) {
return new ArrayList<>(collection);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ public class LegalNames {

private static final String SERVER_PATTERN = "%s-%s";
private static final String CLUSTER_SERVICE_PATTERN = "%s-cluster-%s";
private static final String DOMAIN_INTROSPECTOR_JOB_PATTERN = "%s-introspect-domain-job";
public static final String DOMAIN_INTROSPECTOR_JOB_SUFFIX = "-introspect-domain-job";
private static final String DOMAIN_INTROSPECTOR_JOB_PATTERN = "%s" + DOMAIN_INTROSPECTOR_JOB_SUFFIX;
private static final String EXTERNAL_SERVICE_PATTERN = "%s-%s-external";

public static final String DNS_1123_FIELDS_PARAM = "dns1123Fields";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public class MessageKeys {
public static final String CREATE_CRD_FAILED = "WLSKO-0174";
public static final String INTROSPECTOR_JOB_FAILED = "WLSKO-0175";
public static final String INTROSPECTOR_JOB_FAILED_DETAIL = "WLSKO-0176";
public static final String INTROSPECTOR_POD_FAILED = "WLSKO-0177";

// domain status messages
public static final String DUPLICATE_SERVER_NAME_FOUND = "WLSDO-0001";
Expand Down
1 change: 1 addition & 0 deletions operator/src/main/resources/Operator.properties
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ WLSKO-0174=Create custom resource definition failed: {0}
WLSKO-0175=Job {0} in namespace {1} failed with status {2}. Check log messages \
copied from the introspector pod {3} log for additional information.
WLSKO-0176=Job {1} in namespace {0} failed, job details are {2}
WLSKO-0177=Pod {0} in namespace {1} failed, the pod status is {2}

# Domain status messages

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
package oracle.kubernetes.operator;

import java.math.BigInteger;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;

import io.kubernetes.client.openapi.ApiException;
import io.kubernetes.client.openapi.models.V1ContainerState;
import io.kubernetes.client.openapi.models.V1ContainerStateTerminated;
import io.kubernetes.client.openapi.models.V1ContainerStateWaiting;
import io.kubernetes.client.openapi.models.V1ContainerStatus;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.openapi.models.V1Pod;
import io.kubernetes.client.openapi.models.V1PodCondition;
Expand All @@ -24,6 +30,9 @@

import static oracle.kubernetes.operator.LabelConstants.CREATEDBYOPERATOR_LABEL;
import static oracle.kubernetes.operator.LabelConstants.DOMAINUID_LABEL;
import static oracle.kubernetes.operator.helpers.LegalNames.DOMAIN_INTROSPECTOR_JOB_SUFFIX;
import static oracle.kubernetes.operator.logging.MessageKeys.INTROSPECTOR_POD_FAILED;
import static oracle.kubernetes.utils.LogMatcher.containsInfo;
import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.hasEntry;
import static org.hamcrest.Matchers.is;
Expand All @@ -37,12 +46,30 @@ public class PodWatcherTest extends WatcherTestBase implements WatchListener<V1P
private static final String NAME = "test";
private KubernetesTestSupport testSupport = new KubernetesTestSupport();
private final TerminalStep terminalStep = new TerminalStep();
private java.util.List<com.meterware.simplestub.Memento> mementos = new java.util.ArrayList<>();
private java.util.List<java.util.logging.LogRecord> logRecords = new java.util.ArrayList<>();

/**
 * Prepares the fixture: runs the base-class set-up, installs the stub watch factory with this
 * test as its listener, installs the Kubernetes test support, and starts collecting operator
 * log records into {@code logRecords}.
 *
 * <p>NOTE(review): mementos are registered through two paths here, the local {@code mementos}
 * list and {@code addMemento(...)}. Confirm that tear-down reverts both.
 */
@Override
@Before
public void setUp() throws Exception {
super.setUp();
mementos.add(StubWatchFactory.install());
StubWatchFactory.setListener(this);
addMemento(testSupport.install());
// Collect log records for the keys returned by getMessageKeys() at FINE level.
// Presumably this also silences console output and tolerates logged ApiExceptions;
// confirm against TestUtils.
mementos.add(
oracle.kubernetes.utils.TestUtils.silenceOperatorLogger()
.collectLogMessages(logRecords, getMessageKeys())
.withLogLevel(java.util.logging.Level.FINE)
.ignoringLoggedExceptions(ApiException.class));
}

/** Returns the message keys whose log records this test collects. */
private String[] getMessageKeys() {
  final String[] collectedKeys = {getPodFailedMessageKey()};
  return collectedKeys;
}

/** Returns the message key used when an introspector pod is reported as failed. */
private String getPodFailedMessageKey() {
  final String podFailedKey = INTROSPECTOR_POD_FAILED;
  return podFailedKey;
}

@Override
Expand Down Expand Up @@ -91,6 +118,10 @@ private V1Pod createPod() {
return new V1Pod().metadata(new V1ObjectMeta().namespace(NS).name(NAME));
}

/**
 * Builds a pod in the test namespace whose name is the test name followed by the
 * introspector job suffix.
 */
private V1Pod createIntrospectorPod() {
  final String introspectorPodName = NAME + DOMAIN_INTROSPECTOR_JOB_SUFFIX;
  final V1ObjectMeta metadata = new V1ObjectMeta().namespace(NS).name(introspectorPodName);
  return new V1Pod().metadata(metadata);
}

@Test
public void whenPodInitiallyReady_waitForReadyProceedsImmediately() {
AtomicBoolean stopping = new AtomicBoolean(false);
Expand All @@ -116,6 +147,24 @@ private V1Pod markPodReady(V1Pod pod) {
return pod.status(new V1PodStatus().phase("Running").addConditionsItem(createCondition("Ready")));
}

/**
 * Gives the pod a status with a single not-ready container whose waiting state carries the
 * message "Error".
 *
 * @param pod pod to modify
 * @return the value returned by {@code pod.status(...)}
 */
private V1Pod addContainerStateWaitingMessage(V1Pod pod) {
  final V1ContainerStateWaiting waiting = new V1ContainerStateWaiting().message("Error");
  final V1ContainerState state = new V1ContainerState().waiting(waiting);
  final V1ContainerStatus notReadyContainer = new V1ContainerStatus().ready(false).state(state);

  return pod.status(
      new V1PodStatus().containerStatuses(java.util.Collections.singletonList(notReadyContainer)));
}

/**
 * Gives the pod a status with a single not-ready container whose terminated state carries the
 * reason "Error".
 *
 * @param pod pod to modify
 * @return the value returned by {@code pod.status(...)}
 */
private V1Pod addContainerStateTerminatedReason(V1Pod pod) {
  final V1ContainerStateTerminated terminated = new V1ContainerStateTerminated().reason("Error");
  final V1ContainerState state = new V1ContainerState().terminated(terminated);
  final V1ContainerStatus notReadyContainer = new V1ContainerStatus().ready(false).state(state);

  return pod.status(
      new V1PodStatus().containerStatuses(java.util.Collections.singletonList(notReadyContainer)));
}

@SuppressWarnings("SameParameterValue")
private V1PodCondition createCondition(String type) {
return new V1PodCondition().type(type).status("True");
Expand Down Expand Up @@ -198,6 +247,22 @@ public void whenPodNotReadyLaterAndThenReady_runNextStep() {
assertThat(terminalStep.wasRun(), is(true));
}

// A MODIFIED event for an introspector pod whose container terminated with reason "Error"
// must not run the next step and must log the pod-failed message.
@Test
public void whenIntrospectPodNotReadyWithTerminatedReason_logPodStatus() {
  sendIntrospectorPodModifiedWatchAfterWaitForReady(pod -> addContainerStateTerminatedReason(pod));

  assertThat(terminalStep.wasRun(), is(false));
  assertThat(logRecords, containsInfo(INTROSPECTOR_POD_FAILED));
}

// A MODIFIED event for an introspector pod whose container is waiting with a message
// must not run the next step and must log the pod-failed message.
@Test
public void whenIntrospectPodNotReadyWithWaitingMessage_logPodStatus() {
  sendIntrospectorPodModifiedWatchAfterWaitForReady(pod -> addContainerStateWaitingMessage(pod));

  assertThat(terminalStep.wasRun(), is(false));
  assertThat(logRecords, containsInfo(INTROSPECTOR_POD_FAILED));
}

// Starts the waitForReady step with an incomplete pod and sends a watch indicating that the pod has changed
@SafeVarargs
private void sendPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modifiers) {
Expand All @@ -215,6 +280,23 @@ private void sendPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modi
}
}

// Starts the waitForReady step for a freshly created introspector pod, then delivers one
// MODIFIED watch event per modifier, each applied to a new introspector pod. The watcher's
// stopping flag is always set afterwards.
@SafeVarargs
private void sendIntrospectorPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modifiers) {
  final AtomicBoolean stopFlag = new AtomicBoolean(false);
  final PodWatcher podWatcher = createWatcher(stopFlag);
  testSupport.defineResources(createIntrospectorPod());

  try {
    testSupport.runSteps(podWatcher.waitForReady(createIntrospectorPod(), terminalStep));
    for (Function<V1Pod,V1Pod> podModifier : modifiers) {
      final V1Pod modifiedPod = podModifier.apply(createIntrospectorPod());
      podWatcher.receivedResponse(new Watch.Response<>("MODIFIED", modifiedPod));
    }
  } finally {
    stopFlag.set(true);
  }
}

@Test
public void whenPodDeletedOnFirstRead_runNextStep() {
AtomicBoolean stopping = new AtomicBoolean(false);
Expand Down