Skip to content

Commit bce730f

Browse files
authored
Log introspect pod on failure (#1787)
* Log introspector pod on failure * Minor change * cleanup * Fix a merge issue * Unit tests
1 parent 06a0595 commit bce730f

File tree

5 files changed

+146
-3
lines changed

5 files changed

+146
-3
lines changed

operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,13 @@
1515
import javax.annotation.Nonnull;
1616

1717
import io.kubernetes.client.openapi.ApiException;
18+
import io.kubernetes.client.openapi.models.V1ContainerState;
19+
import io.kubernetes.client.openapi.models.V1ContainerStateTerminated;
20+
import io.kubernetes.client.openapi.models.V1ContainerStateWaiting;
21+
import io.kubernetes.client.openapi.models.V1ContainerStatus;
1822
import io.kubernetes.client.openapi.models.V1ObjectMeta;
1923
import io.kubernetes.client.openapi.models.V1Pod;
24+
import io.kubernetes.client.openapi.models.V1PodStatus;
2025
import io.kubernetes.client.util.Watch;
2126
import oracle.kubernetes.operator.TuningParameters.WatchTuning;
2227
import oracle.kubernetes.operator.builders.WatchBuilder;
@@ -30,12 +35,13 @@
3035
import oracle.kubernetes.operator.watcher.WatchListener;
3136
import oracle.kubernetes.operator.work.Step;
3237

38+
import static oracle.kubernetes.operator.helpers.LegalNames.DOMAIN_INTROSPECTOR_JOB_SUFFIX;
39+
3340
/**
3441
* Watches for changes to pods.
3542
*/
3643
public class PodWatcher extends Watcher<V1Pod> implements WatchListener<V1Pod>, PodAwaiterStepFactory {
3744
private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator");
38-
3945
private final String namespace;
4046
private final WatchListener<V1Pod> listener;
4147

@@ -141,6 +147,12 @@ public void receivedResponse(Watch.Response<V1Pod> item) {
141147
switch (item.type) {
142148
case "ADDED":
143149
case "MODIFIED":
150+
if (podName.contains(DOMAIN_INTROSPECTOR_JOB_SUFFIX) && isFailed(pod)) {
151+
LOGGER.info(MessageKeys.INTROSPECTOR_POD_FAILED,
152+
pod.getMetadata().getName(),
153+
pod.getMetadata().getNamespace(),
154+
pod.getStatus().toString());
155+
}
144156
copyOf(getOnModifiedCallbacks(podName)).forEach(c -> c.accept(pod));
145157
break;
146158
case "DELETED":
@@ -153,6 +165,52 @@ public void receivedResponse(Watch.Response<V1Pod> item) {
153165
LOGGER.exiting();
154166
}
155167

168+
/**
169+
* Test if pod is failed.
170+
* @param pod pob
171+
* @return true, if failed
172+
*/
173+
private static boolean isFailed(V1Pod pod) {
174+
if (pod == null) {
175+
return false;
176+
}
177+
178+
V1PodStatus status = pod.getStatus();
179+
LOGGER.fine(
180+
"PodWatcher.isFailed status of pod " + pod.getMetadata().getName() + ": " + status);
181+
if (status != null) {
182+
java.util.List<V1ContainerStatus> conStatuses = status.getContainerStatuses();
183+
if (conStatuses != null) {
184+
for (V1ContainerStatus conStatus : conStatuses) {
185+
if (!isReady(conStatus)
186+
&& (getContainerStateWaitingMessage(conStatus) != null
187+
|| getContainerStateTerminatedReason(conStatus).contains("Error"))) {
188+
return true;
189+
}
190+
}
191+
}
192+
}
193+
return false;
194+
}
195+
196+
private static boolean isReady(V1ContainerStatus conStatus) {
197+
return Optional.ofNullable(conStatus).map(V1ContainerStatus::getReady).orElse(false);
198+
}
199+
200+
private static String getContainerStateTerminatedReason(V1ContainerStatus conStatus) {
201+
return Optional.of(conStatus)
202+
.map(V1ContainerStatus::getState)
203+
.map(V1ContainerState::getTerminated)
204+
.map(V1ContainerStateTerminated::getReason).orElse("");
205+
}
206+
207+
private static String getContainerStateWaitingMessage(V1ContainerStatus conStatus) {
208+
return Optional.of(conStatus)
209+
.map(V1ContainerStatus::getState)
210+
.map(V1ContainerState::getWaiting)
211+
.map(V1ContainerStateWaiting::getMessage).orElse(null);
212+
}
213+
156214
// make a copy to avoid concurrent modification
157215
private <T> Collection<T> copyOf(Collection<T> collection) {
158216
return new ArrayList<>(collection);

operator/src/main/java/oracle/kubernetes/operator/helpers/LegalNames.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ public class LegalNames {
1616

1717
private static final String SERVER_PATTERN = "%s-%s";
1818
private static final String CLUSTER_SERVICE_PATTERN = "%s-cluster-%s";
19-
private static final String DOMAIN_INTROSPECTOR_JOB_PATTERN = "%s-introspect-domain-job";
19+
public static final String DOMAIN_INTROSPECTOR_JOB_SUFFIX = "-introspect-domain-job";
20+
private static final String DOMAIN_INTROSPECTOR_JOB_PATTERN = "%s" + DOMAIN_INTROSPECTOR_JOB_SUFFIX;
2021
private static final String EXTERNAL_SERVICE_PATTERN = "%s-%s-external";
2122

2223
public static final String DNS_1123_FIELDS_PARAM = "dns1123Fields";

operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ public class MessageKeys {
132132
public static final String CREATE_CRD_FAILED = "WLSKO-0174";
133133
public static final String INTROSPECTOR_JOB_FAILED = "WLSKO-0175";
134134
public static final String INTROSPECTOR_JOB_FAILED_DETAIL = "WLSKO-0176";
135+
public static final String INTROSPECTOR_POD_FAILED = "WLSKO-0177";
135136

136137
// domain status messages
137138
public static final String DUPLICATE_SERVER_NAME_FOUND = "WLSDO-0001";

operator/src/main/resources/Operator.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ WLSKO-0174=Create custom resource definition failed: {0}
186186
WLSKO-0175=Job {0} in namespace {1} failed with status {2}. Check log messages \
187187
copied from the introspector pod {3} log for additional information.
188188
WLSKO-0176=Job {1} in namespace {0} failed, job details are {2}
189+
WLSKO-0177=Pod {0} in namespace {1} failed, the pod status is {2}
189190

190191
# Domain status messages
191192

operator/src/test/java/oracle/kubernetes/operator/PodWatcherTest.java

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,15 @@
44
package oracle.kubernetes.operator;
55

66
import java.math.BigInteger;
7+
import java.util.List;
78
import java.util.concurrent.atomic.AtomicBoolean;
89
import java.util.function.Function;
910

11+
import io.kubernetes.client.openapi.ApiException;
12+
import io.kubernetes.client.openapi.models.V1ContainerState;
13+
import io.kubernetes.client.openapi.models.V1ContainerStateTerminated;
14+
import io.kubernetes.client.openapi.models.V1ContainerStateWaiting;
15+
import io.kubernetes.client.openapi.models.V1ContainerStatus;
1016
import io.kubernetes.client.openapi.models.V1ObjectMeta;
1117
import io.kubernetes.client.openapi.models.V1Pod;
1218
import io.kubernetes.client.openapi.models.V1PodCondition;
@@ -24,6 +30,9 @@
2430

2531
import static oracle.kubernetes.operator.LabelConstants.CREATEDBYOPERATOR_LABEL;
2632
import static oracle.kubernetes.operator.LabelConstants.DOMAINUID_LABEL;
33+
import static oracle.kubernetes.operator.helpers.LegalNames.DOMAIN_INTROSPECTOR_JOB_SUFFIX;
34+
import static oracle.kubernetes.operator.logging.MessageKeys.INTROSPECTOR_POD_FAILED;
35+
import static oracle.kubernetes.utils.LogMatcher.containsInfo;
2736
import static org.hamcrest.Matchers.both;
2837
import static org.hamcrest.Matchers.hasEntry;
2938
import static org.hamcrest.Matchers.is;
@@ -37,12 +46,30 @@ public class PodWatcherTest extends WatcherTestBase implements WatchListener<V1P
3746
private static final String NAME = "test";
3847
private KubernetesTestSupport testSupport = new KubernetesTestSupport();
3948
private final TerminalStep terminalStep = new TerminalStep();
49+
private java.util.List<com.meterware.simplestub.Memento> mementos = new java.util.ArrayList<>();
50+
private java.util.List<java.util.logging.LogRecord> logRecords = new java.util.ArrayList<>();
4051

4152
@Override
4253
@Before
4354
public void setUp() throws Exception {
44-
super.setUp();
55+
mementos.add(StubWatchFactory.install());
56+
StubWatchFactory.setListener(this);
4557
addMemento(testSupport.install());
58+
mementos.add(
59+
oracle.kubernetes.utils.TestUtils.silenceOperatorLogger()
60+
.collectLogMessages(logRecords, getMessageKeys())
61+
.withLogLevel(java.util.logging.Level.FINE)
62+
.ignoringLoggedExceptions(ApiException.class));
63+
}
64+
65+
private String[] getMessageKeys() {
66+
return new String[] {
67+
getPodFailedMessageKey()
68+
};
69+
}
70+
71+
private String getPodFailedMessageKey() {
72+
return INTROSPECTOR_POD_FAILED;
4673
}
4774

4875
@Override
@@ -91,6 +118,10 @@ private V1Pod createPod() {
91118
return new V1Pod().metadata(new V1ObjectMeta().namespace(NS).name(NAME));
92119
}
93120

121+
private V1Pod createIntrospectorPod() {
122+
return new V1Pod().metadata(new V1ObjectMeta().namespace(NS).name(NAME + DOMAIN_INTROSPECTOR_JOB_SUFFIX));
123+
}
124+
94125
@Test
95126
public void whenPodInitiallyReady_waitForReadyProceedsImmediately() {
96127
AtomicBoolean stopping = new AtomicBoolean(false);
@@ -116,6 +147,24 @@ private V1Pod markPodReady(V1Pod pod) {
116147
return pod.status(new V1PodStatus().phase("Running").addConditionsItem(createCondition("Ready")));
117148
}
118149

150+
private V1Pod addContainerStateWaitingMessage(V1Pod pod) {
151+
return pod.status(new V1PodStatus()
152+
.containerStatuses(java.util.Collections.singletonList(
153+
new V1ContainerStatus()
154+
.ready(false)
155+
.state(new V1ContainerState().waiting(
156+
new V1ContainerStateWaiting().message("Error"))))));
157+
}
158+
159+
private V1Pod addContainerStateTerminatedReason(V1Pod pod) {
160+
return pod.status(new V1PodStatus()
161+
.containerStatuses(java.util.Collections.singletonList(
162+
new V1ContainerStatus()
163+
.ready(false)
164+
.state(new V1ContainerState().terminated(
165+
new V1ContainerStateTerminated().reason("Error"))))));
166+
}
167+
119168
@SuppressWarnings("SameParameterValue")
120169
private V1PodCondition createCondition(String type) {
121170
return new V1PodCondition().type(type).status("True");
@@ -198,6 +247,22 @@ public void whenPodNotReadyLaterAndThenReady_runNextStep() {
198247
assertThat(terminalStep.wasRun(), is(true));
199248
}
200249

250+
@Test
251+
public void whenIntrospectPodNotReadyWithTerminatedReason_logPodStatus() {
252+
sendIntrospectorPodModifiedWatchAfterWaitForReady(this::addContainerStateTerminatedReason);
253+
254+
assertThat(terminalStep.wasRun(), is(false));
255+
assertThat(logRecords, containsInfo(getPodFailedMessageKey()));
256+
}
257+
258+
@Test
259+
public void whenIntrospectPodNotReadyWithWaitingMessage_logPodStatus() {
260+
sendIntrospectorPodModifiedWatchAfterWaitForReady(this::addContainerStateWaitingMessage);
261+
262+
assertThat(terminalStep.wasRun(), is(false));
263+
assertThat(logRecords, containsInfo(getPodFailedMessageKey()));
264+
}
265+
201266
// Starts the waitForReady step with an incomplete pod and sends a watch indicating that the pod has changed
202267
@SafeVarargs
203268
private void sendPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modifiers) {
@@ -215,6 +280,23 @@ private void sendPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modi
215280
}
216281
}
217282

283+
// Starts the waitForReady step with an incomplete pod and sends a watch indicating that the pod has changed
284+
@SafeVarargs
285+
private void sendIntrospectorPodModifiedWatchAfterWaitForReady(Function<V1Pod,V1Pod>... modifiers) {
286+
AtomicBoolean stopping = new AtomicBoolean(false);
287+
PodWatcher watcher = createWatcher(stopping);
288+
testSupport.defineResources(createIntrospectorPod());
289+
290+
try {
291+
testSupport.runSteps(watcher.waitForReady(createIntrospectorPod(), terminalStep));
292+
for (Function<V1Pod,V1Pod> modifier : modifiers) {
293+
watcher.receivedResponse(new Watch.Response<>("MODIFIED", modifier.apply(createIntrospectorPod())));
294+
}
295+
} finally {
296+
stopping.set(true);
297+
}
298+
}
299+
218300
@Test
219301
public void whenPodDeletedOnFirstRead_runNextStep() {
220302
AtomicBoolean stopping = new AtomicBoolean(false);

0 commit comments

Comments
 (0)