Skip to content

Error status handler improvements #742

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions docs/documentation/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,19 +166,21 @@ A successful execution resets the retry.

### Setting Error Status After Last Retry Attempt

In order to facilitate error reporting in case a last retry attempt fails, Reconciler can implement the following
In order to facilitate error reporting Reconciler can implement the following
[interface](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java):

```java
public interface ErrorStatusHandler<T extends CustomResource<?, ?>> {
public interface ErrorStatusHandler<T extends HasMetadata> {

T updateErrorStatus(T resource, RuntimeException e);
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);

}
```

The `updateErrorStatus` resource is called when it's the last retry attempt according the retry configuration and the
reconciler execution still resulted in a runtime exception.
The `updateErrorStatus` method is called in case an exception is thrown from the reconciler. It is also called when
there is no retry configured, just after the reconciler execution.
In the first call the `RetryInfo.getAttemptCount()` is always zero, since it is not a result of a retry
(regardless if retry is configured or not).

The result of the method call is used to make a status update on the custom resource. This is always a sub-resource
update request, so no update on custom resource itself (like spec of metadata) happens. Note that this update request
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package io.javaoperatorsdk.operator.api.reconciler;

import java.util.Optional;

import io.fabric8.kubernetes.api.model.HasMetadata;

public interface ErrorStatusHandler<T extends HasMetadata> {
Expand All @@ -19,9 +21,10 @@ public interface ErrorStatusHandler<T extends HasMetadata> {
* should not be updates on custom resource after it is marked for deletion.
*
* @param resource to update the status on
* @param retryInfo
* @param e exception thrown from the reconciler
* @return the updated resource
*/
T updateErrorStatus(T resource, RuntimeException e);
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@
import io.fabric8.kubernetes.client.dsl.Resource;
import io.javaoperatorsdk.operator.api.ObservedGenerationAware;
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
import io.javaoperatorsdk.operator.api.reconciler.BaseControl;
import io.javaoperatorsdk.operator.api.reconciler.Context;
import io.javaoperatorsdk.operator.api.reconciler.DefaultContext;
import io.javaoperatorsdk.operator.api.reconciler.DeleteControl;
import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusHandler;
import io.javaoperatorsdk.operator.api.reconciler.Reconciler;
import io.javaoperatorsdk.operator.api.reconciler.UpdateControl;
import io.javaoperatorsdk.operator.api.reconciler.*;
import io.javaoperatorsdk.operator.processing.Controller;

import static io.javaoperatorsdk.operator.processing.KubernetesResourceUtils.getName;
Expand Down Expand Up @@ -114,7 +108,7 @@ private PostExecutionControl<R> handleReconcile(
cloneResourceForErrorStatusHandlerIfNeeded(originalResource, context);
return reconcileExecution(executionScope, resourceForExecution, originalResource, context);
} catch (RuntimeException e) {
handleLastAttemptErrorStatusHandler(originalResource, context, e);
handleErrorStatusHandler(originalResource, context, e);
throw e;
}
}
Expand All @@ -128,7 +122,7 @@ private PostExecutionControl<R> handleReconcile(
* place for status update.
*/
private R cloneResourceForErrorStatusHandlerIfNeeded(R resource, Context context) {
if (isLastAttemptOfRetryAndErrorStatusHandlerPresent(context) ||
if (isErrorStatusHandlerPresent() ||
shouldUpdateObservedGenerationAutomatically(resource)) {
return configuration().getConfigurationService().getResourceCloner().clone(resource);
} else {
Expand Down Expand Up @@ -164,26 +158,32 @@ && shouldUpdateObservedGenerationAutomatically(resourceForExecution)) {
return createPostExecutionControl(updatedCustomResource, updateControl);
}

private void handleLastAttemptErrorStatusHandler(R resource, Context context,
private void handleErrorStatusHandler(R resource, Context context,
RuntimeException e) {
if (isLastAttemptOfRetryAndErrorStatusHandlerPresent(context)) {
if (isErrorStatusHandlerPresent()) {
try {
var retryInfo = context.getRetryInfo().orElse(new RetryInfo() {
@Override
public int getAttemptCount() {
return 0;
}

@Override
public boolean isLastAttempt() {
return controller.getConfiguration().getRetryConfiguration() == null;
}
});
var updatedResource = ((ErrorStatusHandler<R>) controller.getReconciler())
.updateErrorStatus(resource, e);
customResourceFacade.updateStatus(updatedResource);
.updateErrorStatus(resource, retryInfo, e);
updatedResource.ifPresent(customResourceFacade::updateStatus);
} catch (RuntimeException ex) {
log.error("Error during error status handling.", ex);
}
}
}

private boolean isLastAttemptOfRetryAndErrorStatusHandlerPresent(Context context) {
if (context.getRetryInfo().isPresent()) {
return context.getRetryInfo().get().isLastAttempt()
&& controller.getReconciler() instanceof ErrorStatusHandler;
} else {
return false;
}
private boolean isErrorStatusHandlerPresent() {
return controller.getReconciler() instanceof ErrorStatusHandler;
}

private R updateStatusGenerationAware(R resource) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.javaoperatorsdk.operator.processing.event;

import java.util.ArrayList;
import java.util.Optional;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -11,6 +12,7 @@
import io.fabric8.kubernetes.api.model.ObjectMeta;
import io.fabric8.kubernetes.client.CustomResource;
import io.javaoperatorsdk.operator.TestUtils;
import io.javaoperatorsdk.operator.api.config.Cloner;
import io.javaoperatorsdk.operator.api.config.ConfigurationService;
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
import io.javaoperatorsdk.operator.api.monitoring.Metrics;
Expand Down Expand Up @@ -56,9 +58,12 @@ private <R extends HasMetadata> ReconciliationDispatcher<R> init(R customResourc
when(configuration.getName()).thenReturn("EventDispatcherTestController");
when(configService.getMetrics()).thenReturn(Metrics.NOOP);
when(configuration.getConfigurationService()).thenReturn(configService);
when(configService.getResourceCloner()).thenReturn(ConfigurationService.DEFAULT_CLONER);
when(reconciler.reconcile(eq(customResource), any()))
.thenReturn(UpdateControl.updateResource(customResource));
when(configService.getResourceCloner()).thenReturn(new Cloner() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this changed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure to make easier to do when(..).thenReturn() with mockito. Since otherwise its a different instance.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but then you're not really testing the actual behavior?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be understood as a form of a mocking of that clone method / cloner. The goal here is not to test the cloner but only if it is called in case it's relevant.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand but the problem is what if the behavior changes when the instance is not actually cloned?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually not, is just what instance we move around, for some special cases.

@Override
public <R extends HasMetadata> R clone(R object) {
return object;
}
});
when(reconciler.cleanup(eq(customResource), any()))
.thenReturn(DeleteControl.defaultDelete());
when(customResourceFacade.replaceWithLock(any())).thenReturn(null);
Expand Down Expand Up @@ -351,9 +356,9 @@ void callErrorStatusHandlerIfImplemented() {

when(reconciler.reconcile(any(), any()))
.thenThrow(new IllegalStateException("Error Status Test"));
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any())).then(a -> {
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any(), any())).then(a -> {
testCustomResource.getStatus().setConfigMapStatus(ERROR_MESSAGE);
return testCustomResource;
return Optional.of(testCustomResource);
});

reconciliationDispatcher.handleExecution(
Expand All @@ -373,7 +378,25 @@ public boolean isLastAttempt() {

verify(customResourceFacade, times(1)).updateStatus(testCustomResource);
verify(((ErrorStatusHandler) reconciler), times(1)).updateErrorStatus(eq(testCustomResource),
any());
any(), any());
}

@Test
void callErrorStatusHandlerEvenOnFirstError() {
testCustomResource.addFinalizer(DEFAULT_FINALIZER);

when(reconciler.reconcile(any(), any()))
.thenThrow(new IllegalStateException("Error Status Test"));
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any(), any())).then(a -> {
testCustomResource.getStatus().setConfigMapStatus(ERROR_MESSAGE);
return Optional.of(testCustomResource);
});
reconciliationDispatcher.handleExecution(
new ExecutionScope(
testCustomResource, null));
verify(customResourceFacade, times(1)).updateStatus(testCustomResource);
verify(((ErrorStatusHandler) reconciler), times(1)).updateErrorStatus(eq(testCustomResource),
any(), any());
}

private ObservedGenCustomResource createObservedGenCustomResource() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@

public class ErrorStatusHandlerIT {

public static final int MAX_RETRY_ATTEMPTS = 3;
ErrorStatusHandlerTestReconciler reconciler = new ErrorStatusHandlerTestReconciler();

@RegisterExtension
OperatorExtension operator =
OperatorExtension.builder()
.withConfigurationService(DefaultConfigurationService.instance())
.withReconciler(reconciler, new GenericRetry().setMaxAttempts(3).withLinearRetry())
.withReconciler(reconciler,
new GenericRetry().setMaxAttempts(MAX_RETRY_ATTEMPTS).withLinearRetry())
.build();

@Test
Expand All @@ -32,16 +34,18 @@ public void testErrorMessageSetEventually() {
operator.create(ErrorStatusHandlerTestCustomResource.class, createCustomResource());

await()
.atMost(15, TimeUnit.SECONDS)
.pollInterval(1L, TimeUnit.SECONDS)
.atMost(10, TimeUnit.SECONDS)
.pollInterval(250, TimeUnit.MICROSECONDS)
.untilAsserted(
() -> {
ErrorStatusHandlerTestCustomResource res =
operator.get(ErrorStatusHandlerTestCustomResource.class,
resource.getMetadata().getName());
assertThat(res.getStatus()).isNotNull();
assertThat(res.getStatus().getMessage())
.isEqualTo(ErrorStatusHandlerTestReconciler.ERROR_STATUS_MESSAGE);
for (int i = 0; i < MAX_RETRY_ATTEMPTS + 1; i++) {
assertThat(res.getStatus().getMessages())
.contains(ErrorStatusHandlerTestReconciler.ERROR_STATUS_MESSAGE + i);
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@

public class RetryIT {
public static final int RETRY_INTERVAL = 150;
public static final int MAX_RETRY_ATTEMPTS = 5;

public static final int NUMBER_FAILED_EXECUTIONS = 3;

@RegisterExtension
OperatorExtension operator =
OperatorExtension.builder()
.withConfigurationService(DefaultConfigurationService.instance())
.withReconciler(
new RetryTestCustomReconciler(),
new RetryTestCustomReconciler(NUMBER_FAILED_EXECUTIONS),
new GenericRetry().setInitialInterval(RETRY_INTERVAL).withLinearRetry()
.setMaxAttempts(5))
.setMaxAttempts(MAX_RETRY_ATTEMPTS))
.build();


Expand All @@ -40,7 +43,7 @@ public void retryFailedExecution() {

await("cr status updated")
.pollDelay(
RETRY_INTERVAL * (RetryTestCustomReconciler.NUMBER_FAILED_EXECUTIONS + 2),
RETRY_INTERVAL * (NUMBER_FAILED_EXECUTIONS + 2),
TimeUnit.MILLISECONDS)
.pollInterval(
RETRY_INTERVAL,
Expand All @@ -49,7 +52,7 @@ public void retryFailedExecution() {
.untilAsserted(() -> {
assertThat(
TestUtils.getNumberOfExecutions(operator))
.isEqualTo(RetryTestCustomReconciler.NUMBER_FAILED_EXECUTIONS + 1);
.isEqualTo(NUMBER_FAILED_EXECUTIONS + 1);

RetryTestCustomResource finalResource =
operator.get(RetryTestCustomResource.class,
Expand All @@ -59,7 +62,7 @@ public void retryFailedExecution() {
});
}

public RetryTestCustomResource createTestCustomResource(String id) {
public static RetryTestCustomResource createTestCustomResource(String id) {
RetryTestCustomResource resource = new RetryTestCustomResource();
resource.setMetadata(
new ObjectMetaBuilder()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package io.javaoperatorsdk.operator;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;

import io.javaoperatorsdk.operator.config.runtime.DefaultConfigurationService;
import io.javaoperatorsdk.operator.junit.OperatorExtension;
import io.javaoperatorsdk.operator.processing.retry.GenericRetry;
import io.javaoperatorsdk.operator.sample.retry.RetryTestCustomReconciler;
import io.javaoperatorsdk.operator.sample.retry.RetryTestCustomResource;

import static io.javaoperatorsdk.operator.RetryIT.createTestCustomResource;
import static org.assertj.core.api.Assertions.assertThat;

public class RetryMaxAttemptIT {

public static final int MAX_RETRY_ATTEMPTS = 3;
public static final int RETRY_INTERVAL = 100;
public static final int ALL_EXECUTION_TO_FAIL = 99;

RetryTestCustomReconciler reconciler = new RetryTestCustomReconciler(ALL_EXECUTION_TO_FAIL);

@RegisterExtension
OperatorExtension operator =
OperatorExtension.builder()
.withConfigurationService(DefaultConfigurationService.instance())
.withReconciler(reconciler,
new GenericRetry().setInitialInterval(RETRY_INTERVAL).withLinearRetry()
.setMaxAttempts(MAX_RETRY_ATTEMPTS))
.build();


@Test
public void retryFailedExecution() throws InterruptedException {
RetryTestCustomResource resource = createTestCustomResource("max-retry");

operator.create(RetryTestCustomResource.class, resource);

Thread.sleep((MAX_RETRY_ATTEMPTS + 2) * RETRY_INTERVAL);
assertThat(reconciler.getNumberOfExecutions()).isEqualTo(4);
}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
package io.javaoperatorsdk.operator.sample.errorstatushandler;

import java.util.ArrayList;
import java.util.List;

public class ErrorStatusHandlerTestCustomResourceStatus {

private String message;
private List<String> messages;

public String getMessage() {
return message;
public List<String> getMessages() {
if (messages == null) {
messages = new ArrayList<>();
}
return messages;
}

public ErrorStatusHandlerTestCustomResourceStatus setMessage(String message) {
this.message = message;
public ErrorStatusHandlerTestCustomResourceStatus setMessages(List<String> messages) {
this.messages = messages;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.javaoperatorsdk.operator.sample.errorstatushandler;

import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;

import org.slf4j.Logger;
Expand Down Expand Up @@ -45,11 +46,11 @@ public int getNumberOfExecutions() {
}

@Override
public ErrorStatusHandlerTestCustomResource updateErrorStatus(
ErrorStatusHandlerTestCustomResource resource, RuntimeException e) {
public Optional<ErrorStatusHandlerTestCustomResource> updateErrorStatus(
ErrorStatusHandlerTestCustomResource resource, RetryInfo retryInfo, RuntimeException e) {
log.info("Setting status.");
ensureStatusExists(resource);
resource.getStatus().setMessage(ERROR_STATUS_MESSAGE);
return resource;
resource.getStatus().getMessages().add(ERROR_STATUS_MESSAGE + retryInfo.getAttemptCount());
return Optional.of(resource);
}
}
Loading