Skip to content

Commit

Permalink
feat: add reconciliation retries for CRs (#423)
Browse files Browse the repository at this point in the history
## Description

Adds re-tries to Package CR status + logic to increment and handle
retries. Currently will attempt package reconcile 5x before failing.

## Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Other (security config, docs update, etc)

## Checklist before merging

- [x] Test, docs, adr added or updated as needed
- [x] [Contributor Guide
Steps](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md)(https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md#submitting-a-pull-request)
followed
  • Loading branch information
mjnagel authored May 22, 2024
1 parent 3c230f5 commit a546036
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 14 deletions.
2 changes: 2 additions & 0 deletions src/pepr/operator/crd/generated/exemption-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export enum Policy {
export interface Status {
observedGeneration?: number;
phase?: Phase;
retryAttempt?: number;
titles?: string[];
}

Expand All @@ -80,4 +81,5 @@ RegisterKind(Exemption, {
group: "uds.dev",
version: "v1alpha1",
kind: "Exemption",
plural: "exemptions",
});
2 changes: 2 additions & 0 deletions src/pepr/operator/crd/generated/package-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ export interface Status {
networkPolicyCount?: number;
observedGeneration?: number;
phase?: Phase;
retryAttempt?: number;
ssoClients?: string[];
}

Expand All @@ -553,4 +554,5 @@ RegisterKind(Package, {
group: "uds.dev",
version: "v1alpha1",
kind: "Package",
plural: "packages",
});
3 changes: 3 additions & 0 deletions src/pepr/operator/crd/sources/exemption/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
type: "string",
},
},
retryAttempt: {
type: "integer",
},
},
} as V1JSONSchemaProps,
spec: {
Expand Down
4 changes: 4 additions & 0 deletions src/pepr/operator/crd/sources/package/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
networkPolicyCount: {
type: "integer",
},
retryAttempt: {
type: "integer",
nullable: true,
},
},
} as V1JSONSchemaProps,
spec: {
Expand Down
5 changes: 4 additions & 1 deletion src/pepr/operator/reconcilers/exempt-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, updateStatus } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { processExemptions } from "../controllers/exemptions/exemptions";
import { Phase, UDSExemption } from "../crd";

Expand All @@ -27,6 +27,9 @@ export async function exemptReconciler(exempt: UDSExemption) {
observedGeneration: metadata.generation,
titles: exempt.spec?.exemptions?.map(e => e.title || e.matcher.name),
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(exempt.metadata!.uid!);
} catch (err) {
// Handle the failure
void handleFailure(err, exempt);
Expand Down
52 changes: 49 additions & 3 deletions src/pepr/operator/reconcilers/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { GenericKind } from "kubernetes-fluent-client";
import { K8s, Log, kind } from "pepr";

import { Mock } from "jest-mock";
import { handleFailure, shouldSkip, updateStatus, writeEvent } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus, writeEvent } from ".";
import { ExemptStatus, Phase, PkgStatus, UDSExemption, UDSPackage } from "../crd";

jest.mock("pepr", () => ({
Expand Down Expand Up @@ -33,6 +33,7 @@ describe("isPendingOrCurrent", () => {
});

it("should return true for a pending CR on subsequent calls", () => {
uidSeen.add("1");
const cr = { metadata: { uid: "1" }, status: { phase: Phase.Pending } } as UDSPackage;
expect(shouldSkip(cr)).toBe(true);
});
Expand Down Expand Up @@ -154,15 +155,60 @@ describe("handleFailure", () => {
expect(Create).not.toHaveBeenCalled();
});

it("should handle a failure", async () => {
it("should retry a failure", async () => {
const err = { status: 500, message: "Internal server error" };
const cr = {
kind: "Package",
apiVersion: "v1",
metadata: { namespace: "default", name: "test", generation: 1, uid: "1" },
};
await handleFailure(err, cr as UDSPackage | UDSExemption);
expect(Log.error).toHaveBeenCalledWith({ err }, "Error configuring default/test");
expect(Log.error).toHaveBeenCalledWith(
{ err },
"Reconciliation attempt 1 failed for default/test, retrying...",
);

expect(Create).toHaveBeenCalledWith({
type: "Warning",
reason: "ReconciliationFailed",
message: "Internal server error",
metadata: {
namespace: cr.metadata!.namespace,
generateName: cr.metadata!.name,
},
involvedObject: {
apiVersion: cr.apiVersion,
kind: cr.kind,
name: cr.metadata!.name,
namespace: cr.metadata!.namespace,
uid: cr.metadata!.uid,
},
firstTimestamp: expect.any(Date),
reportingComponent: "uds.dev/operator",
reportingInstance: process.env.HOSTNAME,
});

expect(PatchStatus).toHaveBeenCalledWith({
metadata: { namespace: "default", name: "test" },
status: {
retryAttempt: 1,
},
});
});

it("should fail after 5 retries", async () => {
const err = { status: 500, message: "Internal server error" };
const cr = {
kind: "Package",
apiVersion: "v1",
metadata: { namespace: "default", name: "test", generation: 1, uid: "1" },
status: { phase: Phase.Pending, retryAttempt: 5 },
};
await handleFailure(err, cr as UDSPackage | UDSExemption);
expect(Log.error).toHaveBeenCalledWith(
{ err },
"Error configuring default/test, maxed out retries",
);

expect(Create).toHaveBeenCalledWith({
type: "Warning",
Expand Down
29 changes: 20 additions & 9 deletions src/pepr/operator/reconcilers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { K8s, Log, kind } from "pepr";
import { ExemptStatus, Phase, PkgStatus, UDSExemption, UDSPackage } from "../crd";
import { Status } from "../crd/generated/package-v1alpha1";

const uidSeen = new Set<string>();
export const uidSeen = new Set<string>();

/**
* Checks if the CRD is pending or the current generation has been processed
Expand All @@ -17,10 +17,9 @@ export function shouldSkip(cr: UDSExemption | UDSPackage) {
const isCurrentGeneration = cr.metadata?.generation === cr.status?.observedGeneration;

// First check if the CR has been seen before and return false if it has not
// This ensures that all CRs are processed at least once during the lifetime of the pod
// This ensures that all CRs are processed at least once by this version of pepr-core
if (!uidSeen.has(cr.metadata!.uid!)) {
Log.debug(cr, `Should skip? No, first time processed during this pod's lifetime`);
uidSeen.add(cr.metadata!.uid!);
return false;
}

Expand Down Expand Up @@ -99,19 +98,31 @@ export async function handleFailure(
) {
const metadata = cr.metadata!;
const identifier = `${metadata.namespace}/${metadata.name}`;
let status: Status;

// todo: identify exact 404 we are targetting, possibly in `updateStatus`
if (err.status === 404) {
Log.warn({ err }, `Package metadata seems to have been deleted`);
return;
}

Log.error({ err }, `Error configuring ${identifier}`);
const retryAttempt = cr.status?.retryAttempt || 0;

// todo: need to evaluate when it is safe to retry (updating generation now avoids retrying infinitely)
const status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
} as Status;
if (retryAttempt < 5) {
const currRetry = retryAttempt + 1;
Log.error({ err }, `Reconciliation attempt ${currRetry} failed for ${identifier}, retrying...`);

status = {
retryAttempt: currRetry,
};
} else {
Log.error({ err }, `Error configuring ${identifier}, maxed out retries`);

status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
};
}

// Write an event for the error
void writeEvent(cr, { message: err.message });
Expand Down
6 changes: 5 additions & 1 deletion src/pepr/operator/reconcilers/package-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, updateStatus } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { UDSConfig } from "../../config";
import { enableInjection } from "../controllers/istio/injection";
import { istioResources } from "../controllers/istio/istio-resources";
Expand Down Expand Up @@ -62,7 +62,11 @@ export async function packageReconciler(pkg: UDSPackage) {
monitors,
networkPolicyCount: netPol.length,
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(pkg.metadata!.uid!);
} catch (err) {
void handleFailure(err, pkg);
}
Expand Down

0 comments on commit a546036

Please sign in to comment.