Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add reconciliation retries for CRs #423

Merged
merged 6 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/pepr/operator/crd/generated/exemption-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export enum Policy {
export interface Status {
observedGeneration?: number;
phase?: Phase;
retryAttempt?: number;
titles?: string[];
}

Expand All @@ -80,4 +81,5 @@ RegisterKind(Exemption, {
group: "uds.dev",
version: "v1alpha1",
kind: "Exemption",
plural: "exemptions",
});
2 changes: 2 additions & 0 deletions src/pepr/operator/crd/generated/package-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ export interface Status {
networkPolicyCount?: number;
observedGeneration?: number;
phase?: Phase;
retryAttempt?: number;
ssoClients?: string[];
}

Expand All @@ -553,4 +554,5 @@ RegisterKind(Package, {
group: "uds.dev",
version: "v1alpha1",
kind: "Package",
plural: "packages",
});
3 changes: 3 additions & 0 deletions src/pepr/operator/crd/sources/exemption/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
type: "string",
},
},
retryAttempt: {
type: "integer",
},
},
} as V1JSONSchemaProps,
spec: {
Expand Down
4 changes: 4 additions & 0 deletions src/pepr/operator/crd/sources/package/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
networkPolicyCount: {
type: "integer",
},
retryAttempt: {
type: "integer",
nullable: true,
},
},
} as V1JSONSchemaProps,
spec: {
Expand Down
5 changes: 4 additions & 1 deletion src/pepr/operator/reconcilers/exempt-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, updateStatus } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { processExemptions } from "../controllers/exemptions/exemptions";
import { Phase, UDSExemption } from "../crd";

Expand All @@ -27,6 +27,9 @@ export async function exemptReconciler(exempt: UDSExemption) {
observedGeneration: metadata.generation,
titles: exempt.spec?.exemptions?.map(e => e.title || e.matcher.name),
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(exempt.metadata!.uid!);
} catch (err) {
// Handle the failure
void handleFailure(err, exempt);
Expand Down
52 changes: 49 additions & 3 deletions src/pepr/operator/reconcilers/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { GenericKind } from "kubernetes-fluent-client";
import { K8s, Log, kind } from "pepr";

import { Mock } from "jest-mock";
import { handleFailure, shouldSkip, updateStatus, writeEvent } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus, writeEvent } from ".";
import { ExemptStatus, Phase, PkgStatus, UDSExemption, UDSPackage } from "../crd";

jest.mock("pepr", () => ({
Expand Down Expand Up @@ -33,6 +33,7 @@ describe("isPendingOrCurrent", () => {
});

it("should return true for a pending CR on subsequent calls", () => {
uidSeen.add("1");
const cr = { metadata: { uid: "1" }, status: { phase: Phase.Pending } } as UDSPackage;
expect(shouldSkip(cr)).toBe(true);
});
Expand Down Expand Up @@ -154,15 +155,60 @@ describe("handleFailure", () => {
expect(Create).not.toHaveBeenCalled();
});

it("should handle a failure", async () => {
it("should retry a failure", async () => {
const err = { status: 500, message: "Internal server error" };
const cr = {
kind: "Package",
apiVersion: "v1",
metadata: { namespace: "default", name: "test", generation: 1, uid: "1" },
};
await handleFailure(err, cr as UDSPackage | UDSExemption);
expect(Log.error).toHaveBeenCalledWith({ err }, "Error configuring default/test");
expect(Log.error).toHaveBeenCalledWith(
{ err },
"Reconciliation attempt 1 failed for default/test, retrying...",
);

expect(Create).toHaveBeenCalledWith({
type: "Warning",
reason: "ReconciliationFailed",
message: "Internal server error",
metadata: {
namespace: cr.metadata!.namespace,
generateName: cr.metadata!.name,
},
involvedObject: {
apiVersion: cr.apiVersion,
kind: cr.kind,
name: cr.metadata!.name,
namespace: cr.metadata!.namespace,
uid: cr.metadata!.uid,
},
firstTimestamp: expect.any(Date),
reportingComponent: "uds.dev/operator",
reportingInstance: process.env.HOSTNAME,
});

expect(PatchStatus).toHaveBeenCalledWith({
metadata: { namespace: "default", name: "test" },
status: {
retryAttempt: 1,
},
});
});

it("should fail after 5 retries", async () => {
const err = { status: 500, message: "Internal server error" };
const cr = {
kind: "Package",
apiVersion: "v1",
metadata: { namespace: "default", name: "test", generation: 1, uid: "1" },
status: { phase: Phase.Pending, retryAttempt: 5 },
};
await handleFailure(err, cr as UDSPackage | UDSExemption);
expect(Log.error).toHaveBeenCalledWith(
{ err },
"Error configuring default/test, maxed out retries",
);

expect(Create).toHaveBeenCalledWith({
type: "Warning",
Expand Down
29 changes: 20 additions & 9 deletions src/pepr/operator/reconcilers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { K8s, Log, kind } from "pepr";
import { ExemptStatus, Phase, PkgStatus, UDSExemption, UDSPackage } from "../crd";
import { Status } from "../crd/generated/package-v1alpha1";

const uidSeen = new Set<string>();
export const uidSeen = new Set<string>();

/**
* Checks if the CRD is pending or the current generation has been processed
Expand All @@ -17,10 +17,9 @@ export function shouldSkip(cr: UDSExemption | UDSPackage) {
const isCurrentGeneration = cr.metadata?.generation === cr.status?.observedGeneration;

// First check if the CR has been seen before and return false if it has not
// This ensures that all CRs are processed at least once during the lifetime of the pod
// This ensures that all CRs are processed at least once by this version of pepr-core
if (!uidSeen.has(cr.metadata!.uid!)) {
Log.debug(cr, `Should skip? No, first time processed during this pod's lifetime`);
uidSeen.add(cr.metadata!.uid!);
return false;
}

Expand Down Expand Up @@ -99,19 +98,31 @@ export async function handleFailure(
) {
const metadata = cr.metadata!;
const identifier = `${metadata.namespace}/${metadata.name}`;
let status: Status;

// todo: identify exact 404 we are targetting, possibly in `updateStatus`
if (err.status === 404) {
Log.warn({ err }, `Package metadata seems to have been deleted`);
return;
}

Log.error({ err }, `Error configuring ${identifier}`);
const retryAttempt = cr.status?.retryAttempt || 0;

// todo: need to evaluate when it is safe to retry (updating generation now avoids retrying infinitely)
const status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
} as Status;
if (retryAttempt < 5) {
const currRetry = retryAttempt + 1;
Log.error({ err }, `Reconciliation attempt ${currRetry} failed for ${identifier}, retrying...`);

status = {
retryAttempt: currRetry,
};
} else {
Log.error({ err }, `Error configuring ${identifier}, maxed out retries`);

status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
};
}

// Write an event for the error
void writeEvent(cr, { message: err.message });
Expand Down
6 changes: 5 additions & 1 deletion src/pepr/operator/reconcilers/package-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, updateStatus } from ".";
import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { UDSConfig } from "../../config";
import { enableInjection } from "../controllers/istio/injection";
import { istioResources } from "../controllers/istio/istio-resources";
Expand Down Expand Up @@ -62,7 +62,11 @@ export async function packageReconciler(pkg: UDSPackage) {
monitors,
networkPolicyCount: netPol.length,
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(pkg.metadata!.uid!);
} catch (err) {
void handleFailure(err, pkg);
}
Expand Down