Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion components/server/src/container-module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ import { WebhookEventGarbageCollector } from "./jobs/webhook-gc";
import { WorkspaceGarbageCollector } from "./jobs/workspace-gc";
import { LinkedInService } from "./linkedin-service";
import { LivenessController } from "./liveness/liveness-controller";
import { ReadinessController } from "./liveness/readiness-controller";
import { StartupController } from "./liveness/startup-controller";
import { RedisSubscriber } from "./messaging/redis-subscriber";
import { MonitoringEndpointsApp } from "./monitoring-endpoints";
import { OAuthController } from "./oauth-server/oauth-controller";
Expand Down Expand Up @@ -137,6 +137,7 @@ import { InstallationAdminCleanup } from "./jobs/installation-admin-cleanup";
import { AuditLogService } from "./audit/AuditLogService";
import { AuditLogGarbageCollectorJob } from "./jobs/auditlog-gc";
import { ProbesApp } from "./liveness/probes";
import { ReadinessController } from "./liveness/readiness-controller";

export const productionContainerModule = new ContainerModule(
(bind, unbind, isBound, rebind, unbindAsync, onActivation, onDeactivation) => {
Expand Down Expand Up @@ -245,6 +246,7 @@ export const productionContainerModule = new ContainerModule(

bind(ProbesApp).toSelf().inSingletonScope();
bind(LivenessController).toSelf().inSingletonScope();
bind(StartupController).toSelf().inSingletonScope();
bind(ReadinessController).toSelf().inSingletonScope();

bind(OneTimeSecretServer).toSelf().inSingletonScope();
Expand Down
12 changes: 8 additions & 4 deletions components/server/src/liveness/probes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import * as http from "http";
import express from "express";
import { inject, injectable } from "inversify";
import { LivenessController } from "./liveness-controller";
import { ReadinessController } from "./readiness-controller";
import { StartupController } from "./startup-controller";
import { AddressInfo } from "net";
import { ReadinessController } from "./readiness-controller";

@injectable()
export class ProbesApp {
Expand All @@ -18,17 +19,17 @@ export class ProbesApp {

constructor(
@inject(LivenessController) protected readonly livenessController: LivenessController,
@inject(StartupController) protected readonly startupController: StartupController,
@inject(ReadinessController) protected readonly readinessController: ReadinessController,
) {
const probesApp = express();
probesApp.use("/live", this.livenessController.apiRouter);
probesApp.use("/startup", this.startupController.apiRouter);
probesApp.use("/ready", this.readinessController.apiRouter);
this.app = probesApp;
}

public async start(port: number): Promise<number> {
await this.readinessController.start();

return new Promise((resolve, reject) => {
const probeServer = this.app.listen(port, () => {
resolve((<AddressInfo>probeServer.address()).port);
Expand All @@ -37,8 +38,11 @@ export class ProbesApp {
});
}

public notifyShutdown(): void {
this.readinessController.notifyShutdown();
}

public async stop(): Promise<void> {
this.httpServer?.close();
await this.readinessController.stop();
}
}
134 changes: 19 additions & 115 deletions components/server/src/liveness/readiness-controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,138 +4,42 @@
* See License.AGPL.txt in the project root for license information.
*/

import { injectable, inject } from "inversify";
import { injectable } from "inversify";
import express from "express";
import { TypeORM } from "@gitpod/gitpod-db/lib";
import { SpiceDBClientProvider } from "../authorization/spicedb";
import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
import { v1 } from "@authzed/authzed-node";
import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server";
import { Redis } from "ioredis";
import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat";
import { Disposable, DisposableCollection } from "@gitpod/gitpod-protocol";

/**
* ReadinessController is mimicking the behavior server had in the past: Behave as there is not ready probe - except during shutdown.
*
* Why? In Gitpod, our error strategy has always been "keep it local and retry", instead of "fail loud and have someone else handle it".
* As we don't want to change this now, we keep the same behavior for most of the services lifetime.
*
* Only during shutdown, we want to signal that the service is not ready anymore, to reduce error peaks.
*/
@injectable()
export class ReadinessController {
@inject(TypeORM) protected readonly typeOrm: TypeORM;
@inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider;
@inject(Redis) protected readonly redis: Redis;

private readinessProbeEnabled: boolean = true;
private disposables: DisposableCollection = new DisposableCollection();
private shutdown: boolean = false;

get apiRouter(): express.Router {
const router = express.Router();
this.addReadinessHandler(router);
return router;
}

public async start() {
this.disposables.push(this.startPollingFeatureFlag());
}

public async stop() {
this.disposables.dispose();
public notifyShutdown(): void {
this.shutdown = true;
}

protected addReadinessHandler(router: express.Router) {
router.get("/", async (_, res) => {
try {
if (!this.readinessProbeEnabled) {
log.debug("Readiness check skipped due to feature flag");
res.status(200);
return;
}

// Check database connection
const dbConnection = await this.checkDatabaseConnection();
if (!dbConnection) {
log.warn("Readiness check failed: Database connection failed");
res.status(503).send("Database connection failed");
return;
}

// Check SpiceDB connection
const spiceDBConnection = await this.checkSpiceDBConnection();
if (!spiceDBConnection) {
log.warn("Readiness check failed: SpiceDB connection failed");
res.status(503).send("SpiceDB connection failed");
return;
}

// Check Redis connection
const redisConnection = await this.checkRedisConnection();
if (!redisConnection) {
log.warn("Readiness check failed: Redis connection failed");
res.status(503).send("Redis connection failed");
return;
}

// All connections are good
res.status(200).send("Ready");
log.debug("Readiness check successful");
} catch (error) {
log.error("Readiness check failed", error);
res.status(503).send("Readiness check failed");
if (this.shutdown) {
log.warn("Readiness check failed: Server is shutting down");
res.status(503).send("Server is shutting down");
return;
}
});
}

private async checkDatabaseConnection(): Promise<boolean> {
try {
const connection = await this.typeOrm.getConnection();
// Simple query to verify connection is working
await connection.query("SELECT 1");
log.debug("Database connection check successful");
return true;
} catch (error) {
log.error("Database connection check failed", error);
return false;
}
}

private async checkSpiceDBConnection(): Promise<boolean> {
try {
const client = this.spiceDBClientProvider.getClient();

// Send a request, to verify that the connection works
const req = v1.ReadSchemaRequest.create({});
const response = await client.readSchema(req);
log.debug("SpiceDB connection check successful", { schemaLength: response.schemaText.length });

return true;
} catch (error) {
log.error("SpiceDB connection check failed", error);
return false;
}
}

private async checkRedisConnection(): Promise<boolean> {
try {
// Simple PING command to verify connection is working
const result = await this.redis.ping();
log.debug("Redis connection check successful", { result });
return result === "PONG";
} catch (error) {
log.error("Redis connection check failed", error);
return false;
}
}

private startPollingFeatureFlag(): Disposable {
return repeat(async () => {
// Check feature flag first
const readinessProbeEnabled = await getExperimentsClientForBackend().getValueAsync(
"server_readiness_probe",
true, // Default to readiness probe, skip if false
{},
);

log.debug("Feature flag server_readiness_probe updated", {
readinessProbeEnabled,
oldValue: this.readinessProbeEnabled,
});
this.readinessProbeEnabled = readinessProbeEnabled;
}, 10_000);
res.status(200).send("Ready");
log.debug("Readiness check successful");
});
}
}
104 changes: 104 additions & 0 deletions components/server/src/liveness/startup-controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Copyright (c) 2025 Gitpod GmbH. All rights reserved.
* Licensed under the GNU Affero General Public License (AGPL).
* See License.AGPL.txt in the project root for license information.
*/

import { injectable, inject } from "inversify";
import express from "express";
import { TypeORM } from "@gitpod/gitpod-db/lib";
import { SpiceDBClientProvider } from "../authorization/spicedb";
import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
import { v1 } from "@authzed/authzed-node";
import { Redis } from "ioredis";

@injectable()
export class StartupController {
@inject(TypeORM) protected readonly typeOrm: TypeORM;
@inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider;
@inject(Redis) protected readonly redis: Redis;

get apiRouter(): express.Router {
const router = express.Router();
this.addStartupHandler(router);
return router;
}

protected addStartupHandler(router: express.Router) {
router.get("/", async (_, res) => {
try {
// Check database connection
const dbConnection = await this.checkDatabaseConnection();
if (!dbConnection) {
log.warn("Startup check failed: Database connection failed");
res.status(503).send("Database connection failed");
return;
}

// Check SpiceDB connection
const spiceDBConnection = await this.checkSpiceDBConnection();
if (!spiceDBConnection) {
log.warn("Startup check failed: SpiceDB connection failed");
res.status(503).send("SpiceDB connection failed");
return;
}

// Check Redis connection
const redisConnection = await this.checkRedisConnection();
if (!redisConnection) {
log.warn("Startup check failed: Redis connection failed");
res.status(503).send("Redis connection failed");
return;
}

// All connections are good
res.status(200).send("Ready");
log.debug("Startup check successful");
} catch (error) {
log.error("Startup check failed", error);
res.status(503).send("Startup check failed");
}
});
}

private async checkDatabaseConnection(): Promise<boolean> {
try {
const connection = await this.typeOrm.getConnection();
// Simple query to verify connection is working
await connection.query("SELECT 1");
log.debug("Database connection check successful");
return true;
} catch (error) {
log.error("Database connection check failed", error);
return false;
}
}

private async checkSpiceDBConnection(): Promise<boolean> {
try {
const client = this.spiceDBClientProvider.getClient();

// Send a request, to verify that the connection works
const req = v1.ReadSchemaRequest.create({});
const response = await client.readSchema(req);
log.debug("SpiceDB connection check successful", { schemaLength: response.schemaText.length });

return true;
} catch (error) {
log.error("SpiceDB connection check failed", error);
return false;
}
}

private async checkRedisConnection(): Promise<boolean> {
try {
// Simple PING command to verify connection is working
const result = await this.redis.ping();
log.debug("Redis connection check successful", { result });
return result === "PONG";
} catch (error) {
log.error("Redis connection check failed", error);
return false;
}
}
}
8 changes: 7 additions & 1 deletion components/server/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ export class Server {
}

public async stop() {
// mark as not-ready
this.probesApp.notifyShutdown();

// run each stop with a timeout of 30s
async function race(workLoad: Promise<any>, task: string, ms: number = 30 * 1000): Promise<void> {
const before = Date.now();
Expand All @@ -413,10 +416,13 @@ export class Server {
race(this.stopServer(this.httpServer), "stop httpserver"),
race(this.stopServer(this.privateApiServer), "stop private api server"),
race(this.stopServer(this.publicApiServer), "stop public api server"),
race(this.probesApp.stop(), "stop probe server"),
race((async () => this.disposables.dispose())(), "dispose disposables"),
]);

this.probesApp.stop().catch(() => {
/* ignore */
});

log.info("server stopped.");
}

Expand Down
17 changes: 16 additions & 1 deletion install/installer/pkg/components/server/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) {
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/ready",
Path: "/startup",
Port: intstr.IntOrString{
Type: intstr.Int,
IntVal: ProbesPort,
Expand All @@ -387,6 +387,21 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) {
PeriodSeconds: 10,
FailureThreshold: 18, // try for 180 seconds, then the Pod is restarted
},
// /ready will only return false on shutdown (SIGTERM), always true otherwise
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/ready",
Port: intstr.IntOrString{
Type: intstr.Int,
IntVal: ProbesPort,
},
},
},
InitialDelaySeconds: 5,
PeriodSeconds: 5,
FailureThreshold: 1, // mark as "not ready" as quick as possible after receiving SIGTERM
},
SecurityContext: &corev1.SecurityContext{
Privileged: pointer.Bool(false),
AllowPrivilegeEscalation: pointer.Bool(false),
Expand Down
Loading