Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(liveness): fail liveness if db conns saturated for 10min #2648

Merged
merged 2 commits into from
Aug 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions packages/server/modules/core/rest/health.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,32 @@ import { numberOfFreeConnections } from '@/modules/shared/helpers/dbHelper'
import { db } from '@/db/knex'
import type { Knex } from 'knex'

/**
 * Provider of an averaged count of free database connections.
 *
 * Instances are injected into the health-check handlers as
 * `freeConnectionsCalculator`; the liveness handler calls `mean()` and converts
 * the result to a percentage of `postgresMaxConnections()` before comparing it
 * with its threshold.
 */
type FreeConnectionsCalculator = {
// Mean number of free connections; the window it is averaged over is decided by whoever constructs the calculator.
mean: () => number
}

export default (app: express.Application) => {
const knexFreeDbConnectionSampler = knexFreeDbConnectionSamplerFactory({
const knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 600000 //number of ms over which to average the database connections, before declaring not alive. 10 minutes.
})
knexFreeDbConnectionSamplerLiveness.start()

const knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready
sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready. 20 seconds.
})
knexFreeDbConnectionSampler.start()
knexFreeDbConnectionSamplerReadiness.start()

app.options('/liveness')
app.get(
'/liveness',
handleLivenessFactory({
isRedisAlive,
isPostgresAlive
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness
})
)
app.options('/readiness')
Expand All @@ -33,7 +45,7 @@ export default (app: express.Application) => {
handleReadinessFactory({
isRedisAlive,
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSampler
freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness
})
)
}
Expand All @@ -42,6 +54,7 @@ const handleLivenessFactory =
(deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
freeConnectionsCalculator: FreeConnectionsCalculator
}): express.RequestHandler =>
async (req, res) => {
const postgres = await deps.isPostgresAlive()
Expand Down Expand Up @@ -69,14 +82,26 @@ const handleLivenessFactory =
return
}

const numFreeConnections = await deps.freeConnectionsCalculator.mean()
const percentageFreeConnections = Math.floor(
(numFreeConnections * 100) / postgresMaxConnections()
)
//not alive if the mean percentage of free connections is below 10%
if (percentageFreeConnections < 10) {
const message =
'Liveness health check failed. Insufficient free database connections for a sustained duration.'
req.log.error(message)
res.status(500).json({
message
})
res.send()
return
}

res.status(200)
res.send()
}

/**
 * Provider of an averaged count of free database connections, handed to
 * health-check handlers through their `freeConnectionsCalculator` dependency.
 */
type FreeConnectionsCalculator = {
// Mean number of free connections, as a plain number.
mean: () => number
}

const handleReadinessFactory = (deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
Expand Down Expand Up @@ -112,7 +137,7 @@ const handleReadinessFactory = (deps: {
const percentageFreeConnections = Math.floor(
(numFreeConnections * 100) / postgresMaxConnections()
)
//unready if less than 10% for 20s
//unready if less than 10%
if (percentageFreeConnections < 10) {
const message =
'Readiness health check failed. Insufficient free database connections for a sustained duration.'
Expand Down