Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@ FROM node:22-alpine
MAINTAINER Digitransit version: 0.1
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
ENV CHECK_INTERVAL_MINUTES 5
ENV DEBUG ""
ENV TZ "Europe/Helsinki"
ENV DOCKER_USER ""
ENV DOCKER_AUTH ""

RUN apk add --update \
python3 \
Expand Down
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ Autodeployer also takes care of restarting dependant deployments.

Additionally, some deployments are restarted periodically.

## Env variable configuration

The following environment variables should be set:
* "SLACK_ACCESS_TOKEN" access token used for sending Slack messages through a Slack app
* "MONITORING_SLACK_CHANNEL_ID" Slack channel id (not the name) for most of the Slack messages
* "ALERT_SLACK_CHANNEL_ID" Slack channel id (not the name) for sending messages about image freshness checks
* "DOCKER_USER" Docker user that is used for interacting with the Docker API
* "DOCKER_AUTH" Docker password that is used for interacting with the Docker API
* "TZ" optional timezone (defaults to "Europe/Helsinki")

## Prerequisites

Deployments should have the following labels defined as deployer uses `app` as an identifier for finding deployments/pods.
Expand Down Expand Up @@ -65,3 +75,16 @@ Restarts deployment at 04:30. Attempts to restart deployment stop after deployme

### restartLimitInterval: "240"
Optional label that defines, in minutes, how much time must have passed since the last restart for a restart to trigger at the time defined in the "restartAt" label. If "restartLimitInterval" is not defined, the default value is 1080 minutes (18 hours).

## Deployment image freshness monitoring

Optionally, the autodeployer can check that a deployment's image has been updated within the last 12 hours.

This is enabled with the `checkImageFreshnessAt` label, which defines the time of day at which the check is done, in `hh.mm` format:

```yaml
metadata:
labels:
update: "auto"
checkImageFreshnessAt: "09.00"
```
27 changes: 1 addition & 26 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
"license": "(AGPL-3.0 OR EUPL-1.2)",
"type": "module",
"dependencies": {
"@dagrejs/graphlib": "^2.2.4",
"@kubernetes/client-node": "^1.0.0",
"@slack/webhook": "^7.0.4",
"@dagrejs/graphlib": "^2.2.4"
"axios": "^1.7.9"
},
"devDependencies": {
"chai": "^5.1.2",
Expand Down
4 changes: 2 additions & 2 deletions src/dep-deployment-restarter.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import graphlib from '@dagrejs/graphlib'
import { build, isSubGraphStable, deploymentsNeedingRestart } from './graph.js'
import { postSlackMessage } from './util.js'
import { postMonitoringSlackMessage } from './util.js'

/*
* Automatically restarts dependent deployments in a controlled manner. This is
Expand Down Expand Up @@ -34,7 +34,7 @@ export default {
const deploymentGraph = build(deployments)
if (graphlib.alg.findCycles(deploymentGraph).length > 0) {
console.log('Bummer! Graph has cycle, %s', deploymentGraph.toJSON())
postSlackMessage('Deployments are configured to restart each other in a cycle.')
postMonitoringSlackMessage('Deployments are configured to restart each other in a cycle.')
} else {
deploymentsNeedingRestart(deploymentGraph).filter(({ from, value }) => {
console.log('deployment %s needs restart', from)
Expand Down
25 changes: 21 additions & 4 deletions src/graph.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Graph } from '@dagrejs/graphlib'
import { postSlackMessage } from './util.js'
import { postMonitoringSlackMessage } from './util.js'

function addDepEdges (graph, deployment, deployments) {
const deploymentLabels = deployment.metadata.labels
Expand All @@ -13,7 +13,7 @@ function addDepEdges (graph, deployment, deployments) {
graph.setEdge(deploymentName, dependency, { delay })
} else {
console.log(`${dependency} does not exist but is defined as a dependency for a deployment`)
postSlackMessage(`${dependency} does not exist but is defined as a dependency for a deployment`)
postMonitoringSlackMessage(`${dependency} does not exist but is defined as a dependency for a deployment`)
}
})
}
Expand Down Expand Up @@ -52,11 +52,9 @@ export function hasPendingDependentRestarts (graph, deploymentId) {

export function build (deployments) {
const graph = new Graph({ directed: true })
console.log('adding vertexes')
deployments.forEach(deployment => {
graph.setNode(deployment.metadata.labels.app, deployment)
})
console.log('adding edges')
deployments.forEach(deployment => {
if (deployment.metadata.labels.restartAfterDeployments) {
addDepEdges(graph, deployment, deployments)
Expand Down Expand Up @@ -86,3 +84,22 @@ export function deploymentsNeedingRestart (graph) {
}
return deployments
}

/**
 * Collects the deployments whose image freshness check is due right now.
 *
 * A deployment opts in with the `checkImageFreshnessAt` label ("hh.mm"). It is
 * returned when `currentDate` falls 0-5 minutes (inclusive) after that time on
 * the same calendar day as `currentDate`, evaluated in local time.
 *
 * @param {object} graph deployment graph; node values are deployment objects
 * @param {Date} currentDate moment the check is evaluated at
 * @returns {object[]} deployments that should be checked now
 */
export function deploymentsNeedingImageFreshnessCheck (graph, currentDate) {
  const windowSeconds = 5 * 60
  return graph.nodes()
    .map(id => graph.node(id))
    .filter(({ metadata }) => {
      const label = metadata.labels.checkImageFreshnessAt
      if (!label) {
        return false
      }
      // label format is hh.mm
      const [hours, minutes] = label.split('.')
      const dueAt = new Date(
        currentDate.getFullYear(),
        currentDate.getMonth(),
        currentDate.getDate(),
        hours,
        minutes
      )
      const elapsedSeconds = Math.round((currentDate.getTime() - dueAt.getTime()) / 1000)
      // Only 0-5 minutes after the due time qualifies, this is to avoid duplicate checks
      return elapsedSeconds >= 0 && elapsedSeconds <= windowSeconds
    })
}
50 changes: 50 additions & 0 deletions src/image-freshness-monitor.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { build, deploymentsNeedingImageFreshnessCheck } from './graph.js'
import { postAlertSlackMessage } from './util.js'

/*
* Automatically checks that the image + tag combination used by the deployment
* has been updated within the last 12 hours. If not, a message is sent to Slack.
* Configured with labels as follows:
* checkImageFreshnessAt: "hh.mm"
* where checkImageFreshnessAt defines when the check should be done (roughly,
* might be delayed by 0-5 mins)
*/
export default {
  /**
   * Finds the deployments whose `checkImageFreshnessAt` time is due, looks up
   * the date of each deployment's first container image and posts a single
   * Slack alert listing the deployments whose image is more than 12 hours old.
   *
   * The lookups run in the background; nothing is returned to the caller.
   * A failing lookup is logged and that deployment is left out of the alert.
   *
   * @param {object[]} deployments deployment objects, passed on to build()
   * @param {object} context must provide `dockerRepo.getImageDate(image)`
   */
  command: (deployments, context) => {
    console.log('Checking for a need to do image freshness checks')
    const deploymentGraph = build(deployments)
    const now = new Date()
    const deploymentsNeedingCheck = deploymentsNeedingImageFreshnessCheck(deploymentGraph, now)
    if (deploymentsNeedingCheck.length === 0) {
      console.log('Found no deployments that need an image freshness check')
      return
    }
    // Images last updated before this instant (12 hours ago) count as stale.
    const oldestAcceptedTime = now.getTime() - 12 * 60 * 60 * 1000
    // Each check resolves to the deployment id when the image is stale, else null.
    // The async callback turns any throw (malformed spec, failed lookup) into a
    // logged, skipped deployment instead of aborting the remaining checks.
    const checks = deploymentsNeedingCheck.map(async deployment => {
      const deploymentId = deployment.metadata.labels.app
      console.log(`Deployment ${deploymentId} needs image freshness check`)
      try {
        const image = deployment.spec.template.spec.containers[0].image
        // NOTE(review): the result is only compared numerically below; presumably
        // a Date or epoch milliseconds - confirm against dockerRepo.getImageDate.
        const repoImageDate = await context.dockerRepo.getImageDate(image)
        if (!repoImageDate) {
          // An unknown date must not be reported as a fresh image.
          console.log('%s image date is not available, skipping freshness check', deploymentId)
          return null
        }
        if (repoImageDate < oldestAcceptedTime) {
          console.log('%s image has not been updated within the last 12 hours', deploymentId)
          return deploymentId
        }
        console.log('%s image has been updated within the last 12 hours', deploymentId)
        return null
      } catch (err) {
        console.log(err)
        return null
      }
    })
    Promise.all(checks).then(values => {
      const deploymentsWithOldImages = values.filter(value => value != null)
      if (deploymentsWithOldImages.length > 0) {
        postAlertSlackMessage(`:boom: These deployments have not been updated within the last 12 hours: ${deploymentsWithOldImages.join(', ')} :boom:`)
      }
    }).catch(err => {
      // Keep an unexpected failure from becoming an unhandled rejection.
      console.log(err)
    })
  }
}
5 changes: 3 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ import dockerRepo from './dockerRepo.js'
import imageDeployer from './image-deployer.js'
import depDeploymentRestarter from './dep-deployment-restarter.js'
import cronDeploymentRestarter from './cron-deployment-restarter.js'
import imageFreshnessMonitor from './image-freshness-monitor.js'

const CHECK_INTERVAL = (process.env.CHECK_INTERVAL_MINUTES || 5) * 60 * 1000
const CHECK_INTERVAL = 5 * 60 * 1000

const actions = [imageDeployer, depDeploymentRestarter, cronDeploymentRestarter]
const actions = [imageDeployer, depDeploymentRestarter, cronDeploymentRestarter, imageFreshnessMonitor]

const logError = (name, e) => {
console.log('%s: Error occurred %s', name, e)
Expand Down
51 changes: 35 additions & 16 deletions src/util.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,43 @@
import { IncomingWebhook } from '@slack/webhook'
import axios from 'axios'

const url = process.env.SLACK_WEBHOOK_URL || null
let webhook
if (process.env.ENVIRONMENT_TYPE === 'DEV') {
webhook = url !== null ? new IncomingWebhook(url, { username: 'Configuration checker', channel: 'digitransit_monitoring_dev' }) : null
} else {
webhook = url !== null ? new IncomingWebhook(url, { username: 'Configuration checker', channel: 'digitransit_monitoring_prd' }) : null
const MONITORING_CHANNEL_ID = process.env.MONITORING_SLACK_CHANNEL_ID
const MONITORING_USERNAME = `Configuration checker ${process.env.ENVIRONMENT_TYPE}`

const ALERT_CHANNEL_ID = process.env.ALERT_SLACK_CHANNEL_ID
const ALERT_USERNAME = `Image freshness monitor ${process.env.ENVIRONMENT_TYPE}`

const headers = {
Authorization: `Bearer ${process.env.SLACK_ACCESS_TOKEN}`,
'Content-Type': 'application/json',
Accept: '*/*'
}

/**
 * Posts a message to Slack through the `chat.postMessage` Web API method.
 *
 * When SLACK_ACCESS_TOKEN is not set, the message is only logged and no
 * request is made. The outcome of the request is logged; rejected requests
 * are caught and logged as well.
 *
 * @param {string} text message body
 * @param {string} username display name sent with the message
 * @param {string} channel id of the target Slack channel
 */
function postSlackMessage (text, username, channel) {
  if (!process.env.SLACK_ACCESS_TOKEN) {
    console.log('Not sending to slack: ' + text)
    // Without a token the request could only fail, so stop here.
    return
  }

  axios.post('https://slack.com/api/chat.postMessage', {
    channel,
    text,
    username
  }, { headers })
    .then(response => {
      // Slack reports most API errors as HTTP 200 with `ok: false` in the body,
      // so the status code alone does not prove that the message was delivered.
      const body = response.data
      if (response.status !== 200 || (body && body.ok === false)) {
        console.log(`Slack message was not sent successfully. Response: ${JSON.stringify(body)}`)
      } else {
        console.log(`Sent to slack: ${text}`)
      }
    })
    .catch(error => {
      console.log(`Something went wrong when trying to send message to Slack:\n${error}`)
    })
}

/**
 * Sends a message to the monitoring Slack channel.
 *
 * Delegates to postSlackMessage with MONITORING_USERNAME and
 * MONITORING_CHANNEL_ID (read from MONITORING_SLACK_CHANNEL_ID).
 *
 * @param {string} text message to send
 */
export function postMonitoringSlackMessage (text) {
postSlackMessage(text, MONITORING_USERNAME, MONITORING_CHANNEL_ID)
}

/**
 * Sends a message to the alert Slack channel.
 *
 * Delegates to postSlackMessage with ALERT_USERNAME and ALERT_CHANNEL_ID
 * (read from ALERT_SLACK_CHANNEL_ID).
 *
 * @param {string} text message to send
 */
export function postAlertSlackMessage (text) {
postSlackMessage(text, ALERT_USERNAME, ALERT_CHANNEL_ID)
}
19 changes: 18 additions & 1 deletion test/graph-test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect } from 'chai'
import { describe, it } from 'mocha'
import graphlib from '@dagrejs/graphlib'
import { build, isSubGraphStable, hasPendingDependentRestarts, deploymentsNeedingRestart } from './../src/graph.js'
import { build, isSubGraphStable, hasPendingDependentRestarts, deploymentsNeedingRestart, deploymentsNeedingImageFreshnessCheck } from './../src/graph.js'

const NOW = new Date().getTime()

Expand Down Expand Up @@ -113,4 +113,21 @@ describe('graph-builder', function () {
deploymentGraph = build(testApps)
expect(deploymentsNeedingRestart(deploymentGraph).length).to.equal(1)
})

it('Graph should return deployments needing image freshness check', () => {
  // Only app2 carries the label; its check is due between 09:00 and 09:05.
  const deploymentGraph = build([
    appConfig('app1', NOW, {}, true),
    appConfig('app2', NOW, { checkImageFreshnessAt: '09.00' }, true)
  ])
  // Number of deployments reported as due at the given local timestamp.
  const dueCountAt = timestamp =>
    deploymentsNeedingImageFreshnessCheck(deploymentGraph, new Date(timestamp)).length

  // one minute into the window
  expect(dueCountAt('2025-01-01T09:01:00')).to.equal(1)
  // one minute before the window opens
  expect(dueCountAt('2025-01-01T08:59:00')).to.equal(0)
  // one minute after the window has closed
  expect(dueCountAt('2025-01-01T09:06:00')).to.equal(0)
})
})
Loading