Skip to content

Commit

Permalink
Add Slack notifications of node shutdown events
Browse files Browse the repository at this point in the history
* Accept a Slack WebhookURL to notify a Slack channel of node
lifecycle events
* Add -webhook flag to set the Slack WebhookURL
  • Loading branch information
dghubble committed Nov 15, 2022
1 parent a8ca896 commit b9926eb
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Notable changes between versions.

## Latest

* Add Slack notifications of node lifecycle events
* Add `-webhook` flag to set the Slack Webhook URL

## v0.1.0

* Initial port from bash script to Go
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,28 @@ systemd:
WantedBy=multi-user.target
```
### Configuration
Configure via flags.
| flag | description | default |
|------------|--------------|--------------|
| -platform | Platform to poll for termination notices | none |
| -webhook | Slack Webhook URL | "" |
| -uncordon | Uncordon node on start | true |
| -drain | Drain node on stop | true |
| -delete | Delete node on stop | true |
| -log-level | Logger level | info |
| -version | Show version | NA |
| -help | Show help | NA |
Other values are set via environment variables.
| variable | description | default |
|------------|------------------------|-----------|
| KUBECONFIG | Path to Kubeconfig | "" |
| HOSTNAME | Current node name | "" |
### Spot Termination Notices
[AWS](https://aws.amazon.com/blogs/aws/new-ec2-spot-instance-termination-notices/) and [Azure](https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-terminate-notification) provide warnings via instance metadata (2 min) before spot terminations. `scuttle` can monitor platform-specific instance metadata endpoints to trigger drain or delete actions before shutdown.
Expand Down
3 changes: 3 additions & 0 deletions cmd/scuttle/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ var (

func main() {
flags := struct {
webhook string
platform string
uncordon bool
drain bool
Expand All @@ -37,6 +38,7 @@ func main() {
help bool
}{}

flag.StringVar(&flags.webhook, "webhook", "", "Slack Webhook URL (e.g. https://hooks.slack.com...)")
flag.StringVar(&flags.platform, "platform", "none", "Set platform (none, aws, azure) to poll termination notices")
flag.BoolVar(&flags.uncordon, "uncordon", true, "Enabling uncordoning node on start")
flag.BoolVar(&flags.drain, "drain", true, "Enabling draining node on stop")
Expand Down Expand Up @@ -85,6 +87,7 @@ func main() {
// Termination watcher
scuttle, err := sctl.New(&sctl.Config{
Logger: log,
Webhook: flags.webhook,
Platform: flags.platform,
ShouldUncordon: flags.uncordon,
ShouldDrain: flags.drain,
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.19

require (
github.com/sirupsen/logrus v1.9.0
github.com/slack-go/slack v0.11.4
k8s.io/api v0.25.4
k8s.io/apimachinery v0.25.4
k8s.io/client-go v0.25.4
Expand All @@ -22,6 +23,7 @@ require (
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ github.com/go-openapi/jsonreference v0.19.5/go.mod h1:RdybgQwPxbL4UEjuAruzK1x3nE
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng=
github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho=
github.com/go-test/deep v1.0.4/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
Expand Down Expand Up @@ -110,6 +112,7 @@ github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g=
Expand All @@ -126,6 +129,8 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
Expand Down Expand Up @@ -166,11 +171,14 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/slack-go/slack v0.11.4 h1:ojSa7KlPm3PqY2AomX4VTxEsK5eci5JaxCjlzGV5zoM=
github.com/slack-go/slack v0.11.4/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
Expand Down
6 changes: 6 additions & 0 deletions internal/scuttle.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
// Config configures a Scuttle
type Config struct {
Logger *logrus.Logger
Webhook string
Platform string
ShouldUncordon bool
ShouldDrain bool
Expand Down Expand Up @@ -94,10 +95,12 @@ func (w *Scuttle) Run(ctx context.Context) error {
select {
case <-ctx.Done():
w.log.WithFields(fields).Info("scuttle: stopping...")
w.notifySlack(Shutdown, w.hostname)
return w.stop(stopCtx)
case <-ticker.C:
w.log.WithFields(fields).Debug("scuttle: tick...")
if w.pendingShutdown(ctx) {
w.notifySlack(TermNotice, w.hostname)
return w.stop(stopCtx)
}
}
Expand All @@ -112,6 +115,7 @@ func (w *Scuttle) start(ctx context.Context) error {

if w.config.ShouldUncordon {
w.log.WithFields(fields).Info("scuttle: uncordon node")
w.notifySlack(Uncordon, w.hostname)
drainer := drain.New(&drain.Config{
Client: w.kubeClient,
Logger: w.log,
Expand All @@ -132,6 +136,7 @@ func (w *Scuttle) stop(ctx context.Context) error {
// optionally drain to evict pods on the node
if w.config.ShouldDrain {
w.log.WithFields(fields).Info("scuttle: draining node")
w.notifySlack(Drain, w.hostname)
drainer := drain.New(&drain.Config{
Client: w.kubeClient,
Logger: w.log,
Expand All @@ -148,6 +153,7 @@ func (w *Scuttle) stop(ctx context.Context) error {
// optionally delete the node from the cluster
if w.config.ShouldDelete {
w.log.WithFields(fields).Info("scuttle: deleting node")
w.notifySlack(Delete, w.hostname)
err := w.kubeClient.CoreV1().Nodes().Delete(ctx, w.hostname, v1.DeleteOptions{})
// best-effort, we need to continue even on error
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions internal/slack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2022 Poseidon Labs
// Copyright (C) 2022 Dalton Hubble
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
package scuttle

import (
"fmt"

"github.com/slack-go/slack"
)

// Notification identifies the kind of node lifecycle event that notifySlack
// reports to Slack.
type Notification string

// Notification values understood by notifySlack; each selects a different
// message text.
const (
// Uncordon reports that the node is being uncordoned.
Uncordon Notification = "uncordon"
// TermNotice reports that a spot termination notice was detected.
TermNotice Notification = "term-notice"
// Shutdown reports that a shutdown of the node was detected.
Shutdown Notification = "shutdown"
// Drain reports that the node is being drained.
Drain Notification = "drain"
// Delete reports that the node is being deleted.
Delete Notification = "delete"
)

// notifySlack posts a short message describing a node lifecycle event to the
// configured Slack webhook.
//
// action selects the message text and node is the node name interpolated into
// it. Notifications are best-effort: a failed post is only logged and no error
// is returned to the caller. Nothing is posted when no webhook URL is
// configured or when action is not one of the known Notification values.
func (w *Scuttle) notifySlack(action Notification, node string) {
	// Slack notifications are optional. Without a webhook URL there is nowhere
	// to post, so skip quietly instead of logging a failed request per event.
	if w.config.Webhook == "" {
		return
	}

	var text string
	switch action {
	case Uncordon:
		text = fmt.Sprintf(":white_check_mark: Uncordon node `%s`", node)
	case TermNotice:
		text = fmt.Sprintf(":stopwatch: Detected spot termination notice for `%s`", node)
	case Shutdown:
		text = fmt.Sprintf(":octagonal_sign: Detected shutdown of `%s`", node)
	case Drain:
		text = fmt.Sprintf(":droplet: Draining node `%s`", node)
	case Delete:
		text = fmt.Sprintf(":headstone: Deleting node `%s`", node)
	default:
		// Never post a message with empty text for an unrecognized action.
		w.log.Errorf("unknown Slack notification action: %q", string(action))
		return
	}

	msg := &slack.WebhookMessage{Text: text}
	if err := slack.PostWebhook(w.config.Webhook, msg); err != nil {
		w.log.Errorf("error notifying Slack webhook url: %v", err)
	}
}

0 comments on commit b9926eb

Please sign in to comment.