Skip to content

Commit 03c0b67

Browse files
authored
TECH-3227: Modernizing cloud-init.yml (#22)
1 parent ff48054 commit 03c0b67

File tree

9 files changed

+433
-383
lines changed

9 files changed

+433
-383
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ Note: The VPC must have `enableDnsHostnames` = `true` and `enableDnsSupport` = `
5050

5151
If you can SSH onto one of the nodes you can run:
5252
`docker exec rabbitmq rabbitmqctl cluster_status` to see the cluster status of that node.
53+
54+
## Upgrading
55+
56+
Sometimes we will need to do a hot restart of a node in the cluster in order to perform some maintenance, upgrade, or infrastructure improvements. To do this gracefully we need to remove the current node from the cluster. This helps keep the node in sync and organized correctly. To do this we need to stop the app and reset the node as follows:
57+
58+
```SH
59+
docker exec rabbitmq rabbitmqctl cluster_status
60+
docker exec rabbitmq rabbitmqctl stop_app
61+
docker exec rabbitmq rabbitmqctl reset
62+
docker exec rabbitmq rabbitmqctl cluster_status
63+
```
64+
5365
<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
5466
README.md updated successfully
5567
<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->

cloud-init.yaml

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,19 @@
22
write_files:
33
- path: /root/conf/enabled_plugins
44
content: |
5-
[prometheus_rabbitmq_exporter,rabbitmq_management].
6-
- path: /root/conf/rabbitmq.config
5+
[
6+
rabbitmq_management
7+
].
8+
- path: /root/conf/rabbitmq.conf
79
content: |
8-
[ { rabbit, [
9-
{ loopback_users, [ ] } ] }
10-
].
10+
loopback_users = none
11+
- path: /root/conf/advanced.config
12+
content: |
13+
[
14+
{rabbit, [
15+
${feature_flags}
16+
]}
17+
].
1118
- path: /etc/sysconfig/docker
1219
content: |
1320
# The max number of open files for the daemon itself, and all
@@ -24,6 +31,13 @@ write_files:
2431
# when starting the daemon.
2532
DAEMON_PIDFILE_TIMEOUT=10
2633
34+
- path: /root/erlang_cookie.sh
35+
content: |
36+
#!/usr/bin/env bash
37+
mkdir /root/data/ #/root/data:/var/lib/rabbitmq
38+
aws ssm get-parameter --name ${secret_cookie} --with-decryption --region ${region} | jq -r '.Parameter.Value' > /root/data/.erlang.cookie
39+
chmod 400 /root/data/.erlang.cookie #400 is required by rabbitmq
40+
2741
- path: /root/forget_hosts.sh
2842
content: |
2943
#!/usr/bin/env bash
@@ -54,12 +68,12 @@ write_files:
5468
5569
for run in {1..3}; do
5670
sleep $[ ( $RANDOM % 10 ) + 1 ]s
57-
echo "stopping rabbit to try and join other nodes"
71+
echo " stopping rabbit to try and join other nodes"
5872
rabbitmqctl stop_app
5973
6074
NEW_HOSTNAMES=()
6175
for peerhostname in $HOSTNAMES; do
62-
echo "trying to join $${peerhostname}"
76+
echo " trying to join $${peerhostname}"
6377
rabbitmqctl join_cluster rabbit@$peerhostname
6478
st=$?
6579
if [ $st -ne 0 ] && [ $st -ne 130 ]; then # 130 is "already joined"
@@ -69,7 +83,7 @@ write_files:
6983
7084
HOSTNAMES=( $${NEW_HOSTNAMES[@]} )
7185
rabbitmqctl start_app
72-
echo "starting rabbit after trying to join other nodes"
86+
echo " starting rabbit after trying to join other nodes"
7387
7488
if [ $${#HOSTNAMES[@]} -eq 0 ]; then
7589
exit 0
@@ -131,8 +145,10 @@ write_files:
131145
####### Defaults
132146
## Provides auto detected defaults, for vanilla Docker environments,
133147
## please see datadog.yaml.example for all supported options
148+
## https://github.com/DataDog/datadog-agent/blob/main/pkg/config/config_template.yaml
134149
135150
api_key: "ENC[DATADOG_API_KEY]"
151+
ec2_prefer_imdsv2: true
136152
secret_backend_command: /etc/datadog-agent/secrets.py
137153
138154
# Auto discovery settings for vanilla Docker
@@ -184,24 +200,29 @@ write_files:
184200
docker exec rabbitmq rabbitmqctl set_permissions -p / datadog "^aliveness-test$" "^amq\.default$" ".*"
185201
186202
runcmd:
203+
- set -x
187204
- yum update -y
188205
- yum install -y docker jq
189206
- pip3 install boto3
190-
- DD_AGENT_MAJOR_VERSION=7 DD_INSTALL_ONLY=true DD_API_KEY=$(aws ssm get-parameter --name ${dd_api_key} --with-decryption --region ${region} | jq -r '.Parameter.Value') DD_SITE="${dd_site}" bash -c "$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script.sh)"
207+
- (set +x; DD_AGENT_MAJOR_VERSION=7 DD_INSTALL_ONLY=true DD_API_KEY=$(aws ssm get-parameter --name ${dd_api_key} --with-decryption --region ${region} | jq -r '.Parameter.Value') DD_SITE="${dd_site}" bash -c "$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script_agent7.sh)")
191208
- systemctl start docker
192209
- chkconfig docker on
193210
- usermod -a -G docker dd-agent
194-
- sed -i "s/replace_ip_address_here/$(curl http://169.254.169.254/latest/meta-data/local-ipv4)/g" /root/datadog-agent/conf.d/rabbitmq.d/conf.yaml
211+
- |
212+
TOKEN=`curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"`
213+
sed -i "s/replace_ip_address_here/$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -v http://169.254.169.254/latest/meta-data/local-ipv4)/g" /root/datadog-agent/conf.d/rabbitmq.d/conf.yaml
195214
- cp /root/datadog-agent/datadog.yaml /etc/datadog-agent/datadog.yaml && chown dd-agent /etc/datadog-agent/datadog.yaml
196215
- cp /root/datadog-agent/conf.d/rabbitmq.d/conf.yaml /etc/datadog-agent/conf.d/rabbitmq.d/conf.yaml && chown dd-agent /etc/datadog-agent/conf.d/rabbitmq.d/conf.yaml
197216
- cp /root/datadog-agent/secrets.py /etc/datadog-agent/secrets.py && chown dd-agent /etc/datadog-agent/secrets.py && chmod 0700 /etc/datadog-agent/secrets.py
198217
- systemctl start datadog-agent
199-
- $(aws ecr get-login --no-include-email --region ${region} --registry-ids ${ecr_registry_id})
200-
- docker run -d --name rabbitmq --hostname $HOSTNAME --log-driver=local --log-opt max-size=10m -p 4369:4369 -p 5672:5672 -p 15672:15672 -p 25672:25672 -e RABBITMQ_ERLANG_COOKIE=$(aws ssm get-parameter --name ${secret_cookie} --with-decryption --region ${region} | jq -r '.Parameter.Value') -v /root/data:/var/lib/rabbitmq -v /root/conf/:/etc/rabbitmq -v /root/bin:/tmp/bin ${rabbitmq_image}
218+
# Log Docker in to ECR by piping the password straight into docker login; no command substitution, since docker login's stdout ("Login Succeeded") must not be executed as a command.
- aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${ecr_registry_id}.dkr.ecr.${region}.amazonaws.com
219+
- bash /root/erlang_cookie.sh
220+
- docker run -d --name rabbitmq --hostname $HOSTNAME --log-driver=local --log-opt max-size=10m -p 4369:4369 -p 5672:5672 -p 15672:15672 -p 25672:25672 -v /root/data:/var/lib/rabbitmq -v /root/conf/:/etc/rabbitmq -v /root/bin:/tmp/bin ${rabbitmq_image}
201221
- sleep 1
202222
- bash /root/find_hosts.sh
203223
- docker exec rabbitmq bash /tmp/bin/join_cluster.sh $(bash /root/find_hosts.sh)
204224
- sleep 1
205225
- bash /root/configure.sh
206226
- sleep 1
207227
- bash /root/configure_datadog_user.sh
228+
- set +x

data.tf

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
data "aws_region" "current" {}
3+
4+
# For use when setting up a new instance or updating to the latest AMI
5+
data "aws_ami" "amazon_linux_2_latest" {
6+
owners = ["amazon"] # force 1st party AMI's
7+
most_recent = true # latest
8+
9+
filter {
10+
# HVM seems better for many reasons, not least its tighter hardware integration
11+
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/virtualization_types.html
12+
name = "virtualization-type"
13+
values = ["hvm"]
14+
}
15+
16+
filter {
17+
# We are switching to ARM64 (Graviton) to leverage the best price performance
18+
# https://aws.amazon.com/pm/ec2-graviton/
19+
name = "architecture"
20+
values = ["arm64"]
21+
}
22+
23+
filter {
24+
# gp3 is preferred but not available at time of writing: 8-14-23
25+
# https://aws.amazon.com/ebs/general-purpose/
26+
name = "block-device-mapping.volume-type"
27+
values = ["gp2"]
28+
}
29+
30+
filter {
31+
# Kernel 5.* selected for its tighter integration with Graviton processors
32+
# https://aws.amazon.com/about-aws/whats-new/2021/11/amazon-linux-2-ami-kernel-5-10/
33+
name = "name"
34+
values = ["amzn2-ami-kernel-5.*"]
35+
}
36+
}
37+
38+
# Note: bootstraps with latest if ami is not provided
39+
data "aws_ami" "amazon_linux_2" {
40+
owners = ["amazon"]
41+
most_recent = true
42+
include_deprecated = true # statically passed AMI's may be deprecated
43+
44+
filter {
45+
name = "image-id"
46+
values = [var.ami_id != "" ? var.ami_id : data.aws_ami.amazon_linux_2_latest.image_id]
47+
}
48+
}

iam.tf

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
data "aws_iam_policy_document" "policy_doc" {
2+
statement {
3+
actions = ["sts:AssumeRole"]
4+
5+
principals {
6+
type = "Service"
7+
identifiers = ["ec2.amazonaws.com"]
8+
}
9+
}
10+
}
11+
12+
data "aws_iam_policy_document" "policy_permissions_doc" {
13+
statement {
14+
effect = "Allow"
15+
actions = [
16+
"autoscaling:DescribeAutoScalingInstances",
17+
"ec2:DescribeInstances"
18+
]
19+
resources = [
20+
"*"
21+
]
22+
}
23+
24+
statement {
25+
effect = "Allow"
26+
actions = [
27+
"ecr:GetAuthorizationToken",
28+
"ecr:ListImages",
29+
"ecr:BatchCheckLayerAvailability",
30+
"ecr:BatchGetImage",
31+
"ecr:DescribeImages",
32+
"ecr:DescribeRepositories",
33+
"ecr:GetDownloadUrlForLayer",
34+
"ecr:GetRepositoryPolicy"
35+
]
36+
resources = [
37+
"*"
38+
]
39+
}
40+
41+
statement {
42+
effect = "Allow"
43+
actions = [
44+
"ssm:GetParameter"
45+
]
46+
resources = [
47+
aws_ssm_parameter.datadog_api_key.arn,
48+
aws_ssm_parameter.datadog_user_password.arn,
49+
aws_ssm_parameter.rabbit_admin_password.arn,
50+
aws_ssm_parameter.rabbit_password.arn,
51+
aws_ssm_parameter.secret_cookie.arn,
52+
]
53+
}
54+
55+
statement {
56+
effect = "Allow"
57+
actions = [
58+
"kms:Decrypt"
59+
]
60+
resources = [
61+
var.kms_key_arn
62+
]
63+
}
64+
}
65+
66+
resource "aws_iam_role_policy" "iam_policy" {
67+
name = "${var.name}-${data.aws_region.current.name}"
68+
role = aws_iam_role.iam_role.id
69+
70+
policy = data.aws_iam_policy_document.policy_permissions_doc.json
71+
}
72+
73+
resource "aws_iam_instance_profile" "iam_profile" {
74+
name_prefix = "${var.name}-${data.aws_region.current.name}-"
75+
role = aws_iam_role.iam_role.name
76+
}
77+
78+
resource "aws_iam_role" "iam_role" {
79+
name = "${var.name}-${data.aws_region.current.name}"
80+
assume_role_policy = data.aws_iam_policy_document.policy_doc.json
81+
tags = var.tags
82+
}

0 commit comments

Comments
 (0)