-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add bootstrap option to create a local NVMe raid0 or individual volum…
…e mounts
- Loading branch information
Showing
4 changed files
with
252 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
#!/usr/bin/env bash | ||
|
||
set -o errexit | ||
set -o pipefail | ||
set -o nounset | ||
|
||
err_report() { | ||
echo "Exited with error on line $1" | ||
} | ||
trap 'err_report $LINENO' ERR | ||
|
||
print_help() { | ||
echo "usage: $0 <raid0 | mount>" | ||
echo "Sets up Amazon EC2 Instance Store NVMe disks" | ||
echo "" | ||
echo "-d, --dir directory to mount the filesystem(s) (default: /mnt/k8s-disks/)" | ||
echo "-h, --help print this help" | ||
} | ||
|
||
# Sets up a RAID-0 of NVMe instance storage disks, moves | ||
# the contents of /var/lib/kubelet and /var/lib/containerd | ||
# to the new mounted RAID, and bind mounts the kubelet and | ||
# containerd state directories. | ||
maybe_raid0() { | ||
local md_name="kubernetes" | ||
local md_device="/dev/md/${md_name}" | ||
local md_config="/.aws/mdadm.conf" | ||
local array_mount_point="${MNT_DIR}/0" | ||
mkdir -p "$(dirname "${md_config}")" | ||
|
||
if [[ ! -s "${md_config}" ]]; then | ||
mdadm --create --force --verbose \ | ||
"${md_device}" \ | ||
--level=0 \ | ||
--name="${md_name}" \ | ||
--raid-devices="${#EPHEMERAL_DISKS[@]}" \ | ||
"${EPHEMERAL_DISKS[@]}" | ||
while [ -n "$(mdadm --detail "${md_device}" | grep -ioE 'State :.*resyncing')" ]; do | ||
echo "Raid is resyncing..." | ||
sleep 1 | ||
done | ||
mdadm --detail --scan > "${md_config}" | ||
fi | ||
|
||
## Check if the device symlink has changed on reboot to include a homehost identifier | ||
local current_md_device=$(find /dev/md/ -type l -regex ".*/${md_name}_?[0-9a-z]*$" | tail -n1) | ||
if [[ ! -z ${current_md_device} ]]; then | ||
md_device="${current_md_device}" | ||
fi | ||
|
||
# Format the array if not already formatted. | ||
if [[ -z "$(lsblk "${md_device}" -o fstype --noheadings)" ]]; then | ||
## By default, mkfs tries to use the stripe unit of the array (512k), | ||
## for the log stripe unit, but the max log stripe unit is 256k. | ||
## So instead, we use 32k (8 blocks) to avoid a warning of breaching the max. | ||
## mkfs.xfs defaults to 32k after logging the warning since the default log buffer size is 32k. | ||
mkfs.xfs -l su=8b "${md_device}" | ||
fi | ||
|
||
## Create the mount directory | ||
mkdir -p "${array_mount_point}" | ||
|
||
local dev_uuid=$(blkid -s UUID -o value "${md_device}") | ||
local mount_unit_name="$(systemd-escape --path --suffix=mount "${array_mount_point}")" | ||
cat > "/etc/systemd/system/${mount_unit_name}" << EOF | ||
[Unit] | ||
Description=Mount EC2 Instance Store NVMe disk RAID0 | ||
[Mount] | ||
What=UUID=${dev_uuid} | ||
Where=${array_mount_point} | ||
Type=xfs | ||
Options=defaults,noatime | ||
[Install] | ||
WantedBy=multi-user.target | ||
EOF | ||
systemd-analyze verify "${mount_unit_name}" | ||
systemctl enable "${mount_unit_name}" --now | ||
|
||
prev_running="" | ||
needs_linked="" | ||
for unit in "kubelet" "containerd"; do | ||
## Check if the bind mount from the RAID already exists | ||
if [[ "$(systemctl is-active var-lib-${unit}.mount)" != "active" ]]; then | ||
# Check if components that depend on the RAID are running and, if so, stop them | ||
if systemctl is-active "${unit}" > /dev/null 2>&1; then | ||
prev_running+=" ${unit}" | ||
fi | ||
needs_linked+=" /var/lib/${unit}" | ||
fi | ||
done | ||
|
||
## Check if /var/log/pods has been bind mounted and make sure kubelet is stopped | ||
if [[ "$(systemctl is-active var-log-pods.mount)" != "active" ]]; then | ||
if systemctl is-active "kubelet" > /dev/null 2>&1; then | ||
prev_running+=" ${unit}" | ||
fi | ||
needs_linked+=" /var/log/pods" | ||
fi | ||
|
||
if [[ ! -z "${prev_running}" ]]; then | ||
systemctl stop ${prev_running} | ||
fi | ||
|
||
# Transfer state directories to the array, if they exist. | ||
for mount_point in ${needs_linked}; do | ||
local unit="$(basename "${mount_point}")" | ||
local array_mount_point_unit="${array_mount_point}/${unit}" | ||
mkdir -p "${mount_point}" | ||
echo "Copying ${mount_point}/ to ${array_mount_point_unit}/" | ||
cp -a "${mount_point}/" "${array_mount_point_unit}/" | ||
local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" | ||
cat > "/etc/systemd/system/${mount_unit_name}" << EOF | ||
[Unit] | ||
Description=Mount ${unit} on EC2 Instance Store NVMe RAID0 | ||
[Mount] | ||
What=${array_mount_point_unit} | ||
Where=${mount_point} | ||
Type=none | ||
Options=bind | ||
[Install] | ||
WantedBy=multi-user.target | ||
EOF | ||
systemd-analyze verify "${mount_unit_name}" | ||
systemctl enable "${mount_unit_name}" --now | ||
done | ||
|
||
if [[ ! -z "${prev_running}" ]]; then | ||
systemctl start ${prev_running} | ||
fi | ||
} | ||
|
||
# Mounts and creates xfs file systems on all EC2 instance store NVMe disks | ||
# without existing file systems. Mounts in /mnt/k8s-disks/{1..} by default | ||
maybe_mount() { | ||
idx=1 | ||
for dev in "${EPHEMERAL_DISKS[@]}"; do | ||
if [[ -z "$(lsblk "${dev}" -o fstype --noheadings)" ]]; then | ||
mkfs.xfs -l su=8b "${dev}" | ||
fi | ||
if [[ ! -z "$(lsblk "${dev}" -o MOUNTPOINT --noheadings)" ]]; then | ||
echo "${dev} is already mounted." | ||
continue | ||
fi | ||
local mount_point="${MNT_DIR}/${idx}" | ||
local mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")" | ||
mkdir -p "${mount_point}" | ||
cat > "/etc/systemd/system/${mount_unit_name}" << EOF | ||
[Unit] | ||
Description=Mount EC2 Instance Store NVMe disk ${idx} | ||
[Mount] | ||
What=${dev} | ||
Where=${mount_point} | ||
Type=xfs | ||
Options=defaults,noatime | ||
[Install] | ||
WantedBy=multi-user.target | ||
EOF | ||
systemd-analyze verify "${mount_unit_name}" | ||
systemctl enable "${mount_unit_name}" --now | ||
idx=$((idx + 1)) | ||
done | ||
} | ||
|
||
## Main logic | ||
MNT_DIR="/mnt/k8s-disks" | ||
|
||
while [[ $# -gt 0 ]]; do | ||
key="$1" | ||
case $key in | ||
-h | --help) | ||
print_help | ||
exit 0 | ||
;; | ||
-d | --dir) | ||
MNT_DIR="$2" | ||
shift | ||
shift | ||
;; | ||
*) # unknown option | ||
POSITIONAL+=("$1") # save it in an array for later | ||
shift # past argument | ||
;; | ||
esac | ||
done | ||
|
||
set +u | ||
set -- "${POSITIONAL[@]}" # restore positional parameters | ||
DISK_SETUP="$1" | ||
set -u | ||
|
||
if [[ "${DISK_SETUP}" != "raid0" && "${DISK_SETUP}" != "mount" ]]; then | ||
echo "Valid disk setup options are: raid0 or mount" | ||
exit 1 | ||
fi | ||
|
||
disks=($(find -L /dev/disk/by-id/ -xtype l -name '*NVMe_Instance_Storage_*')) | ||
## Bail early if there are no ephemeral disks to setup | ||
if [[ "${#disks[@]}" -eq 0 ]]; then | ||
echo "no ephemeral disks found, skipping disk setup" | ||
exit 0 | ||
fi | ||
|
||
if [ "$(id --user)" -ne 0 ]; then | ||
echo "Must be run as root" | ||
exit 1 | ||
fi | ||
|
||
## Get devices of NVMe instance storage ephemeral disks | ||
EPHEMERAL_DISKS=($(realpath "${disks[@]}" | sort -u)) | ||
|
||
case "${DISK_SETUP}" in | ||
"raid0") | ||
maybe_raid0 | ||
echo "Successfully setup RAID-0 consisting of ${EPHEMERAL_DISKS[@]}" | ||
;; | ||
"mount") | ||
maybe_mount | ||
echo "Successfully setup disk mounts consisting of ${EPHEMERAL_DISKS[@]}" | ||
;; | ||
esac |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters