Skip to content

Commit

Permalink
feat: add greenboot default healthcheck
Browse files Browse the repository at this point in the history
greenboot default healthcheck provides a set of health-checks.

Signed-off-by: Sayan Paul <saypaul@redhat.com>
  • Loading branch information
say-paul committed Oct 3, 2023
1 parent 2d8e5ce commit 11f155b
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 0 deletions.
56 changes: 56 additions & 0 deletions usr/lib/greenboot/check/required.d/01_repository_dns_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Greenboot required health check: verifies that the host names used by the
# repository URLs found under REPOS_DIRECTORY can be resolved.
# Abort as soon as any unguarded command fails.
set -e

# Directory whose files are scanned for http(s) repository URLs.
REPOS_DIRECTORY=/etc/ostree/remotes.d
# Domain names for which no address could be resolved; filled in by the check.
DOMAINS_WITH_PROBLEMS=()

get_domain_names_from_platform_urls() {
  # Collect the unique host names used by the http(s) URLs found in the files
  # under REPOS_DIRECTORY and store them, one per line, in DOMAIN_NAMES.
  #
  # Globals:
  #   REPOS_DIRECTORY (read)  - directory whose files are scanned
  #   DOMAIN_NAMES    (write) - sorted, de-duplicated hosts, newline-separated
  # Outputs:
  #   Prints a notice to stdout when no domain name is found.
  #
  # Hosts that are literal IPv4 addresses are skipped because they need no DNS
  # lookup. The IPv4 filter is anchored and applied to the extracted host
  # only, so a URL such as https://example.com/repo/1.2.3.4/ still
  # contributes example.com instead of being discarded as a whole.
  #
  # With "/" as the field separator, "scheme://host/path" splits into
  # "scheme:", "", "host", "path", ... so the host is always field 3.
  DOMAIN_NAMES=$(grep -E -ho 'https?://[a-zA-Z0-9./-]+' "$REPOS_DIRECTORY"/* \
    | awk -F/ '{print $3}' \
    | grep -E -v '^[0-9]{1,3}(\.[0-9]{1,3}){3}$' \
    | sort -u)
  if [[ -z $DOMAIN_NAMES ]]; then
    echo "No domain names have been found"
  fi
}

get_dns_resolution_from_domain_names() {
  # Look up every name listed in DOMAIN_NAMES with `getent hosts` and append
  # to DOMAINS_WITH_PROBLEMS each name for which the lookup prints no line.
  #
  # Globals:
  #   DOMAIN_NAMES          (read)  - whitespace-separated domain names
  #   DOMAINS_WITH_PROBLEMS (write) - array receiving unresolved names
  local domain resolved_count
  # DOMAIN_NAMES is deliberately unquoted: word splitting yields one name per
  # iteration.
  for domain in $DOMAIN_NAMES; do
    resolved_count=$(getent hosts "$domain" | wc -l)
    # At least one output line means the name resolved; nothing to record.
    ((resolved_count == 0)) || continue
    DOMAINS_WITH_PROBLEMS+=("$domain")
  done
}

assert_dns_resolution_result() {
  # Terminate the script according to the content of DOMAINS_WITH_PROBLEMS:
  # exit 1 after listing the offending domains when the array is non-empty,
  # exit 0 with a success message otherwise. This function never returns.
  if ((${#DOMAINS_WITH_PROBLEMS[@]} > 0)); then
    echo "The following repository domains haven't responded properly to DNS queries:"
    # [*] joins all entries on a single line.
    echo "${DOMAINS_WITH_PROBLEMS[*]}"
    exit 1
  fi

  echo "All domains have resolved correctly"
  exit 0
}

# Main flow: bail out early when there is nothing to inspect, otherwise
# extract the domain names and verify that each of them resolves.

# A missing remotes directory is reported as a failure.
[[ -d $REPOS_DIRECTORY ]] || {
  echo "${REPOS_DIRECTORY} doesn't exist"
  exit 1
}

# An empty remotes directory means there is nothing to verify.
if [[ -z "$(ls -A "$REPOS_DIRECTORY")" ]]; then
  echo "${REPOS_DIRECTORY} is empty, skipping check"
  exit 0
fi

get_domain_names_from_platform_urls
if [[ -z $DOMAIN_NAMES ]]; then
  # No domain name was extracted, so there is nothing to resolve.
  exit 0
fi

get_dns_resolution_from_domain_names
assert_dns_resolution_result
70 changes: 70 additions & 0 deletions usr/lib/greenboot/check/required.d/02_watchdog.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
# Greenboot required health check: fails when the current boot looks
# watchdog-triggered and the last deployment is younger than the grace period.
# Exit on any unguarded failure, including a failure in any pipeline stage.
set -eo pipefail

source_configuration_file() {
  # Load the greenboot configuration into the current shell when the file
  # exists; a missing file is not an error.
  #
  # Globals:
  #   GREENBOOT_CONFIGURATION_FILE (write) - path of the configuration file
  GREENBOOT_CONFIGURATION_FILE=/etc/greenboot/greenboot.conf
  [[ -f $GREENBOOT_CONFIGURATION_FILE ]] || return 0
  # shellcheck source=etc/greenboot/greenboot.conf
  . "$GREENBOOT_CONFIGURATION_FILE"
}

set_grace_period() {
  # Choose the grace period, in hours, used by the watchdog check.
  #
  # Globals:
  #   GREENBOOT_WATCHDOG_GRACE_PERIOD (read)  - optional override
  #   DEFAULT_GRACE_PERIOD            (write) - fallback value, 24 hours
  #   GRACE_PERIOD                    (write) - value that is actually used
  DEFAULT_GRACE_PERIOD=24

  # ":-" falls back to the default when the override is unset or empty.
  GRACE_PERIOD=${GREENBOOT_WATCHDOG_GRACE_PERIOD:-$DEFAULT_GRACE_PERIOD}
}

check_if_there_is_a_watchdog() {
  # Probe for a watchdog by running `wdctl` with its error output discarded.
  #
  # Returns:
  #   0 when wdctl succeeds, 1 for any wdctl failure.
  wdctl 2>/dev/null && return 0
  return 1
}

check_if_current_boot_is_wd_triggered() {
  # Fail the health check when wdctl reports a flag line ending in "1".
  #
  # Globals:
  #   WDCTL_OUTPUT (write) - number of wdctl flag lines ending in "1"
  # Exits:
  #   0 when no watchdog is present, 1 when at least one line matched.
  #   Returns normally (status 0) when a watchdog exists and nothing matched.
  if ! check_if_there_is_a_watchdog; then
    # There's no watchdog, so nothing to be done here
    exit 0
  fi

  # grep -c exits non-zero when the count is 0; "|| true" keeps that from
  # aborting the script under "set -eo pipefail".
  # NOTE(review): the trailing "1" is presumably the boot-status column of
  # the wdctl flags table -- confirm against wdctl(8).
  WDCTL_OUTPUT=$(wdctl --flags-only --noheadings | grep -c '1$' || true)
  if [ "$WDCTL_OUTPUT" -gt 0 ]; then
    # This means the boot was watchdog triggered
    # TODO: maybe do a rollback here?
    echo "Watchdog triggered after recent update"
    exit 1
  fi
}

# Main flow. It is skipped when the first argument is "--source-only", which
# allows check_if_current_boot_is_wd_triggered to be tested on its own
# (for instance within a container) by sourcing this file.
if [ "${1}" != "--source-only" ]; then
  if ! check_if_there_is_a_watchdog; then
    echo "No watchdog on the system, skipping check"
    exit 0
  fi

  source_configuration_file
  # The check only runs when the configuration sets
  # GREENBOOT_WATCHDOG_CHECK_ENABLED to "true" (compared case-insensitively).
  if [ "${GREENBOOT_WATCHDOG_CHECK_ENABLED,,}" != "true" ]; then
    echo "Watchdog check is disabled"
    exit 0
  fi

  set_grace_period

  SECONDS_IN_AN_HOUR=$((60 * 60))
  # The jq filter is quoted so the shell never treats "[0]" as a glob
  # pattern: unquoted, a matching file name in the working directory (or the
  # failglob/nullglob options) would alter or remove the filter.
  LAST_DEPLOYMENT_TIMESTAMP=$(rpm-ostree status --json | jq '.deployments[0].timestamp')

  HOURS_SINCE_LAST_UPDATE=$((($(date +%s) - "$LAST_DEPLOYMENT_TIMESTAMP") / SECONDS_IN_AN_HOUR))
  # Only a deployment younger than the grace period is inspected for a
  # watchdog-triggered boot; older deployments pass unconditionally.
  if [ "$HOURS_SINCE_LAST_UPDATE" -lt "$GRACE_PERIOD" ]; then
    check_if_current_boot_is_wd_triggered
  else
    exit 0
  fi
fi
38 changes: 38 additions & 0 deletions usr/lib/greenboot/check/wanted.d/01_update_platforms_check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
# Greenboot wanted health check: verifies that the update platform URLs found
# under REPOS_DIRECTORY answer HTTP requests with a 2xx or 3xx status.
# Abort as soon as any unguarded command fails.
set -e

# Directory whose files are scanned for http(s) update platform URLs.
REPOS_DIRECTORY=/etc/ostree/remotes.d
# URLs that did not answer with a 2xx/3xx status; filled in by the check.
URLS_WITH_PROBLEMS=()

get_update_platform_urls() {
  # Fill UPDATE_PLATFORM_URLS with every match of "http", an optional "s" and
  # the remainder of the line, taken from the files under REPOS_DIRECTORY.
  #
  # Globals:
  #   REPOS_DIRECTORY      (read)  - directory whose files are scanned
  #   UPDATE_PLATFORM_URLS (write) - array with one matched URL per entry
  # Exits:
  #   1, after printing a notice, when no URL is found.
  mapfile -t UPDATE_PLATFORM_URLS < <(grep -P -ho 'http[s]?.*' "${REPOS_DIRECTORY}"/*)
  if ((${#UPDATE_PLATFORM_URLS[@]} > 0)); then
    return 0
  fi

  echo "No update platforms found, this can be a mistake"
  exit 1
}

assert_update_platforms_are_responding() {
  # Send a HEAD request to every URL in UPDATE_PLATFORM_URLS and terminate
  # the script with the verdict. This function never returns.
  #
  # Globals:
  #   UPDATE_PLATFORM_URLS (read)  - URLs to probe
  #   URLS_WITH_PROBLEMS   (write) - URLs whose status is neither 2xx nor 3xx
  # Exits:
  #   0 when every URL answered with a 2xx/3xx status, 1 otherwise.
  local url status
  for url in "${UPDATE_PLATFORM_URLS[@]}"; do
    # When curl itself fails, "Unreachable" is appended to whatever curl
    # printed, so the command substitution never aborts under "set -e".
    status=$(curl -o /dev/null -Isw '%{http_code}\n' "$url" || echo "Unreachable")
    case $status in
      2* | 3*) ;;
      *) URLS_WITH_PROBLEMS+=("$url") ;;
    esac
  done

  if ((${#URLS_WITH_PROBLEMS[@]} > 0)); then
    echo "There are problems connecting with the following URLs:"
    # [*] joins all entries on a single line.
    echo "${URLS_WITH_PROBLEMS[*]}"
    exit 1
  fi

  echo "We can connect to all update platforms"
  exit 0
}

# Main flow: a missing remotes directory is reported as a failure; otherwise
# collect the update platform URLs and probe each of them.
[[ -d $REPOS_DIRECTORY ]] || {
  echo "${REPOS_DIRECTORY} doesn't exist"
  exit 1
}

get_update_platform_urls
assert_update_platforms_are_responding

0 comments on commit 11f155b

Please sign in to comment.