From 97bab65a1dc17a4e4223a3988b26f2a9f1b15d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20N=C3=A4gele?= Date: Thu, 1 Feb 2018 15:31:10 +0100 Subject: [PATCH] Improve readiness check (#3509) This fixes flaky readiness checks by double-checking readiness. Previously, the script could deem all pods ready when, at the moment the check was executed, only ready pods were reported but not all replicas were running yet. --- test/verify-release.sh | 53 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/test/verify-release.sh b/test/verify-release.sh index 2bcf6f597d17..c57ddbcff313 100755 --- a/test/verify-release.sh +++ b/test/verify-release.sh @@ -1,54 +1,73 @@ -#!/bin/bash -xe +#!/bin/bash set -o errexit set -o nounset set -o pipefail set -o xtrace -NAMESPACE=$1 -if [ -z $NAMESPACE ];then +NAMESPACE="${1:-}" +if [[ -z "$NAMESPACE" ]];then echo "ERROR: No namespace specified" exit 1 fi # Ensure all pods in the namespace entered a Running state -SUCCESS=0 PODS_FOUND=0 POD_RETRY_COUNT=0 RETRY=54 RETRY_DELAY=10 -while [ "$POD_RETRY_COUNT" -lt "$RETRY" ]; do - POD_STATUS=`kubectl get pods --no-headers --namespace $NAMESPACE` - if [ -z "$POD_STATUS" ];then + +while (("$POD_RETRY_COUNT" < "$RETRY")); do + POD_STATUS=$(kubectl get pods --no-headers --namespace "$NAMESPACE") + + if [[ -z "$POD_STATUS" ]];then echo "INFO: No pods found for this release, retrying after sleep" - POD_RETRY_COUNT=$((POD_RETRY_COUNT+1)) - sleep $RETRY_DELAY + POD_RETRY_COUNT=$((POD_RETRY_COUNT + 1)) + + sleep "$RETRY_DELAY" continue else PODS_FOUND=1 fi - if ! echo "$POD_STATUS" | grep -v Running;then + if ! 
echo "$POD_STATUS" | grep -v Running; then echo "INFO: All pods entered the Running state" CONTAINER_RETRY_COUNT=0 - while [ "$CONTAINER_RETRY_COUNT" -lt "$RETRY" ]; do - UNREADY_CONTAINERS=`kubectl get pods --namespace $NAMESPACE \ - -o jsonpath="{.items[*].status.containerStatuses[?(@.ready!=true)].name}"` - if [ -n "$UNREADY_CONTAINERS" ];then + READINESS_RETRY_COUNT=0 + READINESS_RETRY_DELAY=2 + + while (("$CONTAINER_RETRY_COUNT" < "$RETRY")); do + JSON_PATH="{.items[*].status.containerStatuses[?(@.ready!=true)].name}" + UNREADY_CONTAINERS=$(kubectl get pods --namespace "$NAMESPACE" -o "jsonpath=$JSON_PATH") + + if [[ -n "$UNREADY_CONTAINERS" ]]; then echo "INFO: Some containers are not yet ready; retrying after sleep" - CONTAINER_RETRY_COUNT=$((CONTAINER_RETRY_COUNT+1)) - sleep $RETRY_DELAY + + CONTAINER_RETRY_COUNT=$((CONTAINER_RETRY_COUNT + 1)) + READINESS_RETRY_COUNT=0 + + sleep "$RETRY_DELAY" continue else echo "INFO: All containers are ready" + + if (("$READINESS_RETRY_COUNT" < 3)); then + echo "INFO: Double-checking readiness again" + + READINESS_RETRY_COUNT=$((READINESS_RETRY_COUNT + 1)) + + sleep "$READINESS_RETRY_DELAY" + continue + fi + exit 0 fi done fi done -if [ "$PODS_FOUND" -eq 0 ];then +if (("$PODS_FOUND" == 0)); then echo "WARN: No pods launched by this chart's default settings" exit 0 else