From 9991d12c52d57c4cf089c9a6c7fa9116ab4be2a6 Mon Sep 17 00:00:00 2001 From: Seshachalam Yerasala Venkata <104052572+seshachalam-yv@users.noreply.github.com> Date: Thu, 16 Feb 2023 20:24:08 +0530 Subject: [PATCH] Update `kubectl wait` command to handle multiple conditions correctly (#7439) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix kubectl wait command to handle multiple conditions correctly * Add bash script for checking resource conditions This script waits until all conditions are passed for a given resource in a kubernetes cluster. It takes the resource type, object name, and a list of conditions as arguments. * Enable featureGate ExpandedDNSConfig for provider local Kind cluster Apply suggestions from code review Co-authored-by: Oliver Götz <47362717+oliver-goetz@users.noreply.github.com> * Update hack/usage/wait.sh Co-authored-by: Rafael Franzke * Addressed review feedback * Update example/gardener-local/kind/cluster/templates/cluster.yaml Co-authored-by: Rafael Franzke * Fix typo * Increase retry_limit from 180 to 240 for multi-zone * `RETRY_LIMIT` has been increased to 240 (20 minutes) due to the upgrading of Gardener for seed. * In a single-zone setup, 3 istio-ingressgateway pods will be running, and it will take 9 minutes to complete the rollout. * In a multi-zone setup, 6 istio-ingressgateway pods will be running, and it will take 18 minutes to complete the rollout. 
* refactor: Optimize bash script and added log messages with timestamps - Removed probes (readiness, liveness) for zero-downtime validator job * Addressed review feedback - Replace RETRY_LIMIT with TIMEOUT in wait-for script * use TIMEOUT instead of RETRY_LIMIT in hack/ci-e2e-kind-upgrade.sh * Apply suggestions from code review Co-authored-by: Johannes Scheerer --------- Co-authored-by: Oliver Götz <47362717+oliver-goetz@users.noreply.github.com> Co-authored-by: Rafael Franzke Co-authored-by: Johannes Scheerer --- docs/deployment/getting_started_locally.md | 6 +- docs/development/getting_started_locally.md | 4 +- .../kind/cluster/templates/cluster.yaml | 5 ++ hack/ci-e2e-kind-upgrade.sh | 14 ++-- hack/usage/wait-for.sh | 79 +++++++++++++++++++ skaffold.yaml | 61 ++------------ .../shoots/update/highavailability/upgrade.go | 57 ++++--------- 7 files changed, 116 insertions(+), 110 deletions(-) create mode 100755 hack/usage/wait-for.sh diff --git a/docs/deployment/getting_started_locally.md b/docs/deployment/getting_started_locally.md index 4f1b44aa504..9eed63c073d 100644 --- a/docs/deployment/getting_started_locally.md +++ b/docs/deployment/getting_started_locally.md @@ -97,7 +97,7 @@ Afterwards, the Gardener resources will be deployed into the cluster. 
You can wait for the `Seed` to be ready by running: ```bash -kubectl wait --for=condition=gardenletready --for=condition=extensionsready --for=condition=bootstrapped seed local --timeout=5m +./hack/usage/wait-for.sh seed local GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady ``` Alternatively, you can run `kubectl get seed local` and wait for the `STATUS` to indicate readiness: @@ -116,7 +116,7 @@ kubectl apply -f example/provider-local/shoot.yaml You can wait for the `Shoot` to be ready by running: ```bash -kubectl wait --for=condition=apiserveravailable --for=condition=controlplanehealthy --for=condition=observabilitycomponentshealthy --for=condition=everynodeready --for=condition=systemcomponentshealthy shoot local -n garden-local --timeout=10m +NAMESPACE=garden-local ./hack/usage/wait-for.sh shoot local APIServerAvailable ControlPlaneHealthy ObservabilityComponentsHealthy EveryNodeReady SystemComponentsHealthy ``` Alternatively, you can run `kubectl -n garden-local get shoot local` and wait for the `LAST OPERATION` to reach `100%`: @@ -193,7 +193,7 @@ The following steps assume that you are using the kubeconfig that points to the You can wait for the `local2` `Seed` to be ready by running: ```bash -kubectl wait --for=condition=gardenletready --for=condition=extensionsready --for=condition=bootstrapped seed local2 --timeout=5m +./hack/usage/wait-for.sh seed local2 GardenletReady Bootstrapped ExtensionsReady ``` Alternatively, you can run `kubectl get seed local2` and wait for the `STATUS` to indicate readiness: diff --git a/docs/development/getting_started_locally.md b/docs/development/getting_started_locally.md index 5aaac41a7c3..79c0704521a 100644 --- a/docs/development/getting_started_locally.md +++ b/docs/development/getting_started_locally.md @@ -142,7 +142,7 @@ make start-extension-provider-local # You can wait for the `Seed` to become ready by running: ```bash -kubectl wait --for=condition=gardenletready --for=condition=extensionsready 
--for=condition=bootstrapped seed local --timeout=5m +./hack/usage/wait-for.sh seed local GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady ``` Alternatively, you can run `kubectl get seed local` and wait for the `STATUS` to indicate readiness: @@ -161,7 +161,7 @@ kubectl apply -f example/provider-local/shoot.yaml You can wait for the `Shoot` to be ready by running: ```bash -kubectl wait --for=condition=apiserveravailable --for=condition=controlplanehealthy --for=condition=observabilitycomponentshealthy --for=condition=everynodeready --for=condition=systemcomponentshealthy shoot local -n garden-local --timeout=10m +NAMESPACE=garden-local ./hack/usage/wait-for.sh shoot local APIServerAvailable ControlPlaneHealthy ObservabilityComponentsHealthy EveryNodeReady SystemComponentsHealthy ``` Alternatively, you can run `kubectl -n garden-local get shoot local` and wait for the `LAST OPERATION` to reach `100%`: diff --git a/example/gardener-local/kind/cluster/templates/cluster.yaml b/example/gardener-local/kind/cluster/templates/cluster.yaml index ed50ce75b9b..a6116cc624c 100644 --- a/example/gardener-local/kind/cluster/templates/cluster.yaml +++ b/example/gardener-local/kind/cluster/templates/cluster.yaml @@ -1,5 +1,10 @@ apiVersion: kind.x-k8s.io/v1alpha4 kind: Cluster +featureGates: +# Introduced due to the issue outlined in the following link: https://github.com/gardener/gardener/issues/7297#issuecomment-1377515385 +# Feature gate 'ExpandedDNSConfig' will be true by default from k8s version 1.26. 
+# Remove this once kind cluster version is upgraded to 1.26 + ExpandedDNSConfig: true nodes: - role: control-plane image: {{ .Values.image }} diff --git a/hack/ci-e2e-kind-upgrade.sh b/hack/ci-e2e-kind-upgrade.sh index 1d18892e735..008a276c54b 100755 --- a/hack/ci-e2e-kind-upgrade.sh +++ b/hack/ci-e2e-kind-upgrade.sh @@ -170,12 +170,6 @@ function set_seed_name() { esac } -function wait_until_seed_gets_upgraded() { - echo "Wait until seed gets upgraded from version '$GARDENER_PREVIOUS_RELEASE' to '$GARDENER_NEXT_RELEASE'" - kubectl wait seed $1 --timeout=5m \ - --for=jsonpath='{.status.gardener.version}'=$GARDENER_NEXT_RELEASE && condition=gardenletready && condition=extensionsready && condition=bootstrapped -} - clamp_mss_to_pmtu set_gardener_upgrade_version_env_variables set_cluster_name @@ -203,7 +197,13 @@ make test-pre-upgrade GARDENER_PREVIOUS_RELEASE=$GARDENER_PREVIOUS_RELEASE GARDE echo "Upgrading gardener version '$GARDENER_PREVIOUS_RELEASE' to '$GARDENER_NEXT_RELEASE'" upgrade_to_next_release -wait_until_seed_gets_upgraded "$SEED_NAME" + +echo "Wait until seed '$SEED_NAME' gets upgraded from version '$GARDENER_PREVIOUS_RELEASE' to '$GARDENER_NEXT_RELEASE'" +kubectl wait seed $SEED_NAME --timeout=5m --for=jsonpath="{.status.gardener.version}=$GARDENER_NEXT_RELEASE" +# TIMEOUT has been increased to 1200 (20 minutes) due to the upgrading of Gardener for seed. +# In a single-zone setup, 2 istio-ingressgateway pods will be running, and it will take 9 minutes to complete the rollout. +# In a multi-zone setup, 6 istio-ingressgateway pods will be running, and it will take 18 minutes to complete the rollout. 
+TIMEOUT=1200 ./hack/usage/wait-for.sh seed "$SEED_NAME" GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady BackupBucketsReady echo "Running gardener post-upgrade tests" make test-post-upgrade GARDENER_PREVIOUS_RELEASE=$GARDENER_PREVIOUS_RELEASE GARDENER_NEXT_RELEASE=$GARDENER_NEXT_RELEASE diff --git a/hack/usage/wait-for.sh b/hack/usage/wait-for.sh new file mode 100755 index 00000000000..bc61876cd6d --- /dev/null +++ b/hack/usage/wait-for.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2023 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script waits until all conditions are passed for a given resource in a kubernetes cluster. +# It takes the resource type, object name, and a list of conditions as arguments +set -euo pipefail + +if [ "$#" -lt 3 ]; then + echo "Usage: $0 RESOURCE_TYPE OBJECT_NAME CONDITION... +Note: Namespace/Timeout will be used from the 'NAMESPACE'/'TIMEOUT' environment variable if set, otherwise it is optional. + TIMEOUT: The operation will be retried until the timeout[default 600 seconds] is reached, with a 5 second sleep interval between each retry. 
+" + exit 1 +fi + +RESOURCE_TYPE=$1 +OBJECT_NAME=$2 +shift 2 +CONDITIONS=("$@") +NAMESPACE=${NAMESPACE:-} + +# The timeout in seconds after which the script gives up and fails +TIMEOUT=${TIMEOUT:-600} + +# The interval between each retry in seconds +SLEEP_INTERVAL=${SLEEP_INTERVAL:-5} + +RED='\033[0;31m' +GREEN='\033[0;32m' +NO_COLOR='\033[0m' + +echo "⏳ Checking conditions for ${RESOURCE_TYPE}/${OBJECT_NAME}..." +retries=0 +while [ "${retries}" -lt "${TIMEOUT}" ]; do + if [ -z "$NAMESPACE" ]; then + # Fetch the object as JSON and extract its conditions with yq. '|| true' keeps a failed lookup (object not created yet, API request timeout) from aborting the script under 'set -euo pipefail'; the loop simply retries. + CONDITION_STATES=$(kubectl get "${RESOURCE_TYPE}" "${OBJECT_NAME}" --request-timeout='1s' -o json | yq '.status.conditions' || true) + else + # Same lookup scoped to the given namespace; '|| true' again turns a failed lookup into a retry instead of an abort. + CONDITION_STATES=$(kubectl get "${RESOURCE_TYPE}" "${OBJECT_NAME}" -n "$NAMESPACE" --request-timeout='1s' -o json | yq '.status.conditions' || true) + fi + + # A flag to indicate if all conditions have passed + ALL_PASSED=true + # Iterate through each condition + for condition in "${CONDITIONS[@]}"; do + if ! 
echo "${CONDITION_STATES}" | yq -e '.[] | select(.type == "'"${condition}"'").status == "True"' &>/dev/null; then + ALL_PASSED=false + break + fi + done + + # If all conditions have passed, break the loop + if [ "${ALL_PASSED}" = true ]; then + echo -e "${GREEN}✅ All conditions passed for ${RESOURCE_TYPE}/${OBJECT_NAME}.${NO_COLOR}" + break + fi + + retries=$((retries + SLEEP_INTERVAL)) + sleep "${SLEEP_INTERVAL}" +done + +if [ "${retries}" -ge "${TIMEOUT}" ]; then + echo -e "${RED}❌ ERROR: ${condition} not met for ${RESOURCE_TYPE}/${OBJECT_NAME} after ${TIMEOUT} seconds.${NO_COLOR}" + exit 1 +fi diff --git a/skaffold.yaml b/skaffold.yaml index c1e76ad61c8..a902a1ff17f 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -1117,18 +1117,7 @@ deploy: command: - bash - -ec - - | - echo "Wait until seed is ready" - for i in `seq 1 30`; - do - if kubectl get seed local 2> /dev/null; then - break - fi - echo "Wait until seed gets created by gardenlet" - sleep 2 - done - kubectl wait --for=condition=gardenletready --for=condition=extensionsready --for=condition=bootstrapped \ - --for=condition=seedsystemcomponentshealthy --for=condition=backupbucketsready seed local --timeout=5m + - hack/usage/wait-for.sh seed local GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady BackupBucketsReady releases: - name: gardener-gardenlet chartPath: charts/gardener/gardenlet @@ -1164,17 +1153,7 @@ profiles: value: - bash - -ec - - | - echo "Wait until seed is ready" - for i in `seq 1 30`; - do - if kubectl --kubeconfig=$GARDENER_LOCAL_KUBECONFIG get seed local2 2> /dev/null; then - break - fi - echo "Wait until seed gets created by gardenlet" - sleep 2 - done - kubectl --kubeconfig=$GARDENER_LOCAL_KUBECONFIG wait --for=condition=gardenletready --for=condition=extensionsready --for=condition=bootstrapped seed local2 --timeout=5m + - KUBECONFIG=$GARDENER_LOCAL_KUBECONFIG hack/usage/wait-for.sh seed local2 GardenletReady Bootstrapped SeedSystemComponentsHealthy 
ExtensionsReady BackupBucketsReady - op: add path: /deploy/helm/releases/0/valuesFiles/- value: example/gardener-local/gardenlet/values-kind2.yaml @@ -1195,17 +1174,7 @@ profiles: value: - bash - -ec - - | - echo "Wait until seed is ready" - for i in `seq 1 60`; - do - if kubectl --kubeconfig=$GARDENER_LOCAL_KUBECONFIG get seed $SEED_NAME 2> /dev/null; then - break - fi - echo "Wait until seed gets created by gardenlet" - sleep 2 - done - kubectl --kubeconfig=$GARDENER_LOCAL_KUBECONFIG wait --for=condition=gardenletready --for=condition=bootstrapped seed $SEED_NAME --timeout=5m + - KUBECONFIG=$GARDENER_LOCAL_KUBECONFIG hack/usage/wait-for.sh seed "$SEED_NAME" GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady - op: add path: /deploy/helm/releases/0/valuesFiles/- value: example/gardener-local/gardenlet/values-provider-extensions.yaml @@ -1236,17 +1205,7 @@ profiles: value: - bash - -ec - - | - echo "Wait until seed is ready" - for i in `seq 1 30`; - do - if kubectl --kubeconfig=$GARDENER_LOCAL_HA_SINGLE_ZONE_KUBECONFIG get seed local-ha-single-zone 2> /dev/null; then - break - fi - echo "Wait until seed gets created by gardenlet" - sleep 2 - done - kubectl --kubeconfig=$GARDENER_LOCAL_HA_SINGLE_ZONE_KUBECONFIG wait --for=condition=gardenletready --for=condition=extensionsready --for=condition=bootstrapped seed local-ha-single-zone --timeout=5m + - hack/usage/wait-for.sh seed local-ha-single-zone GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady BackupBucketsReady - name: ha-multi-zone patches: @@ -1258,14 +1217,4 @@ profiles: value: - bash - -ec - - | - echo "Wait until seed is ready" - for i in `seq 1 30`; - do - if kubectl --kubeconfig=$GARDENER_LOCAL_HA_MULTI_ZONE_KUBECONFIG get seed local-ha-multi-zone 2> /dev/null; then - break - fi - echo "Wait until seed gets created by gardenlet" - sleep 2 - done - kubectl --kubeconfig=$GARDENER_LOCAL_HA_MULTI_ZONE_KUBECONFIG wait --for=condition=gardenletready 
--for=condition=extensionsready --for=condition=bootstrapped seed local-ha-multi-zone --timeout=5m + - TIMEOUT=1200 hack/usage/wait-for.sh seed local-ha-multi-zone GardenletReady Bootstrapped SeedSystemComponentsHealthy ExtensionsReady BackupBucketsReady diff --git a/test/utils/shoots/update/highavailability/upgrade.go b/test/utils/shoots/update/highavailability/upgrade.go index e7257e813c9..d1a8ca0086e 100644 --- a/test/utils/shoots/update/highavailability/upgrade.go +++ b/test/utils/shoots/update/highavailability/upgrade.go @@ -145,48 +145,21 @@ func DeployZeroDownTimeValidatorJob(ctx context.Context, c client.Client, testNa Spec: corev1.PodSpec{ Containers: []corev1.Container{ { - Name: "validator", - Image: "alpine/curl", - Command: []string{"/bin/sh"}, - - //To avoid flakiness, consider downtime when curl fails consecutively back-to-back. - Args: []string{"-ec", - "echo '" + - "failed=0 ; threshold=2 ; " + - "while [ $failed -lt $threshold ] ; do " + - "$(curl -k https://kube-apiserver/healthz -H \"Authorization: " + token + "\" -s -f -o /dev/null ); " + - "if [ $? 
-gt 0 ] ; then let failed++; echo \"etcd is unhealthy and retrying\"; continue; fi ; " + - "echo \"kube-apiserver is healthy\"; touch /tmp/healthy; let failed=0; " + - "sleep 1; done; echo \"kube-apiserver is unhealthy\"; exit 1;" + - "' > test.sh && sh test.sh", - }, - ReadinessProbe: &corev1.Probe{ - InitialDelaySeconds: int32(5), - FailureThreshold: int32(2), - PeriodSeconds: int32(1), - SuccessThreshold: int32(3), - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "cat", - "/tmp/healthy", - }, - }, - }, - }, - LivenessProbe: &corev1.Probe{ - InitialDelaySeconds: int32(5), - FailureThreshold: int32(2), - PeriodSeconds: int32(1), - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "cat", - "/tmp/healthy", - }, - }, - }, - }, + Name: "validator", + Image: "alpine/curl", + Command: []string{"/bin/sh", "-ec", + //To avoid flakiness, consider downtime when curl fails consecutively back-to-back three times. + "failed=0; threshold=3; " + + "while [ $failed -lt $threshold ]; do " + + "if curl -m 2 -k https://kube-apiserver/healthz -H 'Authorization: " + token + "' -s -f -o /dev/null ; then " + + "echo $(date +'%Y-%m-%dT%H:%M:%S.%3N%z') INFO: kube-apiserver is healthy.; failed=0; " + + "else failed=$((failed+1)); " + + "echo $(date +'%Y-%m-%dT%H:%M:%S.%3N%z') ERROR: kube-apiserver is unhealthy and retrying.; " + + "fi; " + + "sleep 1; " + + "done; " + + "echo $(date +'%Y-%m-%dT%H:%M:%S.%3N%z') ERROR: kube-apiserver is still unhealthy after $failed attempts. Considered as downtime.; " + + "exit 1; "}, }, }, RestartPolicy: corev1.RestartPolicyNever,