Skip to content

Commit

Permalink
add capi update job
Browse files Browse the repository at this point in the history
  • Loading branch information
heliubj18 committed Jul 23, 2024
1 parent 03ac3bf commit e8cc4d6
Show file tree
Hide file tree
Showing 15 changed files with 474 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,19 @@ tests:
test:
- chain: cucushift-installer-check-cluster-health
workflow: osd-ccs-aws
# Test entry for the ROSA HCP CAPI upgrade job.
- as: aws-rosa-hcp-capi-upgrade-f14
# Cron fields: minute 20, hour 19, on days 2 and 18 of every month.
cron: 20 19 2,18 * *
steps:
cluster_profile: aws-qe
# Values under env are handed to the steps of the workflow named at the bottom of this entry.
env:
BASE_DOMAIN: qe.devcluster.openshift.com
# Major.minor only; NOTE(review): presumably resolved to a concrete z-stream by the steps — confirm.
OPENSHIFT_VERSION: "4.16"
TEST_FILTERS: ~ChkUpgrade&;~NonPreRelease&;~Serial&;~Disruptive&;~DisconnectedOnly&;~HyperShiftMGMT&;~MicroShiftOnly&;~NonHyperShiftHOST&;ROSA&
TEST_IMPORTANCE: Critical
# Steps listed under test: the extended test run followed by the QE report step.
test:
- ref: openshift-extended-test
- ref: openshift-e2e-test-qe-report
workflow: rosa-aws-sts-hcp-capi-upgrade
- as: aws-rosa-hcp-byo-kms-oidc-auditlog-stage-critical-f14
cron: 37 6 8,22 * *
steps:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39707,6 +39707,93 @@ periodics:
- name: result-aggregator
secret:
secretName: result-aggregator
# Prow periodic that runs ci-operator with --target=aws-rosa-hcp-capi-upgrade-f14.
# NOTE(review): the ci.openshift.io/generator: prowgen label suggests this entry is generated
# output — presumably it should be regenerated rather than edited by hand; confirm.
- agent: kubernetes
cluster: build05
# Same schedule string as the test entry of the same name: minute 20, hour 19, days 2 and 18.
cron: 20 19 2,18 * *
decorate: true
decoration_config:
skip_cloning: true
extra_refs:
- base_ref: release-4.16
org: openshift
repo: openshift-tests-private
labels:
ci-operator.openshift.io/cloud: aws
ci-operator.openshift.io/cloud-cluster-profile: aws-qe
ci-operator.openshift.io/variant: amd64-stable
ci.openshift.io/generator: prowgen
pj-rehearse.openshift.io/can-be-rehearsed: "true"
name: periodic-ci-openshift-openshift-tests-private-release-4.16-amd64-stable-aws-rosa-hcp-capi-upgrade-f14
spec:
containers:
# Every path passed to ci-operator below corresponds to a mountPath in volumeMounts.
- args:
- --gcs-upload-secret=/secrets/gcs/service-account.json
- --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
- --lease-server-credentials-file=/etc/boskos/credentials
- --oauth-token-path=/usr/local/github-credentials/oauth
- --report-credentials-file=/etc/report/credentials
- --secret-dir=/secrets/ci-pull-credentials
- --secret-dir=/usr/local/aws-rosa-hcp-capi-upgrade-f14-cluster-profile
- --target=aws-rosa-hcp-capi-upgrade-f14
- --variant=amd64-stable
command:
- ci-operator
image: ci-operator:latest
imagePullPolicy: Always
name: ""
resources:
requests:
cpu: 10m
volumeMounts:
- mountPath: /etc/boskos
name: boskos
readOnly: true
- mountPath: /secrets/ci-pull-credentials
name: ci-pull-credentials
readOnly: true
# The cluster-profile mount is the only one in this list without readOnly: true.
- mountPath: /usr/local/aws-rosa-hcp-capi-upgrade-f14-cluster-profile
name: cluster-profile
- mountPath: /secrets/gcs
name: gcs-credentials
readOnly: true
- mountPath: /usr/local/github-credentials
name: github-credentials-openshift-ci-robot-private-git-cloner
readOnly: true
- mountPath: /secrets/manifest-tool
name: manifest-tool-local-pusher
readOnly: true
- mountPath: /etc/pull-secret
name: pull-secret
readOnly: true
- mountPath: /etc/report
name: result-aggregator
readOnly: true
serviceAccountName: ci-operator
# Secret-backed volumes referenced by name from volumeMounts above.
# NOTE(review): no gcs-credentials volume is declared in this list although it is mounted
# above — presumably supplied elsewhere (e.g. by job decoration); confirm.
volumes:
- name: boskos
secret:
items:
- key: credentials
path: credentials
secretName: boskos-credentials
- name: ci-pull-credentials
secret:
secretName: ci-pull-credentials
- name: cluster-profile
secret:
secretName: cluster-secrets-aws-qe
- name: github-credentials-openshift-ci-robot-private-git-cloner
secret:
secretName: github-credentials-openshift-ci-robot-private-git-cloner
- name: manifest-tool-local-pusher
secret:
secretName: manifest-tool-local-pusher
- name: pull-secret
secret:
secretName: registry-pull-credentials
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build05
cron: 31 11 13,27 * *
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ metadata:
name: "${CLUSTER_NAME}-pool-0"
spec:
clusterName: "${CLUSTER_NAME}"
replicas: 1
replicas: ${MACHINEPOOL_REPLICAS}
template:
spec:
clusterName: "${CLUSTER_NAME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ ref:
- name: MAX_REPLICAS
default: ""
documentation: The max number of the hcp worker nodes. The autoscaling min/max value must be equal to, or a multiple of, the availability zones count. The value must not be less than the min_replica.
- name: MACHINEPOOL_REPLICAS
default: "1"
documentation: The number of replicas set on the "${CLUSTER_NAME}-pool-0" machine pool.
- name: ADDITIONAL_TAGS
default: ""
documentation: Apply user defined tags to all resources created by CAPI ROSA HCP. Tags are comma separated example - 'foo:bar,bar:baz', The default value is "capi-prow-ci:${CLUSTER_NAME}".
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
approvers:
- LiangquanLi930
- heliubj18
- fxierh
reviewers:
- LiangquanLi930
- heliubj18
- fxierh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash

set -euo pipefail

# Load proxy settings into the current shell when a non-empty
# ${SHARED_DIR}/proxy-conf.sh exists; otherwise only report that none is set.
# Globals: SHARED_DIR (read); whatever proxy-conf.sh defines (written).
function set_proxy () {
  if [[ ! -s "${SHARED_DIR}/proxy-conf.sh" ]]; then
    echo "no proxy setting."
    return 0
  fi

  echo "setting the proxy"
  # The file content is deliberately not printed; only the source command is logged.
  echo "source ${SHARED_DIR}/proxy-conf.sh"
  source "${SHARED_DIR}/proxy-conf.sh"
}

# Log in with both the rosa and ocm CLIs using the offline token stored in
# ${CLUSTER_PROFILE_DIR}/ocm-token. Prints an error and exits 1 when the token is empty.
# Globals: CLUSTER_PROFILE_DIR, OCM_LOGIN_ENV (read); ROSA_TOKEN (written).
function rosa_login() {
  # ROSA_VERSION=$(rosa version)
  ROSA_TOKEN=$(cat "${CLUSTER_PROFILE_DIR}/ocm-token")

  # Bail out first so the login commands below never run with an empty token.
  if [[ -z "${ROSA_TOKEN}" ]]; then
    echo "Cannot login! You need to specify the offline token ROSA_TOKEN!"
    exit 1
  fi

  echo "Logging into ${OCM_LOGIN_ENV} with offline token using rosa cli"
  rosa login --env "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
  ocm login --url "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
}

# Resolve OPENSHIFT_VERSION to one concrete version from `rosa list versions --hosted-cp`.
# Globals:
#   AVAILABLE_UPGRADE (read, optional) - "yes" keeps only versions whose
#                                        available_upgrades is not null; any other
#                                        value, or unset, keeps every listed version.
#   OPENSHIFT_VERSION (read/written)   - empty: the first listed version is chosen;
#                                        "X.Y": the first listed version of that stream;
#                                        anything else must equal a listed version exactly.
#   CHANNEL_GROUP, versionList (written)
# Outputs: the candidate list on stdout.
# Exits 1 when no listed version satisfies the request.
function find_openshift_version() {
  # Get the openshift version
  CHANNEL_GROUP=stable

  # Run the pipeline directly instead of assembling a string for eval.
  local raw_id_filter='.[].raw_id'
  # Default to "no": the variable may be undeclared and the script runs with `set -u`.
  if [[ "${AVAILABLE_UPGRADE:-no}" == "yes" ]]; then
    raw_id_filter='.[] | select(.available_upgrades!=null) .raw_id'
  fi
  versionList=$(rosa list versions --hosted-cp --channel-group "${CHANNEL_GROUP}" -o json | jq -r "${raw_id_filter}")
  echo -e "Available cluster versions:\n${versionList}"

  local requested="${OPENSHIFT_VERSION:-}"
  if [[ -z "${requested}" ]]; then
    # First line of the list; parameter expansion avoids a pipe that could
    # fail with SIGPIPE under `pipefail`.
    OPENSHIFT_VERSION="${versionList%%$'\n'*}"
  elif [[ "${requested}" =~ ^[0-9]+\.[0-9]+$ ]]; then
    # Escape the dots and require the next separator so "4.1" selects 4.1.z
    # and never 4.16.z.
    OPENSHIFT_VERSION=$(grep -E -m 1 -- "^${requested//./\\.}\\." <<< "${versionList}" || true)
  else
    # Match the whole line, literally
    OPENSHIFT_VERSION=$(grep -F -x -m 1 -- "${requested}" <<< "${versionList}" || true)
  fi

  if [[ -z "$OPENSHIFT_VERSION" ]]; then
    echo "Requested cluster version not available!"
    exit 1
  fi
}

# Main flow: patch the rosacontrolplane resource of the CAPI cluster to
# OPENSHIFT_VERSION, then poll `rosa describe cluster` until the reported
# openshift_version equals the patched spec.version or CLUSTER_TIMEOUT expires.
set_proxy
rosa_login
find_openshift_version

# Use mgmt_kubeconfig instead of the default kubeconfig when that file exists.
export KUBECONFIG="${SHARED_DIR}/kubeconfig"
if [[ -f "${SHARED_DIR}/mgmt_kubeconfig" ]]; then
  export KUBECONFIG="${SHARED_DIR}/mgmt_kubeconfig"
fi

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
export AWS_REGION="${REGION}"
export AWS_PAGER=""

# get cluster namespace
CLUSTER_NAME=$(cat "${SHARED_DIR}/cluster-name")
if [[ -z "${CLUSTER_NAME}" ]]; then
  echo "Error: cluster name not found"
  exit 1
fi

# Compare the NAME column exactly; a substring grep could pick another cluster
# whose line merely contains CLUSTER_NAME. `|| true` keeps a failed or
# early-closed pipeline from aborting before the explicit check below.
namespace=$(oc get cluster -A \
  | awk -v name="${CLUSTER_NAME}" 'NR > 1 && $2 == name { print $1; exit }' || true)
if [[ -z "${namespace}" ]]; then
  echo "capi cluster name not found error, ${CLUSTER_NAME}"
  exit 1
fi

echo "upgrade rosacontrolplane"
rosacontrolplane_name=$(oc get cluster "${CLUSTER_NAME}" -n "${namespace}" -ojsonpath='{.spec.controlPlaneRef.name}')
if [[ -z "${rosacontrolplane_name}" ]]; then
  echo "Error: controlPlaneRef name not found for cluster ${CLUSTER_NAME}"
  exit 1
fi
version=$(oc get rosacontrolplane "${rosacontrolplane_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
echo "rosa controlplane version is $version now, begin to upgrade to $OPENSHIFT_VERSION"
oc patch -n "${namespace}" --type=merge --patch='{"spec":{"version":"'"${OPENSHIFT_VERSION}"'"}}' "rosacontrolplane/${rosacontrolplane_name}"
new_version=$(oc get rosacontrolplane "${rosacontrolplane_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
echo "now rosacontrolplane version is ${new_version}"

CLUSTER_ID=$(cat "${SHARED_DIR}/cluster-id")
start_time=$(date +"%s")
while true; do
  sleep 60
  # A single failed poll must not abort the wait; the timeout below still
  # bounds the loop.
  rosa_hcp_version=$(rosa describe cluster -c "${CLUSTER_ID}" -o json | jq -r '.openshift_version') || rosa_hcp_version=""
  echo "rosa hcp version: ${rosa_hcp_version}"
  if [[ "${rosa_hcp_version}" == "${new_version}" ]]; then
    break
  fi
  if (( $(date +"%s") - start_time >= CLUSTER_TIMEOUT )); then
    echo "error: Timed out while waiting for cluster cp upgrade ${rosa_hcp_version}"
    exit 1
  fi
done

echo "rosa hcp cp upgrade done"


Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"path": "cucushift/hypershift-extended/capi/upgrade-cp/cucushift-hypershift-extended-capi-upgrade-cp-ref.yaml",
"owners": {
"approvers": [
"LiangquanLi930",
"heliubj18",
"fxierh"
],
"reviewers": [
"LiangquanLi930",
"heliubj18",
"fxierh"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Step definition for the CAPI ROSA HCP control-plane upgrade; the env entries
# below are the variables read by the commands script.
ref:
  as: cucushift-hypershift-extended-capi-upgrade-cp
  from: rosa-aws-cli
  grace_period: 5m
  cli: latest
  commands: cucushift-hypershift-extended-capi-upgrade-cp-commands.sh
  resources:
    requests:
      cpu: 100m
      memory: 100Mi
  env:
  - name: CLUSTER_TIMEOUT
    default: "1800"
    documentation: Number of seconds to wait for the control plane upgrade to finish before the step fails.
  - name: OCM_LOGIN_ENV
    default: "staging"
    documentation: The environment for rosa login. The supported values are [production, staging, integration].
  - name: CLUSTER_NAME
    default: ""
    documentation: Not used as an input. The step overwrites it with the cluster name read from ${SHARED_DIR}/cluster-name.
  - name: REGION
    default: "us-east-1"
    documentation: "The AWS region of the cluster."
  - name: OPENSHIFT_VERSION
    default: "4.16.0"
    documentation: The openshift version to upgrade the control plane to (e.g. "4.15.0"). Specify a major/minor (e.g. "4.15") to get the latest version from that stream.
  # The commands script reads AVAILABLE_UPGRADE under `set -u`, so it must be declared.
  - name: AVAILABLE_UPGRADE
    default: "no"
    documentation: Set to "yes" to choose the target only from versions that themselves have available upgrades. The supported values are [yes, no].
  documentation: |-
    capi rosa hcp upgrade control plane for rosa hcp.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
approvers:
- LiangquanLi930
- heliubj18
- fxierh
reviewers:
- LiangquanLi930
- heliubj18
- fxierh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash

set -euo pipefail

# Source ${SHARED_DIR}/proxy-conf.sh into the current shell when that file exists
# and is non-empty; otherwise just report that no proxy is configured.
# Globals: SHARED_DIR (read); whatever proxy-conf.sh defines (written).
function set_proxy () {
  if [[ ! -s "${SHARED_DIR}/proxy-conf.sh" ]]; then
    echo "no proxy setting."
    return 0
  fi

  echo "setting the proxy"
  # Log the source command only, not the file content.
  echo "source ${SHARED_DIR}/proxy-conf.sh"
  source "${SHARED_DIR}/proxy-conf.sh"
}

# Authenticate the rosa and ocm CLIs with the offline token read from
# ${CLUSTER_PROFILE_DIR}/ocm-token. An empty token prints an error and exits 1.
# Globals: CLUSTER_PROFILE_DIR, OCM_LOGIN_ENV (read); ROSA_TOKEN (written).
function rosa_login() {
  # ROSA_VERSION=$(rosa version)
  ROSA_TOKEN=$(cat "${CLUSTER_PROFILE_DIR}/ocm-token")

  # Guard clause: nothing below may run without a token.
  if [[ -z "${ROSA_TOKEN}" ]]; then
    echo "Cannot login! You need to specify the offline token ROSA_TOKEN!"
    exit 1
  fi

  echo "Logging into ${OCM_LOGIN_ENV} with offline token using rosa cli"
  rosa login --env "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
  ocm login --url "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
}

# Main flow: bring the rosamachinepool of the CAPI cluster to the version set
# on its rosacontrolplane, then poll `rosa describe machinepool` until the
# reported version equals the patched spec.version or CLUSTER_TIMEOUT expires.
set_proxy
rosa_login

# Use mgmt_kubeconfig instead of the default kubeconfig when that file exists.
export KUBECONFIG="${SHARED_DIR}/kubeconfig"
if [[ -f "${SHARED_DIR}/mgmt_kubeconfig" ]]; then
  export KUBECONFIG="${SHARED_DIR}/mgmt_kubeconfig"
fi

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
export AWS_REGION="${REGION}"
export AWS_PAGER=""

# get cluster namespace
CLUSTER_NAME=$(cat "${SHARED_DIR}/cluster-name")
if [[ -z "${CLUSTER_NAME}" ]]; then
  echo "Error: cluster name not found"
  exit 1
fi

# Compare the NAME column exactly; a substring grep could pick another cluster
# whose line merely contains CLUSTER_NAME. `|| true` keeps a failed or
# early-closed pipeline from aborting before the explicit check below.
namespace=$(oc get cluster -A \
  | awk -v name="${CLUSTER_NAME}" 'NR > 1 && $2 == name { print $1; exit }' || true)
if [[ -z "${namespace}" ]]; then
  echo "capi cluster name not found error, ${CLUSTER_NAME}"
  exit 1
fi

echo "upgrade rosamachinepool"
rosacontrolplane_name=$(oc get cluster "${CLUSTER_NAME}" -n "${namespace}" -ojsonpath='{.spec.controlPlaneRef.name}')
# Query the named control plane: without a name `oc get` returns a List, on
# which '{.spec.version}' does not resolve to the version.
cp_version=$(oc get rosacontrolplane "${rosacontrolplane_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
if [[ -z "${cp_version}" ]]; then
  echo "Error: rosacontrolplane version not found for ${rosacontrolplane_name}"
  exit 1
fi

# `machinepool` was never assigned, which aborts under `set -u`.
# NOTE(review): the fallback assumes the MachinePool is named
# "${CLUSTER_NAME}-pool-0", as in the CAPI provision template — confirm.
machinepool="${machinepool:-${CLUSTER_NAME}-pool-0}"
rosamachinepool_name=$(oc get MachinePool -n "${namespace}" "${machinepool}" -ojsonpath='{.spec.template.spec.infrastructureRef.name}')
if [[ -z "${rosamachinepool_name}" ]]; then
  echo "Error: infrastructureRef name not found for MachinePool ${machinepool}"
  exit 1
fi
np_version=$(oc get rosamachinepool "${rosamachinepool_name}" -n "${namespace}" -ojsonpath='{.status.version}')

# Nothing to upgrade is treated as a failure of this step.
if [[ "${cp_version}" == "${np_version}" ]]; then
  echo "rosamachinepool version is same as rosacontrolplane ${cp_version}"
  exit 1
fi

oc patch -n "${namespace}" --type=merge --patch='{"spec":{"updateConfig":{"rollingUpdate":{"maxSurge": 2, "maxUnavailable": 3}}}}' "rosamachinepool/${rosamachinepool_name}"
oc patch -n "${namespace}" --type=merge --patch='{"spec":{"version":"'"${cp_version}"'"}}' "rosamachinepool/${rosamachinepool_name}"
new_version=$(oc get rosamachinepool "${rosamachinepool_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
echo "now rosamachinepool version is ${new_version}"

CLUSTER_ID=$(cat "${SHARED_DIR}/cluster-id")
start_time=$(date +"%s")
while true; do
  sleep 120
  # A single failed poll must not abort the wait; the timeout below still
  # bounds the loop.
  mp_version=$(rosa describe machinepool -c "${CLUSTER_ID}" --machinepool "${rosamachinepool_name}" -o json | jq -r '.version.raw_id') || mp_version=""
  echo "rosa hcp machinepool version: ${mp_version}"
  if [[ "${mp_version}" == "${new_version}" ]]; then
    break
  fi
  if (( $(date +"%s") - start_time >= CLUSTER_TIMEOUT )); then
    echo "error: Timed out while waiting for cluster np upgrade ${mp_version}"
    exit 1
  fi
done

echo "rosa hcp np upgrade done"


Loading

0 comments on commit e8cc4d6

Please sign in to comment.