Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add capi rosa hcp upgrade job #54658

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,21 @@ tests:
test:
- chain: cucushift-installer-check-cluster-health
workflow: osd-ccs-aws
- as: aws-rosa-hcp-capi-upgrade-f14
cron: 20 19 2,18 * *
steps:
cluster_profile: aws-sd-qe
env:
BASE_DOMAIN: qe.devcluster.openshift.com
OPENSHIFT_VERSION: "4.16"
REGION: us-west-2
TEST_FILTERS: ~ChkUpgrade&;~NonPreRelease&;~Serial&;~Disruptive&;~DisconnectedOnly&;~HyperShiftMGMT&;~MicroShiftOnly&;~NonHyperShiftHOST&;ROSA&
TEST_IMPORTANCE: Critical
UPGRADED_TO_VERSION: "4.16"
test:
- ref: openshift-extended-test
- ref: openshift-e2e-test-qe-report
workflow: rosa-aws-sts-hcp-capi-upgrade
- as: aws-rosa-hcp-byo-kms-oidc-auditlog-stage-critical-f14
cron: 37 6 8,22 * *
steps:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38797,6 +38797,93 @@ periodics:
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build03
cron: 20 19 2,18 * *
decorate: true
decoration_config:
skip_cloning: true
extra_refs:
- base_ref: release-4.16
org: openshift
repo: openshift-tests-private
labels:
ci-operator.openshift.io/cloud: aws
ci-operator.openshift.io/cloud-cluster-profile: aws-sd-qe
ci-operator.openshift.io/variant: amd64-stable
ci.openshift.io/generator: prowgen
pj-rehearse.openshift.io/can-be-rehearsed: "true"
name: periodic-ci-openshift-openshift-tests-private-release-4.16-amd64-stable-aws-rosa-hcp-capi-upgrade-f14
spec:
containers:
- args:
- --gcs-upload-secret=/secrets/gcs/service-account.json
- --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
- --lease-server-credentials-file=/etc/boskos/credentials
- --oauth-token-path=/usr/local/github-credentials/oauth
- --report-credentials-file=/etc/report/credentials
- --secret-dir=/secrets/ci-pull-credentials
- --secret-dir=/usr/local/aws-rosa-hcp-capi-upgrade-f14-cluster-profile
- --target=aws-rosa-hcp-capi-upgrade-f14
- --variant=amd64-stable
command:
- ci-operator
image: ci-operator:latest
imagePullPolicy: Always
name: ""
resources:
requests:
cpu: 10m
volumeMounts:
- mountPath: /etc/boskos
name: boskos
readOnly: true
- mountPath: /secrets/ci-pull-credentials
name: ci-pull-credentials
readOnly: true
- mountPath: /usr/local/aws-rosa-hcp-capi-upgrade-f14-cluster-profile
name: cluster-profile
- mountPath: /secrets/gcs
name: gcs-credentials
readOnly: true
- mountPath: /usr/local/github-credentials
name: github-credentials-openshift-ci-robot-private-git-cloner
readOnly: true
- mountPath: /secrets/manifest-tool
name: manifest-tool-local-pusher
readOnly: true
- mountPath: /etc/pull-secret
name: pull-secret
readOnly: true
- mountPath: /etc/report
name: result-aggregator
readOnly: true
serviceAccountName: ci-operator
volumes:
- name: boskos
secret:
items:
- key: credentials
path: credentials
secretName: boskos-credentials
- name: ci-pull-credentials
secret:
secretName: ci-pull-credentials
- name: cluster-profile
secret:
secretName: cluster-secrets-aws-sd-qe
- name: github-credentials-openshift-ci-robot-private-git-cloner
secret:
secretName: github-credentials-openshift-ci-robot-private-git-cloner
- name: manifest-tool-local-pusher
secret:
secretName: manifest-tool-local-pusher
- name: pull-secret
secret:
secretName: registry-pull-credentials
- name: result-aggregator
secret:
secretName: result-aggregator
- agent: kubernetes
cluster: build03
cron: 31 11 13,27 * *
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
approvers:
- LiangquanLi930
- heliubj18
- fxierh
reviewers:
- LiangquanLi930
- heliubj18
- fxierh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash

set -euo pipefail

# Source ${SHARED_DIR}/proxy-conf.sh into the current shell when that file
# exists and is non-empty; otherwise only report that no proxy is configured.
# Globals: SHARED_DIR (read). Whatever proxy-conf.sh sets is not visible here.
# Outputs: progress messages on stdout.
set_proxy() {
  # Guard: nothing to load when the file is missing or empty.
  if [[ ! -s "${SHARED_DIR}/proxy-conf.sh" ]]; then
    echo "no proxy setting."
    return
  fi

  echo "setting the proxy"
  echo "source ${SHARED_DIR}/proxy-conf.sh"
  # Sourced rather than executed so its settings apply to this shell.
  source "${SHARED_DIR}/proxy-conf.sh"
}

# Log in with both the rosa and ocm CLIs using the offline token stored in
# ${CLUSTER_PROFILE_DIR}/ocm-token.
# Globals: CLUSTER_PROFILE_DIR, OCM_LOGIN_ENV (read); ROSA_TOKEN (written).
# Exits the script with status 1 when the token file is empty.
rosa_login() {
  ROSA_TOKEN=$(cat "${CLUSTER_PROFILE_DIR}/ocm-token")

  # Guard: an empty token cannot be used to log in.
  if [[ -z "${ROSA_TOKEN}" ]]; then
    echo "Cannot login! You need to specify the offline token ROSA_TOKEN!"
    exit 1
  fi

  echo "Logging into ${OCM_LOGIN_ENV} with offline token using rosa cli"
  # NOTE(review): the token is passed on the command line to both CLIs.
  rosa login --env "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
  ocm login --url "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
}

# Main flow: load proxy settings, log in, then dump the ROSA cluster
# description, the CAPA controller logs and the nodepool description into
# ARTIFACT_DIR.
set_proxy
rosa_login

# Use mgmt_kubeconfig instead of the default kubeconfig when it exists.
export KUBECONFIG="${SHARED_DIR}/kubeconfig"
if [[ -f "${SHARED_DIR}/mgmt_kubeconfig" ]]; then
  export KUBECONFIG="${SHARED_DIR}/mgmt_kubeconfig"
fi

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
export AWS_REGION="${REGION}"
export AWS_PAGER=""

# get cluster name
CLUSTER_NAME=$(cat "${SHARED_DIR}/cluster-name")
if [[ -z "${CLUSTER_NAME}" ]]; then
  echo "Error: cluster name not found"
  exit 1
fi

echo "dump rosa cluster info: ${CLUSTER_NAME}"
rosa describe cluster -c "${CLUSTER_NAME}" > "${ARTIFACT_DIR}/${CLUSTER_NAME}.yaml"

echo "dump capa logs"
# The jsonpath yields a space-separated list, so more than one controller pod
# may be returned; dump each pod into its own file.
capa_controllers=$(oc get pod -n capa-system -lcontrol-plane=capa-controller-manager -ojsonpath='{.items[*].metadata.name}')
# Word splitting is intentional here: one loop iteration per pod name.
for capa_controller in ${capa_controllers}; do
  oc logs -n capa-system "${capa_controller}" > "${ARTIFACT_DIR}/${capa_controller}.logs"
done

echo "dump nodepool"
nodepool_name=$(cat "${SHARED_DIR}/rosa_nodepool")
rosa describe machinepool -c "${CLUSTER_NAME}" --machinepool "${nodepool_name}" > "${ARTIFACT_DIR}/${nodepool_name}.yaml"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"path": "cucushift/hypershift-extended/capi/dump/cucushift-hypershift-extended-capi-dump-ref.yaml",
"owners": {
"approvers": [
"LiangquanLi930",
"heliubj18",
"fxierh"
],
"reviewers": [
"LiangquanLi930",
"heliubj18",
"fxierh"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
ref:
as: cucushift-hypershift-extended-capi-dump
from: rosa-aws-cli
grace_period: 5m
cli: latest
commands: cucushift-hypershift-extended-capi-dump-commands.sh
resources:
requests:
cpu: 100m
memory: 100Mi
env:
- name: OCM_LOGIN_ENV
default: "staging"
documentation: The environment for rosa login. The supported values are [production, staging, integration].
- name: REGION
default: "us-east-1"
documentation: "The AWS region of the cluster."
documentation: |-
Dump CAPI information (ROSA cluster description, CAPA controller logs, and nodepool description) for debugging.
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ export AWS_PAGER=""
# download clusterctl and clusterawsadm
mkdir -p /tmp/bin
export PATH=/tmp/bin:$PATH
curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.7.2/clusterctl-linux-amd64 -o /tmp/bin/clusterctl && \
curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.7.4/clusterctl-linux-amd64 -o /tmp/bin/clusterctl && \
chmod +x /tmp/bin/clusterctl

curl -L https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.5.0/clusterawsadm_v2.5.0_linux_amd64 -o /tmp/bin/clusterawsadm && \
curl -L https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.6.1/clusterawsadm-linux-amd64 -o /tmp/bin/clusterawsadm && \
chmod +x /tmp/bin/clusterawsadm

export KUBECONFIG="${SHARED_DIR}/kubeconfig"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ metadata:
name: "${CLUSTER_NAME}-pool-0"
spec:
clusterName: "${CLUSTER_NAME}"
replicas: 1
replicas: ${MACHINEPOOL_REPLICAS}
template:
spec:
clusterName: "${CLUSTER_NAME}"
Expand Down Expand Up @@ -413,6 +413,8 @@ CLUSTER_ID=$(rosa describe cluster -c ${CLUSTER_NAME} -o json | jq '.id' | cut -
echo "Cluster ${CLUSTER_NAME} is being created with cluster-id: ${CLUSTER_ID}"
echo -n $CLUSTER_ID > $SHARED_DIR/cluster-id
echo "rosa" > $SHARED_DIR/cluster-type
echo "${CLUSTER_NAME}-pool-0" > "${SHARED_DIR}/capi_machinepool"
echo "${NODEPOOL_NAME}" > "${SHARED_DIR}/rosa_nodepool"

# collect rosa hcp info
rosa logs install -c ${CLUSTER_ID} --watch
Expand All @@ -429,6 +431,7 @@ while true; do
fi
if (( $(date +"%s") - $start_time >= $CLUSTER_TIMEOUT )); then
echo "error: Timed out while waiting for cluster to be ready"
oc -n default get rosacontrolplane ${CLUSTER_NAME}-control-plane -oyaml > ${ARTIFACT_DIR}/${CLUSTER_NAME}-control-plane.yaml
exit 1
fi
if [[ "${CLUSTER_STATE}" != "installing" && "${CLUSTER_STATE}" != "pending" && "${CLUSTER_STATE}" != "waiting" && "${CLUSTER_STATE}" != "validating" ]]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ ref:
- name: MAX_REPLICAS
default: ""
documentation: The maximum number of HCP worker nodes. The autoscaling min/max values must be equal to, or a multiple of, the availability zone count. The value must not be less than the min_replica.
- name: MACHINEPOOL_REPLICAS
default: "1"
documentation: The number of replicas for the CAPI machinepool (${CLUSTER_NAME}-pool-0).
- name: ADDITIONAL_TAGS
default: ""
documentation: Apply user defined tags to all resources created by CAPI ROSA HCP. Tags are comma separated example - 'foo:bar,bar:baz', The default value is "capi-prow-ci:${CLUSTER_NAME}".
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
approvers:
- LiangquanLi930
- heliubj18
- fxierh
reviewers:
- LiangquanLi930
- heliubj18
- fxierh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/bin/bash

set -euo pipefail

# Source ${SHARED_DIR}/proxy-conf.sh into the current shell when that file
# exists and is non-empty; otherwise only report that no proxy is configured.
# Globals: SHARED_DIR (read). Whatever proxy-conf.sh sets is not visible here.
# Outputs: progress messages on stdout.
set_proxy() {
  # Guard: nothing to load when the file is missing or empty.
  if [[ ! -s "${SHARED_DIR}/proxy-conf.sh" ]]; then
    echo "no proxy setting."
    return
  fi

  echo "setting the proxy"
  echo "source ${SHARED_DIR}/proxy-conf.sh"
  # Sourced rather than executed so its settings apply to this shell.
  source "${SHARED_DIR}/proxy-conf.sh"
}

# Log in with both the rosa and ocm CLIs using the offline token stored in
# ${CLUSTER_PROFILE_DIR}/ocm-token.
# Globals: CLUSTER_PROFILE_DIR, OCM_LOGIN_ENV (read); ROSA_TOKEN (written).
# Exits the script with status 1 when the token file is empty.
rosa_login() {
  ROSA_TOKEN=$(cat "${CLUSTER_PROFILE_DIR}/ocm-token")

  # Guard: an empty token cannot be used to log in.
  if [[ -z "${ROSA_TOKEN}" ]]; then
    echo "Cannot login! You need to specify the offline token ROSA_TOKEN!"
    exit 1
  fi

  echo "Logging into ${OCM_LOGIN_ENV} with offline token using rosa cli"
  # NOTE(review): the token is passed on the command line to both CLIs.
  rosa login --env "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
  ocm login --url "${OCM_LOGIN_ENV}" --token "${ROSA_TOKEN}"
}

# Resolve UPGRADED_TO_VERSION to one concrete version reported by
# `rosa list versions --hosted-cp` for the stable channel group.
#   - empty or unset : the first listed version is used
#   - "X.Y"          : the first listed version starting with "X.Y." (or equal
#                      to "X.Y") is used
#   - anything else  : must equal a listed version exactly
# Matching is literal: dots are not treated as regex wildcards, so "4.1" never
# selects a 4.16.z version.
# Globals: CHANNEL_GROUP (written), UPGRADED_TO_VERSION (read and written).
# Outputs: the list of available versions on stdout.
# Exits the script with status 1 when no listed version matches.
function find_openshift_version() {
  # Get the openshift version
  CHANNEL_GROUP=stable

  local versionList
  versionList=$(rosa list versions --hosted-cp --channel-group "${CHANNEL_GROUP}" -o json \
    | jq -r '.[].raw_id')
  echo -e "Available cluster versions:\n${versionList}"

  # Tolerate an unset variable under `set -u`; treat it like an empty request.
  local requested="${UPGRADED_TO_VERSION:-}"
  local resolved=""
  local candidate

  while IFS= read -r candidate; do
    if [[ -z "${requested}" ]]; then
      # No preference: take the first listed version.
      resolved="${candidate}"
      break
    elif [[ "${requested}" =~ ^[0-9]+\.[0-9]+$ ]]; then
      # Minor stream requested: literal prefix match including the trailing dot.
      if [[ "${candidate}" == "${requested}."* || "${candidate}" == "${requested}" ]]; then
        resolved="${candidate}"
        break
      fi
    elif [[ "${candidate}" == "${requested}" ]]; then
      # Match the whole line
      resolved="${candidate}"
      break
    fi
  done <<< "${versionList}"

  UPGRADED_TO_VERSION="${resolved}"

  if [[ -z "$UPGRADED_TO_VERSION" ]]; then
    echo "Requested cluster version not available!"
    exit 1
  fi
}

# Main flow: load proxy settings, log in, resolve the target version, patch
# the rosacontrolplane's spec.version, then poll ROSA until the cluster
# reports that version or CLUSTER_TIMEOUT seconds have elapsed.
set_proxy
rosa_login
find_openshift_version

# Use mgmt_kubeconfig instead of the default kubeconfig when it exists.
export KUBECONFIG="${SHARED_DIR}/kubeconfig"
if [[ -f "${SHARED_DIR}/mgmt_kubeconfig" ]]; then
  export KUBECONFIG="${SHARED_DIR}/mgmt_kubeconfig"
fi

export AWS_SHARED_CREDENTIALS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
export AWS_REGION="${REGION}"
export AWS_PAGER=""

# get cluster name
CLUSTER_NAME=$(cat "${SHARED_DIR}/cluster-name")
if [[ -z "${CLUSTER_NAME}" ]]; then
  echo "Error: cluster name not found"
  exit 1
fi

# Look up the namespace of the CAPI cluster whose NAME column (2nd column of
# `oc get cluster -A`) equals CLUSTER_NAME exactly. A substring match would
# also pick up e.g. "<name>-2" or a namespace that contains the name.
# `|| true` keeps a failed lookup flowing into the explicit error below.
namespace=$(oc get cluster -A \
  | awk -v name="${CLUSTER_NAME}" '$2 == name && !seen { print $1; seen = 1 }') || true
if [[ -z "${namespace}" ]]; then
  echo "capi cluster name not found error, ${CLUSTER_NAME}"
  exit 1
fi

echo "upgrade rosacontrolplane"
rosacontrolplane_name=$(oc get cluster "${CLUSTER_NAME}" -n "${namespace}" -ojsonpath='{.spec.controlPlaneRef.name}')
version=$(oc get rosacontrolplane "${rosacontrolplane_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
echo "rosa controlplane version is $version now, begin to upgrade to $UPGRADED_TO_VERSION"
oc patch -n "${namespace}" --type=merge --patch='{"spec":{"version":"'"${UPGRADED_TO_VERSION}"'"}}' "rosacontrolplane/${rosacontrolplane_name}"
# Read the version back so the wait loop compares against what was stored.
new_version=$(oc get rosacontrolplane "${rosacontrolplane_name}" -n "${namespace}" -ojsonpath='{.spec.version}')
echo "now rosacontrolplane version is ${new_version}"

CLUSTER_ID=$(cat "${SHARED_DIR}/cluster-id")
start_time=$(date +"%s")
# NOTE(review): CLUSTER_TIMEOUT is not set in this script; presumably it comes
# from the step environment - confirm.
while true; do
  sleep 150
  rosa_hcp_version=$(rosa describe cluster -c "${CLUSTER_ID}" -o json | jq -r '.openshift_version')
  echo "rosa hcp version: ${rosa_hcp_version}"
  if [[ "${rosa_hcp_version}" == "${new_version}" ]]; then
    break
  fi
  if (( $(date +"%s") - $start_time >= $CLUSTER_TIMEOUT )); then
    echo "error: Timed out while waiting for cluster cp upgrade ${rosa_hcp_version}"
    exit 1
  fi
done

echo "rosa hcp cp upgrade done"


Loading