Skip to content

Conformance External Workloads (ci-external-workloads) #4545

Conformance External Workloads (ci-external-workloads)

Conformance External Workloads (ci-external-workloads) #4545

name: Conformance External Workloads (ci-external-workloads)
# Any change in triggers needs to be reflected in the concurrency group.
on:
workflow_dispatch:
inputs:
PR-number:
description: "Pull request number."
required: true
context-ref:
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
required: true
SHA:
description: "SHA under test (head of the PR branch)."
required: true
extra-args:
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
required: false
default: '{}'
# Run once a day
schedule:
- cron: '0 8 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# To be able to set commit status
statuses: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
env:
clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
vmName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
vmStartupScript: .github/gcp-vm-startup.sh
# renovate: datasource=github-releases depName=cilium/cilium-cli
cilium_cli_version: v0.14.8
cilium_cli_ci_version:
check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
commit-status-start:
name: Commit Status Start
runs-on: ubuntu-latest
steps:
- name: Set initial commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
generate-matrix:
name: Generate Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Convert YAML to JSON
run: |
work_dir=".github/actions/gke"
destination_directory="/tmp/generated/gke"
mkdir -p "${destination_directory}"
yq -o=json ${work_dir}/k8s-versions.yaml | jq . > "${destination_directory}/gke.json"
- name: Generate Matrix
id: set-matrix
run: |
cd /tmp/generated/gke
# Use complete matrix in case of scheduled run
# main -> event_name = schedule
# other stable branches -> PR-number starting with v (e.g. v1.14)
if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.PR-number }}" == v* ]];then
jq '{ "include": [ .k8s[] ] }' gke.json > /tmp/matrix.json
else
jq '{ "include": [ .k8s[] | select(.default) ] }' gke.json > /tmp/matrix.json
fi
echo "Generated matrix:"
cat /tmp/matrix.json
echo "matrix=$(jq -c . < /tmp/matrix.json)" >> $GITHUB_OUTPUT
installation-and-connectivity:
name: Installation and Connectivity Test
needs: generate-matrix
runs-on: ubuntu-latest
timeout-minutes: 30
env:
job_name: "Installation and Connectivity Test"
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Set up job variables
id: vars
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
SHA="${{ inputs.SHA }}"
OWNER="${{ inputs.PR-number }}"
else
SHA="${{ github.sha }}"
OWNER="${{ github.ref_name }}"
OWNER="${OWNER/./-}"
fi
CILIUM_INSTALL_DEFAULTS="--cluster-name=${{ env.clusterName }} \
--datapath-mode=tunnel \
--chart-directory=./untrusted/install/kubernetes/cilium \
--helm-set=image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci \
--helm-set=image.useDigest=false \
--helm-set=image.tag=${SHA} \
--helm-set=operator.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/operator \
--helm-set=operator.image.suffix=-ci \
--helm-set=operator.image.tag=${SHA} \
--helm-set=operator.image.useDigest=false \
--helm-set=clustermesh.apiserver.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci \
--helm-set=clustermesh.apiserver.image.tag=${SHA} \
--helm-set=clustermesh.apiserver.image.useDigest=false \
--helm-set=hubble.relay.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci \
--helm-set=hubble.relay.image.tag=${SHA} \
--helm-set=bpf.monitorAggregation=none \
--wait=false \
--rollback=false \
--kube-proxy-replacement=strict \
--version="
HUBBLE_ENABLE_DEFAULTS="--chart-directory=install/kubernetes/cilium \
--relay-image=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci:${SHA} \
--relay-version=${SHA}"
CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure \
--external-target google.com --external-cidr 8.0.0.0/8 --external-ip 8.8.4.4 --external-other-ip 8.8.8.8"
CLUSTERMESH_ENABLE_DEFAULTS="--apiserver-image=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/clustermesh-apiserver-ci \
--apiserver-version=${SHA}"
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo hubble_enable_defaults=${HUBBLE_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo clustermesh_enable_defaults=${CLUSTERMESH_ENABLE_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${SHA} >> $GITHUB_OUTPUT
echo owner=${OWNER} >> $GITHUB_OUTPUT
- name: Install Cilium CLI
uses: cilium/cilium-cli@5362f383942260c0aed4f3876e09c3452435577a # v0.14.8
with:
release-version: ${{ env.cilium_cli_version }}
ci-version: ${{ env.cilium_cli_ci_version }}
- name: Set up gcloud credentials
id: 'auth'
uses: google-github-actions/auth@f105ef0cdb3b102a020be1767fcc8a974898b7c6 # v1.2.0
with:
credentials_json: '${{ secrets.GCP_PR_SA_KEY }}'
- name: Set up gcloud CLI
uses: google-github-actions/setup-gcloud@e30db14379863a8c79331b04a9969f4c1e225e0b # v1.1.1
with:
project_id: ${{ secrets.GCP_PROJECT_ID }}
version: "405.0.0"
- name: Install gke-gcloud-auth-plugin
run: |
gcloud components install gke-gcloud-auth-plugin
- name: Display gcloud CLI info
run: |
gcloud info
- name: Create GCP VM
uses: nick-invision/retry@14672906e672a08bd6eeb15720e9ed3ce869cdd4 # v2.9.0
with:
retry_on: error
timeout_minutes: 1
max_attempts: 10
command: |
gcloud compute instances create ${{ env.vmName }}-${{ matrix.vmIndex }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ matrix.zone }} \
--machine-type e2-custom-2-4096 \
--boot-disk-type pd-standard \
--boot-disk-size 10GB \
--preemptible \
--image-project ubuntu-os-cloud \
--image-family ubuntu-2004-lts \
--metadata hostname=${{ env.vmName }}-${{ matrix.vmIndex }} \
--metadata-from-file startup-script=${{ env.vmStartupScript}}
- name: Create GKE cluster
run: |
gcloud container clusters create ${{ env.clusterName }} \
--labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
--zone ${{ matrix.zone }} \
--cluster-version ${{ matrix.version }} \
--enable-ip-alias \
--node-taints node.cilium.io/agent-not-ready=true:NoExecute \
--cluster-ipv4-cidr="/21" \
--services-ipv4-cidr="/24" \
--image-type COS_CONTAINERD \
--num-nodes 2 \
--machine-type e2-custom-2-4096 \
--disk-type pd-standard \
--disk-size 10GB \
--preemptible
- name: Get cluster credentials
run: |
gcloud container clusters get-credentials ${{ env.clusterName }} --zone ${{ matrix.zone }}
# Warning: since this is a privileged workflow, subsequent workflow job
# steps must take care not to execute untrusted code.
- name: Checkout pull request branch (NOT TRUSTED)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
path: untrusted
sparse-checkout: |
install/kubernetes/cilium
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
- name: Install Cilium in cluster
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}
- name: Enable cluster mesh
run: |
cilium clustermesh enable ${{ steps.vars.outputs.clustermesh_enable_defaults }}
- name: Wait for cluster mesh status to be ready
run: |
cilium clustermesh status --wait
- name: Add VM to cluster mesh
run: |
cilium clustermesh vm create ${{ env.vmName }}-${{ matrix.vmIndex }} -n default --ipv4-alloc-cidr 10.192.1.0/30
cilium clustermesh vm status
- name: Install Cilium on VM
run: |
cilium clustermesh vm install install-external-workload.sh --config debug
gcloud compute scp install-external-workload.sh ${{ env.vmName }}-${{ matrix.vmIndex }}:~/ --zone ${{ matrix.zone }}
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
--command "~/install-external-workload.sh"
sleep 5s
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
--command "sudo cilium status"
- name: Verify cluster DNS on VM
# Limit nslookup to the first (global) DNS server setting
run: |
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
--command "nslookup -d2 -retry=10 -timeout=5 -norecurse clustermesh-apiserver.kube-system.svc.cluster.local \$(systemd-resolve --status | grep -m 1 \"Current DNS Server:\" | cut -d':' -f2)"
- name: Ping clustermesh-apiserver from VM
run: |
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
--command "ping -c 3 \$(sudo cilium service list get -o jsonpath='{[?(@.spec.flags.name==\"clustermesh-apiserver\")].spec.backend-addresses[0].ip}')"
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test (${{ join(matrix.*, ', ') }})
run: |
cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
--junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"
- name: Post-test information gathering
if: ${{ !success() }}
run: |
kubectl get pods --all-namespaces -o wide
kubectl get cew --all-namespaces -o wide
kubectl get cep --all-namespaces -o wide
cilium status
cilium clustermesh status
cilium clustermesh vm status
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo cilium status"
gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo docker logs cilium --timestamps"
cilium sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Clean up GKE cluster and VM
if: ${{ always() }}
run: |
while [ "$(gcloud container operations list --zone ${{ matrix.zone }} --filter="status=RUNNING AND targetLink~${{ env.clusterName }}" --format="value(name)")" ];do
echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
done
gcloud container clusters delete ${{ env.clusterName }} --zone ${{ matrix.zone }} --quiet --async
gcloud compute instances delete ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --quiet
shell: bash {0} # Disable default fail-fast behavior so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-sysdumps
path: cilium-sysdump-*.zip
retention-days: 5
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-junits
path: cilium-junits/*.xml
retention-days: 2
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
commit-status-final:
if: ${{ always() }}
name: Commit Status Final
needs: installation-and-connectivity
runs-on: ubuntu-latest
steps:
- name: Set final commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ needs.installation-and-connectivity.result }}