Skip to content

chore(gpu): rework ci to adapt to the shortage of h100 #3698

chore(gpu): rework ci to adapt to the shortage of h100

chore(gpu): rework ci to adapt to the shortage of h100 #3698

Workflow file for this run

name: AWS Tests on CPU
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
jobs:
should-run:
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
permissions:
pull-requests: write
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
core_crypto_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.core_crypto_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
boolean_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.boolean_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
shortint_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.shortint_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
high_level_api_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.high_level_api_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
c_api_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.c_api_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
examples_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.examples_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
apps_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.apps_any_changed || steps.changed-files.outputs.dependencies_any_changed }}
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.user_docs_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@c3a1bb2c992d77180ae65be6ae6c166cf40f857c
with:
since_last_remote_commit: true
files_yaml: |
dependencies:
- tfhe/Cargo.toml
- concrete-csprng/**
- tfhe-zk-pok/**
csprng:
- concrete-csprng/**
zk_pok:
- tfhe-zk-pok/**
core_crypto:
- tfhe/src/core_crypto/**
boolean:
- tfhe/src/core_crypto/**
- tfhe/src/boolean/**
shortint:
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
high_level_api:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- '!tfhe/src/boolean/**'
- '!tfhe/src/js_on_wasm_api/**'
c_api:
- tfhe/src/**
examples:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- tfhe/examples/**
apps:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- apps/trivium/src/**
user_docs:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- 'tfhe/docs/**.md'
- README.md
- name: Aggregate file changes
id: aggregated-changes
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
steps.changed-files.outputs.boolean_any_changed == 'true' ||
steps.changed-files.outputs.shortint_any_changed == 'true' ||
steps.changed-files.outputs.high_level_api_any_changed == 'true' ||
steps.changed-files.outputs.c_api_any_changed == 'true' ||
steps.changed-files.outputs.examples_any_changed == 'true' ||
steps.changed-files.outputs.apps_any_changed == 'true' ||
steps.changed-files.outputs.user_docs_any_changed == 'true')
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
setup-instance:
name: Setup instance (cpu-tests)
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
cpu-tests:
name: CPU tests
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
with:
toolchain: stable
- name: Run concrete-csprng tests
if: needs.should-run.outputs.csprng_test == 'true'
run: |
make test_concrete_csprng
- name: Run tfhe-zk-pok tests
if: needs.should-run.outputs.zk_pok_test == 'true'
run: |
make test_zk_pok
- name: Run core tests
if: needs.should-run.outputs.core_crypto_test == 'true'
run: |
AVX512_SUPPORT=ON make test_core_crypto
- name: Run boolean tests
if: needs.should-run.outputs.boolean_test == 'true'
run: |
make test_boolean
- name: Run C API tests
if: needs.should-run.outputs.c_api_test == 'true'
run: |
make test_c_api
- name: Run user docs tests
if: needs.should-run.outputs.user_docs_test == 'true'
run: |
make test_user_doc
- name: Gen Keys if required
if: needs.should-run.outputs.shortint_test == 'true'
run: |
make gen_key_cache
- name: Run shortint tests
if: needs.should-run.outputs.shortint_test == 'true'
run: |
BIG_TESTS_INSTANCE=TRUE make test_shortint_ci
- name: Run high-level API tests
if: needs.should-run.outputs.high_level_api_test == 'true'
run: |
BIG_TESTS_INSTANCE=TRUE make test_high_level_api
- name: Run example tests
if: needs.should-run.outputs.examples_test == 'true'
run: |
make test_examples
make dark_market
- name: Run apps tests
if: needs.should-run.outputs.apps_test == 'true'
run: |
make test_trivium
make test_kreyvium
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cpu-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cpu-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"