Speed up gfortran debug CI builds #5793
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow-level settings: display name, triggering events, and run concurrency. | |
| name: 'Test Suite' | |
| on: | |
| push: | |
| branches: [master] | |
| pull_request: | |
| # ready_for_review is listed alongside the default activity types; the self-hosted jobs | |
| # below skip draft PRs, so this re-triggers the workflow once a draft is marked ready. | |
| types: [opened, synchronize, reopened, ready_for_review] | |
| workflow_dispatch: | |
| schedule: | |
| - cron: '0 6 * * 1' # Weekly Monday 6 AM UTC: refresh coverage cache before 7-day expiry | |
| concurrency: | |
| # PRs: group by branch (new push cancels old). Push to master: unique per SHA (never cancelled). | |
| # Scheduled and manually dispatched runs also group by ref and cancel an in-progress run, | |
| # because both expressions below only special-case the push event. | |
| group: ${{ github.workflow }}-${{ github.event_name == 'push' && github.sha || github.ref }} | |
| cancel-in-progress: ${{ github.event_name != 'push' }} | |
| jobs: | |
| # Formatting, spelling and lint checks on a GitHub-hosted runner. | |
| # rebuild-cache, github, self and case-optimization all list this job in `needs`. | |
| lint-gate: | |
| name: Lint Gate | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| - name: Setup Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.12' | |
| - name: Initialize MFC | |
| run: ./mfc.sh init | |
| - name: Check Formatting | |
| run: | | |
| # Run the formatter, then fail if it changed any tracked file | |
| # (git diff --exit-code returns non-zero when the working tree differs). | |
| ./mfc.sh format -j "$(nproc)" | |
| git diff --exit-code || (echo "::error::Code is not formatted. Run './mfc.sh format' locally." && exit 1) | |
| - name: Spell Check | |
| run: ./mfc.sh spelling | |
| - name: Lint Toolchain | |
| run: ./mfc.sh lint | |
| - name: Lint Source | |
| run: python3 toolchain/mfc/lint_source.py | |
| - name: Lint Docs | |
| run: python3 toolchain/mfc/lint_docs.py | |
| - name: Lint Parameter Docs | |
| run: python3 toolchain/mfc/lint_param_docs.py | |
| # Publishes three outputs consumed by later jobs: | |
| #   checkall / cases_py - results of the path filters in .github/file-filter.yml | |
| #   dep_changed         - 'true' when the triggering diff adds or removes a Fortran | |
| #                         use/include line (or when the diff could not be obtained) | |
| file-changes: | |
| name: Detect File Changes | |
| runs-on: 'ubuntu-latest' | |
| outputs: | |
| checkall: ${{ steps.changes.outputs.checkall }} | |
| cases_py: ${{ steps.changes.outputs.cases_py }} | |
| dep_changed: ${{ steps.dep-check.outputs.dep_changed }} | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| - name: Detect Changes | |
| uses: dorny/paths-filter@v3 | |
| id: changes | |
| with: | |
| filters: ".github/file-filter.yml" | |
| - name: Check for Fortran dependency changes | |
| id: dep-check | |
| env: | |
| # Token used by the `gh` CLI call below. | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Detect added/removed use/include statements that change the | |
| # Fortran dependency graph, which would make the coverage cache stale. | |
| PR_NUMBER="${{ github.event.pull_request.number }}" | |
| BEFORE="${{ github.event.before }}" | |
| AFTER="${{ github.event.after }}" | |
| if [ "${{ github.event_name }}" = "pull_request" ]; then | |
| # Default to dep_changed=true if gh pr diff fails (safe fallback). | |
| DIFF=$(gh pr diff "$PR_NUMBER" 2>/dev/null) || { | |
| echo "gh pr diff failed — defaulting to dep_changed=true for safety." | |
| echo "dep_changed=true" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| } | |
| elif [ "${{ github.event_name }}" = "push" ]; then | |
| # actions/checkout fetches a single commit by default (fetch-depth: 1), so the | |
| # pre-push commit is normally not in the clone and the diff below would always | |
| # fail into the dep_changed=true fallback. Fetch just that commit first. | |
| # Best effort: if the fetch fails, the git diff fallback still applies. | |
| git fetch --no-tags --depth=1 origin "$BEFORE" 2>/dev/null || true | |
| DIFF=$(git diff "$BEFORE".."$AFTER" 2>/dev/null) || { | |
| echo "git diff failed for push event — defaulting to dep_changed=true for safety." | |
| echo "dep_changed=true" >> "$GITHUB_OUTPUT" | |
| exit 0 | |
| } | |
| else | |
| # schedule / workflow_dispatch: no diff to inspect; dep_changed ends up false. | |
| DIFF="" | |
| fi | |
| # Match added (+) or removed (-) diff lines that start with `use <name>`, | |
| # `#:include `, or `include` followed by a quote character. | |
| if echo "$DIFF" | \ | |
| grep -qE '^[+-][[:space:]]*(use[[:space:],]+[a-zA-Z_]|#:include[[:space:]]|include[[:space:]]+['"'"'"])'; then | |
| echo "dep_changed=true" >> "$GITHUB_OUTPUT" | |
| echo "Fortran dependency change detected — will rebuild coverage cache." | |
| else | |
| echo "dep_changed=false" >> "$GITHUB_OUTPUT" | |
| fi | |
| # Rebuilds the coverage cache through a SLURM job on the Phoenix runner and publishes | |
| # toolchain/mfc/test/test_coverage_cache.json.gz both as a run artifact and as an | |
| # actions/cache entry. | |
| rebuild-cache: | |
| name: Rebuild Coverage Cache | |
| needs: [lint-gate, file-changes] | |
| # Only in the MFlowCode/MFC repository. For pull_request and push events the job runs | |
| # when the cases_py filter matched or a Fortran dependency change was detected; | |
| # manual and scheduled runs always rebuild. | |
| if: >- | |
| github.repository == 'MFlowCode/MFC' && | |
| ( | |
| (github.event_name == 'pull_request' && | |
| (needs.file-changes.outputs.cases_py == 'true' || | |
| needs.file-changes.outputs.dep_changed == 'true')) || | |
| (github.event_name == 'push' && | |
| (needs.file-changes.outputs.cases_py == 'true' || | |
| needs.file-changes.outputs.dep_changed == 'true')) || | |
| github.event_name == 'workflow_dispatch' || | |
| github.event_name == 'schedule' | |
| ) | |
| timeout-minutes: 240 | |
| runs-on: | |
| group: phoenix | |
| labels: gt | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| with: | |
| # For PRs, check out the PR head commit rather than the default merge commit. | |
| ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
| clean: false | |
| - name: Rebuild Cache via SLURM | |
| run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/rebuild-cache.sh cpu none phoenix | |
| - name: Print Logs | |
| if: always() | |
| run: cat rebuild-cache-cpu-none.out | |
| # The artifact is what the github and self jobs download "from rebuild in this run". | |
| - name: Upload Cache Artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: coverage-cache | |
| path: toolchain/mfc/test/test_coverage_cache.json.gz | |
| retention-days: 1 | |
| # The cache entry is what later runs restore; a failed save does not fail the job. | |
| - name: Save Coverage Cache | |
| uses: actions/cache/save@v4 | |
| with: | |
| path: toolchain/mfc/test/test_coverage_cache.json.gz | |
| key: coverage-cache-${{ github.event.pull_request.number || 'master' }}-${{ hashFiles('toolchain/mfc/test/cases.py') }}-${{ github.sha }} | |
| continue-on-error: true | |
| # Build and test on GitHub-hosted ubuntu/macos runners across debug, MPI, precision | |
| # and Intel-compiler combinations. | |
| github: | |
| name: Github | |
| needs: [lint-gate, file-changes, rebuild-cache] | |
| # !cancelled() lets this job run even when rebuild-cache was skipped; the explicit | |
| # result checks still require lint-gate and file-changes to have succeeded. | |
| if: >- | |
| !cancelled() && | |
| needs.lint-gate.result == 'success' && | |
| needs.file-changes.result == 'success' && | |
| (needs.rebuild-cache.result == 'success' || needs.rebuild-cache.result == 'skipped') && | |
| needs.file-changes.outputs.checkall == 'true' | |
| strategy: | |
| matrix: | |
| os: ['ubuntu', 'macos'] | |
| mpi: ['mpi'] | |
| precision: [''] | |
| debug: ['reldebug', 'no-debug'] | |
| intel: [true, false] | |
| # Intel builds are excluded on macOS. | |
| exclude: | |
| - os: macos | |
| intel: true | |
| # One extra Ubuntu configuration: single precision, no MPI, non-Intel. | |
| include: | |
| - os: ubuntu | |
| mpi: no-mpi | |
| precision: single | |
| debug: no-debug | |
| intel: false | |
| fail-fast: false | |
| # Job-level continue-on-error: a failing matrix entry does not fail the workflow run. | |
| continue-on-error: true | |
| runs-on: ${{ matrix.os }}-latest | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| - name: Fetch master for coverage diff | |
| run: | | |
| git fetch origin master:master --depth=1 | |
| git fetch --deepen=200 | |
| continue-on-error: true | |
| # Coverage cache lookup order: artifact from this run, then actions/cache. | |
| # The status step below reports which source was used. | |
| - name: Download Coverage Cache (from rebuild in this run) | |
| id: cache-artifact | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: coverage-cache | |
| path: toolchain/mfc/test | |
| continue-on-error: true | |
| - name: Restore Coverage Cache (from previous run) | |
| if: steps.cache-artifact.outcome != 'success' | |
| id: cache-restore | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: toolchain/mfc/test/test_coverage_cache.json.gz | |
| key: coverage-cache-${{ github.event.pull_request.number || 'master' }}-${{ hashFiles('toolchain/mfc/test/cases.py') }}-${{ github.sha }} | |
| restore-keys: | | |
| coverage-cache-${{ github.event.pull_request.number || 'master' }}- | |
| coverage-cache-master- | |
| continue-on-error: true | |
| - name: Coverage Cache Status | |
| run: | | |
| if [ "${{ steps.cache-artifact.outcome }}" = "success" ]; then | |
| echo "Coverage cache: loaded from rebuild artifact (this run)" | |
| elif [ "${{ steps.cache-restore.outputs.cache-hit }}" = "true" ]; then | |
| echo "Coverage cache: restored from actions/cache (previous run)" | |
| elif [ -f toolchain/mfc/test/test_coverage_cache.json.gz ]; then | |
| echo "Coverage cache: using committed fallback in repo" | |
| else | |
| echo "Coverage cache: none available — full test suite will run" | |
| fi | |
| - name: Setup MacOS | |
| if: matrix.os == 'macos' | |
| run: | | |
| brew update | |
| brew upgrade || true | |
| brew install coreutils python fftw hdf5 gcc@15 boost open-mpi lapack | |
| echo "FC=gfortran-15" >> $GITHUB_ENV | |
| echo "BOOST_INCLUDE=/opt/homebrew/include/" >> $GITHUB_ENV | |
| - name: Setup Ubuntu | |
| if: matrix.os == 'ubuntu' && matrix.intel == false | |
| run: | | |
| sudo apt update -y | |
| sudo apt install -y cmake gcc g++ python3 python3-dev hdf5-tools \ | |
| libfftw3-dev libhdf5-dev openmpi-bin libopenmpi-dev \ | |
| libblas-dev liblapack-dev | |
| - name: Setup Ubuntu (Intel) | |
| if: matrix.os == 'ubuntu' && matrix.intel == true | |
| run: | | |
| # NOTE(review): apt-key is deprecated in current Debian/Ubuntu releases; | |
| # consider a signed-by keyring when this step is next touched. | |
| wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | |
| sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | |
| sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" | |
| sudo apt-get update | |
| sudo apt-get install -y intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel | |
| # Export only new/changed env vars from setvars.sh. | |
| # `printenv >> $GITHUB_ENV` dumps all vars including shell internals | |
| # with special characters that corrupt GITHUB_ENV parsing. | |
| printenv | sort > /tmp/env_before | |
| source /opt/intel/oneapi/setvars.sh | |
| printenv | sort > /tmp/env_after | |
| diff /tmp/env_before /tmp/env_after | grep '^>' | sed 's/^> //' >> $GITHUB_ENV | |
| # Build is a `test --dry-run` invocation carrying the matrix's debug/MPI/precision flags. | |
| - name: Build | |
| run: | | |
| /bin/bash mfc.sh test -v --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} $PRECISION $TEST_ALL | |
| env: | |
| # Each variable expands to a flag or to an empty string, depending on the matrix entry. | |
| TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }} | |
| PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }} | |
| - name: Test | |
| run: | | |
| /bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) $ONLY_CHANGES $TEST_ALL $TEST_PCT | |
| env: | |
| TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }} | |
| # reldebug entries pass `-% 20` (presumably a 20% test sample — confirm against mfc.sh). | |
| TEST_PCT: ${{ matrix.debug == 'reldebug' && '-% 20' || '' }} | |
| # --only-changes is passed for pull_request events only. | |
| ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }} | |
| # Build and test on self-hosted cluster runners (Phoenix and Frontier), one matrix entry | |
| # per cluster/device/interface/shard. Build and test are each submitted through | |
| # .github/scripts/submit-slurm-job.sh. | |
| self: | |
| name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})" | |
| needs: [lint-gate, file-changes, rebuild-cache] | |
| # !cancelled() lets this job run even when rebuild-cache was skipped. Draft PRs and | |
| # repositories other than MFlowCode/MFC are skipped. | |
| if: >- | |
| !cancelled() && | |
| needs.lint-gate.result == 'success' && | |
| needs.file-changes.result == 'success' && | |
| (needs.rebuild-cache.result == 'success' || needs.rebuild-cache.result == 'skipped') && | |
| github.repository == 'MFlowCode/MFC' && | |
| needs.file-changes.outputs.checkall == 'true' && | |
| github.event.pull_request.draft != true | |
| # Frontier CCE compiler is periodically broken by toolchain updates (e.g. | |
| # cpe/25.03 introduced an IPA SIGSEGV in CCE 19.0.0). Allow Frontier to | |
| # fail without blocking PR merges; Phoenix remains a hard gate. | |
| continue-on-error: ${{ matrix.runner == 'frontier' }} | |
| timeout-minutes: 480 | |
| strategy: | |
| matrix: | |
| include: | |
| # Phoenix (GT) — build+test combined in SLURM job | |
| - runner: 'gt' | |
| cluster: 'phoenix' | |
| cluster_name: 'Georgia Tech | Phoenix' | |
| device: 'gpu' | |
| interface: 'acc' | |
| - runner: 'gt' | |
| cluster: 'phoenix' | |
| cluster_name: 'Georgia Tech | Phoenix' | |
| device: 'gpu' | |
| interface: 'omp' | |
| - runner: 'gt' | |
| cluster: 'phoenix' | |
| cluster_name: 'Georgia Tech | Phoenix' | |
| device: 'cpu' | |
| interface: 'none' | |
| # Frontier (ORNL) — CCE | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'acc' | |
| shard: '1/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'acc' | |
| shard: '2/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'omp' | |
| shard: '1/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'omp' | |
| shard: '2/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'cpu' | |
| interface: 'none' | |
| # Frontier AMD — build on login node, GPU tests sharded for batch partition | |
| - runner: 'frontier' | |
| cluster: 'frontier_amd' | |
| cluster_name: 'Oak Ridge | Frontier (AMD)' | |
| device: 'gpu' | |
| interface: 'omp' | |
| shard: '1/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier_amd' | |
| cluster_name: 'Oak Ridge | Frontier (AMD)' | |
| device: 'gpu' | |
| interface: 'omp' | |
| shard: '2/2' | |
| - runner: 'frontier' | |
| cluster: 'frontier_amd' | |
| cluster_name: 'Oak Ridge | Frontier (AMD)' | |
| device: 'cpu' | |
| interface: 'none' | |
| runs-on: | |
| group: phoenix | |
| labels: ${{ matrix.runner }} | |
| env: | |
| # Set only for Phoenix entries; empty elsewhere. | |
| NODE_OPTIONS: ${{ matrix.cluster == 'phoenix' && '--max-old-space-size=2048' || '' }} | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| with: | |
| # clean: false preserves .slurm_job_id files across reruns so | |
| # submit-slurm-job.sh can detect and cancel stale SLURM jobs on retry. | |
| clean: false | |
| - name: Clean stale output files | |
| run: rm -f *.out | |
| # Coverage cache lookup order: artifact from this run, then actions/cache. | |
| - name: Download Coverage Cache (from rebuild in this run) | |
| id: cache-artifact | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: coverage-cache | |
| path: toolchain/mfc/test | |
| continue-on-error: true | |
| - name: Restore Coverage Cache (from previous run) | |
| if: steps.cache-artifact.outcome != 'success' | |
| id: cache-restore | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: toolchain/mfc/test/test_coverage_cache.json.gz | |
| key: coverage-cache-${{ github.event.pull_request.number || 'master' }}-${{ hashFiles('toolchain/mfc/test/cases.py') }}-${{ github.sha }} | |
| restore-keys: | | |
| coverage-cache-${{ github.event.pull_request.number || 'master' }}- | |
| coverage-cache-master- | |
| continue-on-error: true | |
| - name: Coverage Cache Status | |
| run: | | |
| if [ "${{ steps.cache-artifact.outcome }}" = "success" ]; then | |
| echo "Coverage cache: loaded from rebuild artifact (this run)" | |
| elif [ "${{ steps.cache-restore.outputs.cache-hit }}" = "true" ]; then | |
| echo "Coverage cache: restored from actions/cache (previous run)" | |
| elif [ -f toolchain/mfc/test/test_coverage_cache.json.gz ]; then | |
| echo "Coverage cache: using committed fallback in repo" | |
| else | |
| echo "Coverage cache: none available — full test suite will run" | |
| fi | |
| - name: Fetch Dependencies | |
| if: matrix.cluster != 'phoenix' | |
| timeout-minutes: 60 | |
| run: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }} | |
| - name: Build | |
| run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/build.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} ${{ matrix.shard }} | |
| - name: Test | |
| run: bash .github/scripts/submit-slurm-job.sh .github/workflows/common/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} ${{ matrix.shard }} | |
| - name: Cancel SLURM Jobs | |
| if: cancelled() | |
| run: | | |
| # Cancel every SLURM job whose id was recorded in a *.slurm_job_id file. | |
| find . -name "*.slurm_job_id" | while read -r f; do | |
| job_id=$(cat "$f") | |
| echo "Cancelling SLURM job $job_id" | |
| scancel "$job_id" 2>/dev/null || true | |
| done | |
| - name: Compute Log Slug | |
| if: always() | |
| id: log | |
| run: | | |
| # Log file stems: <build|test>-<device>-<interface>, plus "-N-of-M" for sharded entries. | |
| SHARD_SUFFIX="" | |
| SHARD="${{ matrix.shard }}" | |
| if [ -n "$SHARD" ]; then | |
| SHARD_SUFFIX="-$(echo "$SHARD" | sed 's|/|-of-|')" | |
| fi | |
| echo "build_slug=build-${{ matrix.device }}-${{ matrix.interface }}${SHARD_SUFFIX}" >> "$GITHUB_OUTPUT" | |
| echo "test_slug=test-${{ matrix.device }}-${{ matrix.interface }}${SHARD_SUFFIX}" >> "$GITHUB_OUTPUT" | |
| - name: Print Logs | |
| if: always() | |
| run: | | |
| # if/fi rather than `[ -f ] && ...`: a missing file must not leave the loop, | |
| # and therefore the step, with a non-zero exit status. | |
| for f in ${{ steps.log.outputs.build_slug }}.out ${{ steps.log.outputs.test_slug }}.out; do | |
| if [ -f "$f" ]; then | |
| echo "=== $f ===" | |
| cat "$f" | |
| fi | |
| done | |
| - name: Archive Logs | |
| uses: actions/upload-artifact@v4 | |
| # always(): without a status function the condition implies success(), so logs | |
| # would be skipped when Build or Test fails. | |
| if: always() && matrix.cluster != 'phoenix' | |
| with: | |
| name: logs-${{ strategy.job-index }}-${{ steps.log.outputs.test_slug }} | |
| path: | | |
| ${{ steps.log.outputs.build_slug }}.out | |
| ${{ steps.log.outputs.test_slug }}.out | |
| # Case-optimization tests on the self-hosted GPU runners. Phoenix pre-builds in a | |
| # separate SLURM submission; the other clusters fetch dependencies first and then | |
| # build and run in one submission. | |
| case-optimization: | |
| name: "Case Opt | ${{ matrix.cluster_name }} (${{ matrix.device }}-${{ matrix.interface }})" | |
| if: github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.pull_request.draft != true | |
| needs: [lint-gate, file-changes] | |
| # Frontier is non-blocking for the same reason as the self job above. | |
| continue-on-error: ${{ matrix.runner == 'frontier' }} | |
| timeout-minutes: 480 | |
| strategy: | |
| matrix: | |
| include: | |
| - runner: 'gt' | |
| cluster: 'phoenix' | |
| cluster_name: 'Georgia Tech | Phoenix' | |
| device: 'gpu' | |
| interface: 'acc' | |
| - runner: 'gt' | |
| cluster: 'phoenix' | |
| cluster_name: 'Georgia Tech | Phoenix' | |
| device: 'gpu' | |
| interface: 'omp' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'acc' | |
| - runner: 'frontier' | |
| cluster: 'frontier' | |
| cluster_name: 'Oak Ridge | Frontier' | |
| device: 'gpu' | |
| interface: 'omp' | |
| - runner: 'frontier' | |
| cluster: 'frontier_amd' | |
| cluster_name: 'Oak Ridge | Frontier (AMD)' | |
| device: 'gpu' | |
| interface: 'omp' | |
| runs-on: | |
| group: phoenix | |
| labels: ${{ matrix.runner }} | |
| steps: | |
| - name: Clone | |
| uses: actions/checkout@v4 | |
| with: | |
| clean: false | |
| - name: Clean stale output files | |
| run: rm -f *.out | |
| - name: Fetch Dependencies | |
| if: matrix.cluster != 'phoenix' | |
| run: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }} | |
| # The Phoenix pre-build is submitted with device "cpu", not matrix.device. | |
| - name: Pre-Build (SLURM) | |
| if: matrix.cluster == 'phoenix' | |
| run: bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh cpu ${{ matrix.interface }} ${{ matrix.cluster }} | |
| - name: Build & Run Case-Optimization Tests | |
| if: matrix.cluster != 'phoenix' | |
| run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} | |
| - name: Run Case-Optimization Tests | |
| if: matrix.cluster == 'phoenix' | |
| run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} | |
| - name: Cancel SLURM Jobs | |
| if: cancelled() | |
| run: | | |
| # Cancel every SLURM job whose id was recorded in a *.slurm_job_id file. | |
| find . -name "*.slurm_job_id" | while read -r f; do | |
| job_id=$(cat "$f") | |
| echo "Cancelling SLURM job $job_id" | |
| scancel "$job_id" 2>/dev/null || true | |
| done | |
| - name: Print Logs | |
| if: always() | |
| run: | | |
| # The pre-build is submitted with device "cpu", so its log presumably carries | |
| # "cpu" in its name rather than matrix.device (TODO confirm against | |
| # submit-slurm-job.sh); both spellings are checked. | |
| # if/fi rather than `[ -f ] && ...`: a missing file must not leave the loop, | |
| # and therefore the step, with a non-zero exit status. | |
| for f in prebuild-case-optimization-${{ matrix.device }}-${{ matrix.interface }}.out \ | |
| prebuild-case-optimization-cpu-${{ matrix.interface }}.out \ | |
| run-case-optimization-${{ matrix.device }}-${{ matrix.interface }}.out; do | |
| if [ -f "$f" ]; then | |
| echo "=== $f ===" | |
| cat "$f" | |
| fi | |
| done | |
| - name: Archive Logs | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: case-opt-${{ strategy.job-index }}-${{ matrix.cluster }}-${{ matrix.interface }} | |
| path: | | |
| prebuild-case-optimization-${{ matrix.device }}-${{ matrix.interface }}.out | |
| prebuild-case-optimization-cpu-${{ matrix.interface }}.out | |
| run-case-optimization-${{ matrix.device }}-${{ matrix.interface }}.out | |