Skip to content

Commit 1619a34

Browse files
authored
fix (CI): Refactor SSH runners (#41991)
* Change ssh runner type
* Add wait step to SSH runner workflow
* Rename wait step to wait2 in ssh-runner.yml
* Remove wait step from ssh-runner.yml
  Removed the wait step from the SSH runner workflow.
* Update runner type for single GPU A10 instance
* Update SSH runner version to 1.90.3
* Add sha256sum to ssh-runner workflow
* Update runner type and remove unused steps
1 parent ff0f7d6 commit 1619a34

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

.github/workflows/ssh-runner.yml

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ on:
44
workflow_dispatch:
55
inputs:
66
runner_type:
7-
description: 'Type of runner to test (a10 or t4)'
7+
description: 'Type of runner to test (a10)'
88
required: true
99
docker_image:
1010
description: 'Name of the Docker image'
@@ -36,14 +36,10 @@ jobs:
3636
NUM_GPUS: ${{ github.event.inputs.num_gpus }}
3737
RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
3838
run: |
39-
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then
40-
echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
41-
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then
42-
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
43-
elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
44-
echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
39+
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
40+
echo "RUNNER=aws-g5-4xlarge-cache-ssh" >> $GITHUB_ENV
4541
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then
46-
echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
42+
echo "RUNNER=aws-g5-12xlarge-cache-ssh" >> $GITHUB_ENV
4743
else
4844
echo "RUNNER=" >> $GITHUB_ENV
4945
fi
@@ -61,8 +57,6 @@ jobs:
6157
group: ${{ needs.get_runner.outputs.RUNNER }}
6258
container:
6359
image: ${{ github.event.inputs.docker_image }}
64-
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
65-
6660
steps:
6761
- name: Update clone
6862
working-directory: /transformers
@@ -106,7 +100,7 @@ jobs:
106100
else
107101
echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
108102
fi
109-
103+
110104
- name: Tailscale # In order to be able to SSH when a test fails
111105
uses: huggingface/tailscale-action@main
112106
with:

0 commit comments

Comments
 (0)