|
4 | 4 | workflow_dispatch: |
5 | 5 | inputs: |
6 | 6 | runner_type: |
7 | | - description: 'Type of runner to test (a10 or t4)' |
| 7 | + description: 'Type of runner to test (a10)' |
8 | 8 | required: true |
9 | 9 | docker_image: |
10 | 10 | description: 'Name of the Docker image' |
@@ -36,14 +36,10 @@ jobs: |
36 | 36 | NUM_GPUS: ${{ github.event.inputs.num_gpus }} |
37 | 37 | RUNNER_TYPE: ${{ github.event.inputs.runner_type }} |
38 | 38 | run: | |
39 | | - if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then |
40 | | - echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV |
41 | | - elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then |
42 | | - echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV |
43 | | - elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then |
44 | | - echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV |
| 39 | + if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then |
| 40 | + echo "RUNNER=aws-g5-4xlarge-cache-ssh" >> $GITHUB_ENV |
45 | 41 | elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then |
46 | | - echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV |
| 42 | + echo "RUNNER=aws-g5-12xlarge-cache-ssh" >> $GITHUB_ENV |
47 | 43 | else |
48 | 44 | echo "RUNNER=" >> $GITHUB_ENV |
49 | 45 | fi |
|
61 | 57 | group: ${{ needs.get_runner.outputs.RUNNER }} |
62 | 58 | container: |
63 | 59 | image: ${{ github.event.inputs.docker_image }} |
64 | | - options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ |
65 | | - |
66 | 60 | steps: |
67 | 61 | - name: Update clone |
68 | 62 | working-directory: /transformers |
@@ -106,7 +100,7 @@ jobs: |
106 | 100 | else |
107 | 101 | echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV |
108 | 102 | fi |
109 | | -
|
| 103 | + |
110 | 104 | - name: Tailscale # In order to be able to SSH when a test fails |
111 | 105 | uses: huggingface/tailscale-action@main |
112 | 106 | with: |
|
0 commit comments