From 9a8000fbf112ddc54b5afaa8aa2575f80f8ff8de Mon Sep 17 00:00:00 2001
From: Eli Uriegas
Date: Fri, 25 Feb 2022 11:33:33 -0800
Subject: [PATCH] ci: Bump CUDA 11.1 -> 11.3

We're deprecating support for CUDA 11.1 so moving all of our CUDA 11.1
workflows to CUDA 11.3

Signed-off-by: Eli Uriegas

Pull Request resolved: https://github.com/pytorch/pytorch/pull/73449

Signed-off-by: Eli Uriegas
---
 .github/generated-ciflow-ruleset.json         |  19 +-
 .github/scripts/generate_ci_workflows.py      |  28 +-
 .github/workflows/generated-docker-builds.yml |   2 -
 ...torch-linux-xenial-cuda11.1-py3.7-gcc7.yml | 239 ---------
 ...inux-xenial-cuda11.3-py3.7-gcc7-debug.yml} |  22 +-
 ...rated-periodic-win-vs2019-cuda11.1-py3.yml | 452 ------
 6 files changed, 18 insertions(+), 744 deletions(-)
 delete mode 100644 .github/workflows/generated-periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7.yml
 rename .github/workflows/{generated-periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug.yml => generated-periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug.yml} (98%)
 delete mode 100644 .github/workflows/generated-periodic-win-vs2019-cuda11.1-py3.yml

diff --git a/.github/generated-ciflow-ruleset.json b/.github/generated-ciflow-ruleset.json
index 81abc2237bc43..d7ea54bbaf0e3 100644
--- a/.github/generated-ciflow-ruleset.json
+++ b/.github/generated-ciflow-ruleset.json
@@ -33,11 +33,9 @@
       "macos-11-py3-x86-64",
       "parallelnative-linux-xenial-py3.7-gcc5.4",
       "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
       "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
       "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "periodic-win-vs2019-cuda11.1-py3",
+      "periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug",
       "periodic-win-vs2019-cuda11.5-py3",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
@@ -114,11 +112,9 @@
       "linux-xenial-cuda11.3-py3.7-gcc7",
       "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
       "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
       "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
       "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "periodic-win-vs2019-cuda11.1-py3",
+      "periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug",
       "periodic-win-vs2019-cuda11.5-py3",
       "win-vs2019-cuda11.3-py3"
     ],
@@ -168,8 +164,7 @@
     "ciflow/libtorch": [
       "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
       "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7"
+      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7"
     ],
     "ciflow/linux": [
       "caffe2-linux-xenial-py3.7-gcc5.4",
@@ -193,10 +188,9 @@
       "linux-xenial-py3.7-gcc7-no-ops",
       "parallelnative-linux-xenial-py3.7-gcc5.4",
       "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
       "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
       "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
+      "periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
       "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
@@ -236,11 +230,9 @@
       "ios-12-5-1-arm64-metal",
       "linux-docs-push",
       "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
       "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
       "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "periodic-win-vs2019-cuda11.1-py3",
+      "periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug",
       "periodic-win-vs2019-cuda11.5-py3"
     ],
     "ciflow/slow": [
@@ -287,7 +279,6 @@
       "linux-vulkan-bionic-py3.7-clang9"
     ],
     "ciflow/win": [
-      "periodic-win-vs2019-cuda11.1-py3",
       "periodic-win-vs2019-cuda11.5-py3",
       "win-vs2019-cpu-py3",
       "win-vs2019-cuda11.3-py3"
diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py
index 82198b077ea6a..f0f1a3d3f9129 100755
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@@ -420,18 +420,6 @@ def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
             labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
         ),
     ),
-    CIWorkflow(
-        arch="windows",
-        build_environment="periodic-win-vs2019-cuda11.1-py3",
-        cuda_version="11.1",
-        enable_distributed_test=False,
-        test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
-        num_test_shards=2,
-        is_scheduled="45 0,4,8,12,16,20 * * *",
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_WIN, LABEL_CIFLOW_CUDA}
-        ),
-    ),
 ]

 LINUX_WORKFLOWS = [
@@ -663,8 +651,8 @@ def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
     ),
     CIWorkflow(
         arch="linux",
-        build_environment="periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
+        build_environment="periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug",
+        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
         test_runner_type=LINUX_CUDA_TEST_RUNNER,
         num_test_shards=2,
         build_with_debug=True,
@@ -673,18 +661,6 @@ def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
             labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA}
         ),
     ),
-    CIWorkflow(
-        arch="linux",
-        build_environment="periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
-        test_runner_type=LINUX_CUDA_TEST_RUNNER,
-        build_generates_artifacts=False,
-        exclude_test=True,
-        is_scheduled="45 0,4,8,12,16,20 * * *",
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_LIBTORCH, LABEL_CIFLOW_CUDA},
-        ),
-    ),
     CIWorkflow(
         arch="linux",
         build_environment="linux-bionic-py3.7-clang9",
diff --git a/.github/workflows/generated-docker-builds.yml b/.github/workflows/generated-docker-builds.yml
index f2430f744e5ea..357305f2b3b2d 100644
--- a/.github/workflows/generated-docker-builds.yml
+++ b/.github/workflows/generated-docker-builds.yml
@@ -40,8 +40,6 @@ jobs:
             docker_image_short_name: 'pytorch-linux-bionic-rocm4.5-py3.7'
           - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
             docker_image_short_name: 'pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
-          - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7'
-            docker_image_short_name: 'pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7'
           - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7'
            docker_image_short_name:
'pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7' - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c' diff --git a/.github/workflows/generated-periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7.yml b/.github/workflows/generated-periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7.yml deleted file mode 100644 index 742d7a3074b95..0000000000000 --- a/.github/workflows/generated-periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7.yml +++ /dev/null @@ -1,239 +0,0 @@ -# @generated DO NOT EDIT MANUALLY -# Template is at: .github/templates/linux_ci_workflow.yml.j2 -# Generation script: .github/scripts/generate_ci_workflows.py -name: periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7 - -on: - push: - tags: - - 'ciflow/all/*' - - 'ciflow/cuda/*' - - 'ciflow/libtorch/*' - - 'ciflow/linux/*' - - 'ciflow/scheduled/*' - schedule: - - cron: 45 0,4,8,12,16,20 * * * - workflow_dispatch: - -env: - BUILD_ENVIRONMENT: periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7 - DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7 - SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 - XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla - TORCH_CUDA_ARCH_LIST: 5.2 - IN_CI: 1 - IS_GHA: 1 - # This is used for the phase of adding wheel tests only, will be removed once completed - IN_WHEEL_TEST: 1 - # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh - CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - AWS_DEFAULT_REGION: us-east-1 - PR_NUMBER: ${{ github.event.pull_request.number }} - SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - PYTORCH_RETRY_TEST_CASES: 1 -concurrency: - group: periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true - -jobs: - - build: - runs-on: linux.2xlarge - timeout-minutes: 240 - env: - JOB_BASE_NAME: periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7-build - outputs: - docker_image: ${{ steps.calculate-tag.outputs.docker_image }} - steps: - - name: print labels - run: echo "${PR_LABELS}" - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Log in to ECR - env: - AWS_RETRY_MODE: standard - AWS_MAX_ATTEMPTS: 5 - run: | - AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \ - --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com" - - name: Chown workspace - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${ALPINE_IMAGE}" - # Ensure the working directory gets chowned back to the current user - 
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Clean workspace - run: | - rm -rf "${GITHUB_WORKSPACE}" - mkdir "${GITHUB_WORKSPACE}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Preserve github env variables for use in docker - run: | - env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}" - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - - name: Calculate docker image tag - id: calculate-tag - run: | - DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker) - echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}" - echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}" - echo "::set-output name=docker_tag::${DOCKER_TAG}" - echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" - - name: Check if image should be built - id: check - env: - BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }} - run: | - set -x - # Check if image already exists, if it does then skip building it - if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then - exit 0 - fi - if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then - # if we're on the base branch then use the parent commit - MERGE_BASE=$(git rev-parse HEAD~) - else - # otherwise we're on a PR, so use the most recent base commit - MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION") - fi - # Covers the case where a previous tag doesn't exist for the tree - # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly - if ! 
git rev-parse "$MERGE_BASE:.circleci/docker"; then - echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit" - exit 1 - fi - PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker") - # If no image exists but the hash is the same as the previous hash then we should error out here - if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then - echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch" - echo " contact the PyTorch team to restore the original images" - exit 1 - fi - echo ::set-output name=rebuild::yes - - name: Build and push docker image - if: ${{ steps.check.outputs.rebuild }} - env: - DOCKER_SKIP_S3_UPLOAD: 1 - working-directory: .circleci/docker - run: | - export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/} - ./build_docker.sh - - name: Pull Docker image - run: | - retry () { - "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") - } - retry docker pull "${DOCKER_IMAGE}" - - name: Parse ref - shell: bash - id: parse-ref - run: ./.github/scripts/parse_ref.py - - name: Build - env: - BRANCH: ${{ steps.parse-ref.outputs.branch }} - run: | - # detached container should get cleaned up by teardown_ec2_linux - container_name=$(docker run \ - -e BUILD_ENVIRONMENT \ - -e JOB_BASE_NAME \ - -e MAX_JOBS="$(nproc --ignore=2)" \ - -e AWS_DEFAULT_REGION \ - -e IS_GHA \ - -e PR_NUMBER \ - -e SHA1 \ - -e BRANCH \ - -e GITHUB_RUN_ID \ - -e SCCACHE_BUCKET \ - -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ - -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \ - -e SKIP_SCCACHE_INITIALIZATION=1 \ - -e TORCH_CUDA_ARCH_LIST \ - -e PR_LABELS \ - -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \ - --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ - --security-opt seccomp=unconfined \ - --cap-add=SYS_PTRACE \ - --tty \ - --detach \ - --user jenkins \ - -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ - -w /var/lib/jenkins/workspace \ - "${DOCKER_IMAGE}" - ) - docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh' - - name: Display and upload binary build size statistics (Click Me) - # temporary hack: set CIRCLE_* vars, until we update - # tools/stats/print_test_stats.py to natively support GitHub Actions - env: - SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} - BRANCH: ${{ steps.parse-ref.outputs.branch }} - TAG: ${{ steps.parse-ref.outputs.tag }} - WORKFLOW_ID: '${{ github.run_id }}' - run: | - COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0) - export COMMIT_TIME - pip3 install requests==2.26 boto3==1.16.34 - python3 -m tools.stats.upload_binary_size_to_scuba || exit 0 - - name: Chown workspace - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . 
- - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Chown workspace - if: always() - run: | - # Ensure the working directory gets chowned back to the current user - docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . - - name: Kill containers, clean up images - if: always() - run: | - # ignore expansion of "docker ps -q" since it could be empty - # shellcheck disable=SC2046 - docker stop $(docker ps -q) || true - # Prune all of the docker images - docker system prune -af - - name: Hold runner for 2 hours or until ssh sessions have drained - # Always hold for active ssh sessions - if: always() - run: .github/scripts/wait_for_ssh_to_drain.sh - - name: Clean up docker images - if: always() - run: | - # Prune all of the docker images - docker system prune -af diff --git a/.github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug.yml b/.github/workflows/generated-periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug.yml similarity index 98% rename from .github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug.yml rename to .github/workflows/generated-periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug.yml index 21b5ebec0d4d6..a193a55b4fc3f 100644 --- a/.github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug.yml +++ b/.github/workflows/generated-periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug.yml @@ -1,7 +1,7 @@ # @generated DO NOT EDIT MANUALLY # Template is at: .github/templates/linux_ci_workflow.yml.j2 # Generation script: .github/scripts/generate_ci_workflows.py -name: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug +name: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug on: push: @@ -15,8 +15,8 @@ on: workflow_dispatch: env: - BUILD_ENVIRONMENT: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug - DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7 + BUILD_ENVIRONMENT: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug + DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7 SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla TORCH_CUDA_ARCH_LIST: 5.2 @@ -35,7 +35,7 @@ env: PYTORCH_RETRY_TEST_CASES: 1 DEBUG: 1 concurrency: - group: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + group: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} cancel-in-progress: true jobs: @@ -44,7 +44,7 @@ jobs: runs-on: linux.2xlarge timeout-minutes: 240 env: - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-build + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-build outputs: docker_image: ${{ steps.calculate-tag.outputs.docker_image }} steps: @@ -256,7 +256,7 @@ jobs: timeout-minutes: 240 env: DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }} - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test TEST_CONFIG: distributed SHARD_NUMBER: 1 NUM_TEST_SHARDS: 1 @@ -476,7 +476,7 @@ jobs: env: AWS_DEFAULT_REGION: us-east-1 BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: 
periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test PR_NUMBER: ${{ github.event.pull_request.number }} SHA1: ${{ github.event.pull_request.head.sha || github.sha }} TAG: ${{ steps.parse-ref.outputs.tag }} @@ -510,7 +510,7 @@ jobs: timeout-minutes: 240 env: DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }} - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test TEST_CONFIG: default SHARD_NUMBER: 1 NUM_TEST_SHARDS: 2 @@ -730,7 +730,7 @@ jobs: env: AWS_DEFAULT_REGION: us-east-1 BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test PR_NUMBER: ${{ github.event.pull_request.number }} SHA1: ${{ github.event.pull_request.head.sha || github.sha }} TAG: ${{ steps.parse-ref.outputs.tag }} @@ -764,7 +764,7 @@ jobs: timeout-minutes: 240 env: DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }} - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test TEST_CONFIG: default SHARD_NUMBER: 2 NUM_TEST_SHARDS: 2 @@ -984,7 +984,7 @@ jobs: env: AWS_DEFAULT_REGION: us-east-1 BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test + JOB_BASE_NAME: periodic-linux-xenial-cuda11.3-py3.7-gcc7-debug-test PR_NUMBER: ${{ github.event.pull_request.number }} SHA1: ${{ github.event.pull_request.head.sha || github.sha }} TAG: ${{ steps.parse-ref.outputs.tag }} diff --git a/.github/workflows/generated-periodic-win-vs2019-cuda11.1-py3.yml b/.github/workflows/generated-periodic-win-vs2019-cuda11.1-py3.yml deleted file mode 100644 index 7394300b3faf4..0000000000000 --- a/.github/workflows/generated-periodic-win-vs2019-cuda11.1-py3.yml +++ /dev/null @@ -1,452 +0,0 @@ -# @generated DO NOT EDIT MANUALLY -# Template is at: .github/templates/windows_ci_workflow.yml.j2 -# Generation script: .github/scripts/generate_ci_workflows.py -name: periodic-win-vs2019-cuda11.1-py3 - -on: - push: - tags: - - 'ciflow/all/*' - - 'ciflow/cuda/*' - - 'ciflow/scheduled/*' - - 'ciflow/win/*' - schedule: - - cron: 45 0,4,8,12,16,20 * * * - workflow_dispatch: - -env: - BUILD_ENVIRONMENT: periodic-win-vs2019-cuda11.1-py3 - BUILD_WHEEL: 1 - MAX_JOBS: 8 - CUDA_VERSION: "11.1" - IN_CI: 1 - IS_GHA: 1 - INSTALL_WINDOWS_SDK: 1 - PYTHON_VERSION: "3.8" - PYTORCH_RETRY_TEST_CASES: 1 - PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} - SCCACHE_BUCKET: "ossci-compiler-cache" - VC_PRODUCT: "BuildTools" - VC_VERSION: "" - VS_VERSION: "16.8.6" - VC_YEAR: "2019" - ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" - no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock - AWS_DEFAULT_REGION: us-east-1 - PR_NUMBER: ${{ github.event.pull_request.number }} - SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - TORCH_CUDA_ARCH_LIST: "7.0" - USE_CUDA: 1 - -concurrency: - group: periodic-win-vs2019-cuda11.1-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} - cancel-in-progress: true - -jobs: - build: - runs-on: "windows.4xlarge" - timeout-minutes: 240 - env: - JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-build - http_proxy: 
"http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - steps: - - name: print labels - run: echo "${PR_LABELS}" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: Install Visual Studio 2019 toolchain - shell: powershell - run: | - .\.circleci\scripts\vs_install.ps1 - - name: Install Cuda - shell: bash - run: | - .circleci/scripts/windows_cuda_install.sh - - name: Install Cudnn - shell: bash - run: | - .circleci/scripts/windows_cudnn_install.sh - - uses: actions/setup-python@v2 - name: Setup Python3 - with: - python-version: '3.x' - - name: Parse ref - shell: bash - id: parse-ref - run: ./.github/scripts/parse_ref.py - - name: Build - shell: bash - env: - PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ - BRANCH: ${{ steps.parse-ref.outputs.branch }} - run: | - .jenkins/pytorch/win-build.sh - # Upload to github so that people can click and download artifacts - - name: Upload artifacts to s3 - uses: seemethere/upload-artifact-s3@v3 - with: - retention-days: 14 - if-no-files-found: error - name: ${{ env.BUILD_ENVIRONMENT }} - path: C:\${{ github.run_id }}\build-results - - name: Wait until all sessions have drained - shell: powershell - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - - name: Cleanup build-results and workspaces - if: always() - shell: bash - env: - PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ - # Should remove the entirety of pytorch-${{ github.run_id }} - run: | - rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}" - rm -rf ./* - test_default_1_2: - name: test (default, 1, 2, windows.8xlarge.nvidia.gpu) - timeout-minutes: 240 - env: - JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test - SHARD_NUMBER: 1 - NUM_TEST_SHARDS: 2 - TEST_CONFIG: default - http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - PR_BODY: ${{ github.event.pull_request.body }} - needs: build - runs-on: windows.8xlarge.nvidia.gpu - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled 
from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - - name: Install Visual Studio 2019 toolchain - shell: powershell - run: | - .\.circleci\scripts\vs_install.ps1 - - name: Install Cuda - shell: bash - run: | - .circleci/scripts/windows_cuda_install.sh - - name: Install Cudnn - shell: bash - run: | - .circleci/scripts/windows_cudnn_install.sh - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download PyTorch Build Artifacts - with: - name: ${{ env.BUILD_ENVIRONMENT }} - path: C:\${{ github.run_id }}\build-results - - name: Check build-results folder - shell: powershell - run: | - tree /F C:\$Env:GITHUB_RUN_ID\build-results - # Needed for coverage in win-test.sh - - uses: actions/setup-python@v2 - name: Setup Python3 - with: - python-version: '3.x' - - name: Test - shell: bash - env: - PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ - # Time out the test phase after 3.5 hours - timeout-minutes: 210 - run: | - .jenkins/pytorch/win-test.sh - - name: Zip JSONs for upload - if: always() - env: - FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu' - shell: powershell - run: | - # -ir => recursive include all files in pattern - 7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json' - - uses: seemethere/upload-artifact-s3@v3 - name: Store Test Downloaded JSONs on S3 - if: always() - with: - retention-days: 14 - if-no-files-found: warn - path: - test-jsons-*.zip - - name: Zip test reports for upload - if: always() - env: - FILE_SUFFIX: '${{ github.job }}-default-1-2-windows.8xlarge.nvidia.gpu' - shell: powershell - run: | - # -ir => recursive include all files in pattern - 7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml' - - uses: seemethere/upload-artifact-s3@v3 - name: Store Test Reports on S3 - if: always() - with: - retention-days: 14 - if-no-files-found: error - path: - test-reports-*.zip - - name: Install render_test_results dependencies - if: always() - shell: bash - run: | - python3 -m pip install junitparser==2.1.1 rich==10.9.0 - - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]" - if: always() - shell: bash - # Encoding is weird on windows, just try to default to utf-8 if possible - env: - PYTHONIOENCODING: "utf-8" - run: | - python3 tools/render_junit.py test/ - - name: Wait until all sessions have drained - shell: powershell - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - 
- name: Parse ref - shell: bash - id: parse-ref - run: ./.github/scripts/parse_ref.py - - name: Upload test statistics - if: always() - env: - AWS_DEFAULT_REGION: us-east-1 - BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test - PR_NUMBER: ${{ github.event.pull_request.number }} - SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - TAG: ${{ steps.parse-ref.outputs.tag }} - WORKFLOW_ID: '${{ github.run_id }}' - shell: bash - run: | - python3 -m pip install -r requirements.txt - python3 -m pip install boto3==1.19.12 - python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test - - name: Cleanup workspace - if: always() - shell: bash - # Should remove the entirety of pytorch-${{ github.run_id }} - run: | - rm -rf ./* - test_default_2_2: - name: test (default, 2, 2, windows.8xlarge.nvidia.gpu) - timeout-minutes: 240 - env: - JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test - SHARD_NUMBER: 2 - NUM_TEST_SHARDS: 2 - TEST_CONFIG: default - http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" - PR_BODY: ${{ github.event.pull_request.body }} - needs: build - runs-on: windows.8xlarge.nvidia.gpu - steps: - - name: Display EC2 information - shell: bash - run: | - set -euo pipefail - function get_ec2_metadata() { - # Pulled from instance metadata endpoint for EC2 - # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - category=$1 - curl -fsSL "http://169.254.169.254/latest/meta-data/${category}" - } - echo "ami-id: $(get_ec2_metadata ami-id)" - echo "instance-id: $(get_ec2_metadata instance-id)" - echo "instance-type: $(get_ec2_metadata instance-type)" - - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: seemethere/add-github-ssh-key@v1 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PyTorch - uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # deep clone, to allow use of git merge-base - fetch-depth: 0 - submodules: recursive - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - - name: Install Visual Studio 2019 toolchain - shell: powershell - run: | - .\.circleci\scripts\vs_install.ps1 - - name: Install Cuda - shell: bash - run: | - .circleci/scripts/windows_cuda_install.sh - - name: Install Cudnn - shell: bash - run: | - .circleci/scripts/windows_cudnn_install.sh - - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b - name: Download PyTorch Build Artifacts - with: - name: ${{ env.BUILD_ENVIRONMENT }} - path: C:\${{ github.run_id }}\build-results - - name: Check build-results folder - shell: powershell - run: | - tree /F C:\$Env:GITHUB_RUN_ID\build-results - # Needed for coverage in win-test.sh - - uses: actions/setup-python@v2 - name: Setup Python3 - with: - python-version: '3.x' - - name: Test - shell: bash - env: - PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/ - # Time out the test phase after 3.5 hours - timeout-minutes: 210 - run: | - .jenkins/pytorch/win-test.sh - - name: Zip JSONs for upload - if: always() - env: - FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu' - shell: powershell - run: | - # -ir => recursive 
include all files in pattern - 7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json' - - uses: seemethere/upload-artifact-s3@v3 - name: Store Test Downloaded JSONs on S3 - if: always() - with: - retention-days: 14 - if-no-files-found: warn - path: - test-jsons-*.zip - - name: Zip test reports for upload - if: always() - env: - FILE_SUFFIX: '${{ github.job }}-default-2-2-windows.8xlarge.nvidia.gpu' - shell: powershell - run: | - # -ir => recursive include all files in pattern - 7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml' - - uses: seemethere/upload-artifact-s3@v3 - name: Store Test Reports on S3 - if: always() - with: - retention-days: 14 - if-no-files-found: error - path: - test-reports-*.zip - - name: Install render_test_results dependencies - if: always() - shell: bash - run: | - python3 -m pip install junitparser==2.1.1 rich==10.9.0 - - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]" - if: always() - shell: bash - # Encoding is weird on windows, just try to default to utf-8 if possible - env: - PYTHONIOENCODING: "utf-8" - run: | - python3 tools/render_junit.py test/ - - name: Wait until all sessions have drained - shell: powershell - if: always() - timeout-minutes: 120 - run: | - .github\scripts\wait_for_ssh_to_drain.ps1 - - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) - shell: powershell - if: always() - run: | - .github\scripts\kill_active_ssh_sessions.ps1 - - name: Parse ref - shell: bash - id: parse-ref - run: ./.github/scripts/parse_ref.py - - name: Upload test statistics - if: always() - env: - AWS_DEFAULT_REGION: us-east-1 - BRANCH: ${{ steps.parse-ref.outputs.branch }} - JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test - PR_NUMBER: ${{ github.event.pull_request.number }} - SHA1: ${{ github.event.pull_request.head.sha || github.sha }} - TAG: ${{ steps.parse-ref.outputs.tag }} - WORKFLOW_ID: '${{ github.run_id }}' - shell: bash - run: | - python3 -m pip install -r requirements.txt - python3 -m pip install boto3==1.19.12 - python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test - - name: Cleanup workspace - if: always() - shell: bash - # Should remove the entirety of pytorch-${{ github.run_id }} - run: | - rm -rf ./*
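
The workflow YAML and ruleset JSON touched by this patch are marked "@generated DO NOT EDIT MANUALLY" and come from .github/scripts/generate_ci_workflows.py. As a rough sketch only (the exact invocation may differ in a given checkout), a change like this one is made by editing the generator and regenerating, then checking that the committed files match the generator output:

    # regenerate the workflow YAML (and ciflow ruleset) from the generator script
    python3 .github/scripts/generate_ci_workflows.py
    # fail if the regenerated files differ from what is checked in
    git diff --exit-code .github/workflows .github/generated-ciflow-ruleset.json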