From 65a894dc5b0e9f46076f280eeaa7512f50f26bb6 Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:26:01 +0200 Subject: [PATCH] Upgrade to SynapseAI 1.18.0 (#1418) --- .github/workflows/fast_tests.yml | 8 ++--- .github/workflows/slow_tests.yml | 36 +++++++++---------- .github/workflows/slow_tests_gaudi2.yml | 32 ++++++++--------- Makefile | 4 +-- README.md | 8 ++--- docs/Dockerfile | 2 +- docs/source/installation.mdx | 2 +- docs/source/usage_guides/deepspeed.mdx | 4 +-- examples/audio-classification/README.md | 2 +- examples/gaudi_spawn.py | 2 +- examples/kubernetes/Dockerfile | 6 ++-- examples/kubernetes/README.md | 6 ++-- examples/kubernetes/README.md.gotmpl | 6 ++-- examples/kubernetes/docker-compose.yaml | 18 +++++----- examples/multi-node-training/EFA/Dockerfile | 4 +-- .../multi-node-training/GaudiNIC/Dockerfile | 4 +-- examples/speech-recognition/README.md | 2 +- examples/text-generation/README.md | 2 +- .../text-generation-pipeline/README.md | 2 +- notebooks/AI_HW_Summit_2022.ipynb | 2 +- optimum/habana/accelerate/accelerator.py | 2 +- optimum/habana/accelerate/state.py | 2 +- optimum/habana/utils.py | 2 +- 23 files changed, 79 insertions(+), 79 deletions(-) diff --git a/.github/workflows/fast_tests.yml b/.github/workflows/fast_tests.yml index 501a7f1aa2..990de2ccfb 100644 --- a/.github/workflows/fast_tests.yml +++ b/.github/workflows/fast_tests.yml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -34,7 +34,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + 
vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/fast_tests.sh diffusers: name: Run tests for optimum.habana.diffusers @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -57,5 +57,5 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/fast_tests_diffusers.sh diff --git a/.github/workflows/slow_tests.yml b/.github/workflows/slow_tests.yml index 574cb71d18..b969273a3c 100644 --- a/.github/workflows/slow_tests.yml +++ b/.github/workflows/slow_tests.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -31,7 +31,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/example_diff_tests.sh stable-diffusion: name: Test Stable Diffusion @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -57,7 
+57,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_diffusers.sh deepspeed: name: Test DeepSpeed models @@ -72,7 +72,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -84,7 +84,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_deepspeed.sh multi-card: name: Test multi-card models @@ -99,7 +99,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -111,7 +111,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_8x.sh single-card: name: Test single-card models @@ -127,7 +127,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -139,7 +139,7 
@@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_1x.sh albert-xxl-single-card: name: Test single-card ALBERT XXL @@ -158,7 +158,7 @@ jobs: - name: Pull image if: github.event.schedule == '0 21 * * 6' run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run test if: github.event.schedule == '0 21 * * 6' run: | @@ -171,7 +171,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/albert_xxl_1x.sh - name: Warning if: github.event.schedule != '0 21 * * 6' @@ -192,7 +192,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -204,7 +204,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} trl: name: Test TRL integration @@ -223,7 +223,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull 
vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -235,7 +235,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_trl.sh sentence-transformers: name: Test Sentence Transformers integration @@ -263,7 +263,7 @@ jobs: path: sentence-transformers - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -275,5 +275,5 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash optimum-habana/tests/ci/sentence_transformers.sh diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml index 5f170645e7..f48f264965 100644 --- a/.github/workflows/slow_tests_gaudi2.yml +++ b/.github/workflows/slow_tests_gaudi2.yml @@ -17,7 +17,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -33,7 +33,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_diffusers.sh deepspeed: 
name: Test DeepSpeed models @@ -46,7 +46,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -62,7 +62,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_deepspeed.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} fsdp: name: Test FSDP models @@ -75,7 +75,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -91,7 +91,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ make slow_tests_fsdp TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} multi-card: name: Test multi-card models @@ -104,7 +104,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -120,7 +120,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + 
vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_8x.sh single-card: name: Test single-card models @@ -134,7 +134,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -151,7 +151,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_1x.sh text-generation: name: Test text-generation example @@ -166,7 +166,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -182,7 +182,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }} trl: name: Test TRL integration @@ -195,7 +195,7 @@ jobs: uses: actions/checkout@v2 - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -211,7 +211,7 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - 
vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash tests/ci/slow_tests_trl.sh sentence-transformers: name: Test Sentence Transformers integration @@ -232,7 +232,7 @@ jobs: path: sentence-transformers - name: Pull image run: | - docker pull vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest + docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest - name: Run tests run: | docker run \ @@ -248,5 +248,5 @@ jobs: --cap-add=sys_nice \ --net=host \ --ipc=host \ - vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest \ + vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest \ /bin/bash optimum-habana/tests/ci/sentence_transformers.sh diff --git a/Makefile b/Makefile index b72f04fd21..2ac2c85fe4 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ slow_tests_8x: test_installs # Run DeepSpeed non-regression tests slow_tests_deepspeed: test_installs - python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 python -m pytest tests/test_examples.py -v -s -k "deepspeed" slow_tests_diffusers: test_installs @@ -105,7 +105,7 @@ slow_tests_diffusers: test_installs # Run text-generation non-regression tests slow_tests_text_generation_example: test_installs - python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -v -s --token $(TOKEN) # Run image-to-text non-regression tests diff --git a/README.md b/README.md index 842c9725a7..2219cf5e3d 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Please refer to the 
Intel Gaudi AI Accelerator official [installation guide](htt > Tests should be run in a Docker container based on Intel Gaudi Docker images. > -> The current version has been validated for SynapseAI 1.17. +> The current version has been validated for SynapseAI 1.18. ## Install the library and get example scripts @@ -59,9 +59,9 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up To use the example associated with the latest stable release, run: > ``` > git clone https://github.com/huggingface/optimum-habana -> cd optimum-habana && git checkout v1.13.1 +> cd optimum-habana && git checkout v1.14.0 > ``` -> with `v1.13.1` the version number of this release. +> with `v1.14.0` the version number of this release. ### Option 2: Use the latest main branch under development @@ -88,7 +88,7 @@ git clone -b transformers_future https://github.com/huggingface/optimum-habana To use DeepSpeed on HPUs, you also need to run the following command: >```bash ->pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 >``` To install the requirements for every example: diff --git a/docs/Dockerfile b/docs/Dockerfile index 9cc27f2f8c..6dd8d3a29f 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest ARG commit_sha ARG clone_url diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 019ad6d38d..44fff7c10c 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -23,6 +23,6 @@ python -m pip install --upgrade-strategy eager optimum[habana] To use DeepSpeed on HPUs, you also need to run the following command: ```bash -python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +python -m pip install 
git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 ``` diff --git a/docs/source/usage_guides/deepspeed.mdx b/docs/source/usage_guides/deepspeed.mdx index a1d4616154..3117f8f468 100644 --- a/docs/source/usage_guides/deepspeed.mdx +++ b/docs/source/usage_guides/deepspeed.mdx @@ -31,7 +31,7 @@ You can find more information about DeepSpeed Gaudi integration [here](https://d To use DeepSpeed on Gaudi, you need to install Optimum Habana and [Habana's DeepSpeed fork](https://github.com/HabanaAI/DeepSpeed) with: ```bash pip install optimum[habana] -pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 ``` @@ -78,7 +78,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf -Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.17.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana. +Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.18.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana. The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains how to write a configuration from scratch very well. A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/). 
diff --git a/examples/audio-classification/README.md b/examples/audio-classification/README.md index 197bf9ff5b..db99428175 100644 --- a/examples/audio-classification/README.md +++ b/examples/audio-classification/README.md @@ -110,7 +110,7 @@ On 8 HPUs, this script should run in ~12 minutes and yield an accuracy of **80.4 > You need to install DeepSpeed with: > ```bash -> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 > ``` DeepSpeed can be used with almost the same command as for a multi-card run: diff --git a/examples/gaudi_spawn.py b/examples/gaudi_spawn.py index 655f2ab0d1..0f76dcd379 100644 --- a/examples/gaudi_spawn.py +++ b/examples/gaudi_spawn.py @@ -84,7 +84,7 @@ def main(): if not is_deepspeed_available(): raise ImportError( "--use_deepspeed requires deepspeed: `pip install" - " git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0`." + " git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0`." ) # Patch sys.argv diff --git a/examples/kubernetes/Dockerfile b/examples/kubernetes/Dockerfile index b205eb3a1e..08f2937fca 100644 --- a/examples/kubernetes/Dockerfile +++ b/examples/kubernetes/Dockerfile @@ -1,7 +1,7 @@ -ARG GAUDI_SW_VER=1.17.0 +ARG GAUDI_SW_VER=1.18.0 ARG OS=ubuntu22.04 -ARG TORCH_VER=2.3.1 -ARG OPTIMUM_HABANA_VER=1.13.0 +ARG TORCH_VER=2.4.0 +ARG OPTIMUM_HABANA_VER=1.14.0 FROM vault.habana.ai/gaudi-docker/${GAUDI_SW_VER}/${OS}/habanalabs/pytorch-installer-${TORCH_VER}:latest AS optimum-habana diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md index efd19bbf4a..2ba6b017f1 100644 --- a/examples/kubernetes/README.md +++ b/examples/kubernetes/README.md @@ -43,12 +43,12 @@ Use the the following commands to build the containers: ```bash # Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container -export GAUDI_SW_VER=1.17.0 +export GAUDI_SW_VER=1.18.0 export OS=ubuntu22.04 -export TORCH_VER=2.3.1 +export 
TORCH_VER=2.4.0 # Specify the version of optimum-habana to install in the container -export OPTIMUM_HABANA_VER=1.13.0 +export OPTIMUM_HABANA_VER=1.14.0 git clone https://github.com/huggingface/optimum-habana.git diff --git a/examples/kubernetes/README.md.gotmpl b/examples/kubernetes/README.md.gotmpl index 39e53888d5..52a2c4fbab 100644 --- a/examples/kubernetes/README.md.gotmpl +++ b/examples/kubernetes/README.md.gotmpl @@ -43,12 +43,12 @@ Use the the following commands to build the containers: ```bash # Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container -export GAUDI_SW_VER=1.17.0 +export GAUDI_SW_VER=1.18.0 export OS=ubuntu22.04 -export TORCH_VER=2.3.1 +export TORCH_VER=2.4.0 # Specify the version of optimum-habana to install in the container -export OPTIMUM_HABANA_VER=1.13.0 +export OPTIMUM_HABANA_VER=1.14.0 git clone https://github.com/huggingface/optimum-habana.git diff --git a/examples/kubernetes/docker-compose.yaml b/examples/kubernetes/docker-compose.yaml index 78a48ef92e..214707eccb 100644 --- a/examples/kubernetes/docker-compose.yaml +++ b/examples/kubernetes/docker-compose.yaml @@ -5,30 +5,30 @@ services: http_proxy: ${http_proxy:-""} https_proxy: ${https_proxy:-""} no_proxy: ${no_proxy:-""} - GAUDI_SW_VER: ${GAUDI_SW_VER:-1.17.0} + GAUDI_SW_VER: ${GAUDI_SW_VER:-1.18.0} OS: ${OS:-ubuntu22.04} - OPTIMUM_HABANA_VER: ${OPTIMUM_HABANA_VER:-1.13.0} - TORCH_VER: ${TORCH_VER:-2.3.1} + OPTIMUM_HABANA_VER: ${OPTIMUM_HABANA_VER:-1.14.0} + TORCH_VER: ${TORCH_VER:-2.4.0} REGISTRY: ${REGISTRY} REPO: ${REPO} context: . 
labels: - org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.17.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.3.1}:latest" + org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.18.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.4.0}:latest" org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators" - org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.17.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.13.0} + org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.18.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.14.0} command: > sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'" - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.17.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.13.0} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.18.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.14.0} pull_policy: always optimum-habana-examples: build: labels: - org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.17.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.13.0}" + org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.18.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.14.0}" org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators Examples" - org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.17.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.13.0} + org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.18.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.14.0} target: optimum-habana-examples command: > sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'" extends: optimum-habana - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.17.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.13.0} + image: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.18.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.14.0} diff --git a/examples/multi-node-training/EFA/Dockerfile b/examples/multi-node-training/EFA/Dockerfile index 25a4e4875c..a527f99603 100644 --- a/examples/multi-node-training/EFA/Dockerfile +++ b/examples/multi-node-training/EFA/Dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest # Installs pdsh and upgrade pip RUN apt-get update && apt-get install -y pdsh && \ @@ -18,7 +18,7 @@ RUN sed -i 's/#Port 22/Port 3022/g' /etc/ssh/sshd_config && \ # Installs Optimum Habana and Habana's fork of DeepSpeed RUN pip install optimum[habana] && \ - pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 CMD ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa && \ chmod 600 ~/.ssh/id_rsa && \ diff --git a/examples/multi-node-training/GaudiNIC/Dockerfile b/examples/multi-node-training/GaudiNIC/Dockerfile index 7063c0ad43..b3763c4277 100644 --- a/examples/multi-node-training/GaudiNIC/Dockerfile +++ b/examples/multi-node-training/GaudiNIC/Dockerfile @@ -1,4 +1,4 @@ -FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest +FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest # Installs pdsh and upgrade pip RUN apt-get update && apt-get install -y pdsh && \ @@ -12,7 +12,7 @@ RUN sed -i 's/#Port 22/Port 3022/g' /etc/ssh/sshd_config && \ # Installs Optimum Habana and Habana's fork of DeepSpeed RUN pip install optimum[habana] && \ - pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 CMD ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa && \ chmod 600 ~/.ssh/id_rsa && \ 
diff --git a/examples/speech-recognition/README.md b/examples/speech-recognition/README.md index 9436fd448a..8839e0e018 100644 --- a/examples/speech-recognition/README.md +++ b/examples/speech-recognition/README.md @@ -141,7 +141,7 @@ On 8 HPUs, this script should run in *ca.* 49 minutes and yield a CTC loss of ** > You need to install DeepSpeed with: > ```bash -> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 > ``` DeepSpeed can be used with almost the same command as for a multi-card run: diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md index b99d3d8a6a..dc6081c5e6 100755 --- a/examples/text-generation/README.md +++ b/examples/text-generation/README.md @@ -28,7 +28,7 @@ pip install -r requirements.txt Then, if you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html) (e.g. to use BLOOM/BLOOMZ), you should install DeepSpeed as follows: ```bash -pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 ``` diff --git a/examples/text-generation/text-generation-pipeline/README.md b/examples/text-generation/text-generation-pipeline/README.md index 9185bd9d84..a10792be2a 100644 --- a/examples/text-generation/text-generation-pipeline/README.md +++ b/examples/text-generation/text-generation-pipeline/README.md @@ -22,7 +22,7 @@ The text-generation pipeline can be used to perform text-generation by providing If you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html), you should install DeepSpeed as follows: ```bash -pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 ``` If you would like to use the pipeline with LangChain classes, you can install LangChain as follows: diff 
--git a/notebooks/AI_HW_Summit_2022.ipynb b/notebooks/AI_HW_Summit_2022.ipynb index d295b1a65c..2b9bf711b8 100644 --- a/notebooks/AI_HW_Summit_2022.ipynb +++ b/notebooks/AI_HW_Summit_2022.ipynb @@ -262,7 +262,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0" + "!pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0" ] }, { diff --git a/optimum/habana/accelerate/accelerator.py b/optimum/habana/accelerate/accelerator.py index 38e0ea9da7..2a307bdcd9 100644 --- a/optimum/habana/accelerate/accelerator.py +++ b/optimum/habana/accelerate/accelerator.py @@ -152,7 +152,7 @@ def __init__( if deepspeed_plugin: if not is_deepspeed_available(): raise ImportError( - "DeepSpeed is not installed => run `pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0`." + "DeepSpeed is not installed => run `pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0`." ) mixed_precision = ( diff --git a/optimum/habana/accelerate/state.py b/optimum/habana/accelerate/state.py index a66a6ca853..8d6c39af38 100644 --- a/optimum/habana/accelerate/state.py +++ b/optimum/habana/accelerate/state.py @@ -55,7 +55,7 @@ def __init__(self, cpu: bool = False, **kwargs): if not is_deepspeed_available(): raise ImportError( "DeepSpeed is not available, install it with: `pip install" - " git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0`." + " git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0`." ) self.distributed_type = GaudiDistributedType.DEEPSPEED import deepspeed diff --git a/optimum/habana/utils.py b/optimum/habana/utils.py index 3da3f11872..a16c36b3fb 100755 --- a/optimum/habana/utils.py +++ b/optimum/habana/utils.py @@ -31,7 +31,7 @@ logger = logging.get_logger(__name__) -CURRENTLY_VALIDATED_SYNAPSE_VERSION = version.parse("1.17.0") +CURRENTLY_VALIDATED_SYNAPSE_VERSION = version.parse("1.18.0") def to_device_dtype(my_input: Any, target_device: torch.device = None, target_dtype: torch.dtype = None):