huggingface · JingyaHuang · Apr 12, 2024 · Mar 4, 2024 · Mar 4, 2024 · Apr 8, 2024
diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py
@@ -342,7 +342,6 @@ def export_models(
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
-
             # TODO: Remove after the weights/neff separation compilation of sdxl is patched by a neuron sdk release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
             if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
                 logger.warning(

diff --git a/optimum/neuron/accelerate/utils/misc.py b/optimum/neuron/accelerate/utils/misc.py
@@ -44,7 +44,6 @@ def patch_accelerate_is_tpu_available():
 
 
 def create_patched_finfo(xla_downcast_bf16: bool = False, use_amp: bool = False, xla_use_bf16: bool = False):
-
     def patched_finfo(dtype):
         if xla_downcast_bf16 or use_amp or xla_use_bf16:
             return _ORIG_TORCH_FINFO(torch.bfloat16)

diff --git a/optimum/neuron/modeling_base.py b/optimum/neuron/modeling_base.py
@@ -50,12 +50,10 @@
     from ..exporters.neuron import NeuronDefaultConfig
 
 if is_neuron_available():
-
     NEURON_COMPILER_TYPE = "neuron-cc"
     NEURON_COMPILER_VERSION = get_neuroncc_version()
 
 if is_neuronx_available():
-
     NEURON_COMPILER_TYPE = "neuronx-cc"
     NEURON_COMPILER_VERSION = get_neuronxcc_version()
 

diff --git a/optimum/neuron/training_args.py b/optimum/neuron/training_args.py
@@ -180,7 +180,6 @@ def __post_init__(self):
 
     @cached_property
     def _setup_devices(self) -> "torch.device":
-
         requires_backends(self, ["torch"])
         logger.info("PyTorch: setting up devices")
         NeuronAcceleratorState._reset_state()

diff --git a/optimum/neuron/utils/misc.py b/optimum/neuron/utils/misc.py
@@ -23,6 +23,7 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import torch
+from packaging import version
 from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, CLIPProcessor, PretrainedConfig
 from transformers.modeling_utils import _add_variant
 from transformers.utils import (
@@ -43,6 +44,7 @@
 from ...utils import is_diffusers_available, logging
 from .import_utils import is_torch_neuronx_available, is_torch_xla_available
 from .require_utils import requires_safetensors, requires_torch_xla
+from .version_utils import get_torch_version
 
 
 if is_torch_neuronx_available():
@@ -547,6 +549,12 @@ def replace_weights(
     """
     Replaces the weights in a Neuron Model with weights from another model. The original Neuron model should have separated weights (by setting `inline_weights_to_neff=False` during the tracing).
     """
+    torch_version = get_torch_version()
+    if version.parse(torch_version) >= version.parse("2.0.0"):
+        raise RuntimeError(
+            "Weights Neff separation is not yet supported by Neuron SDK for PyTorch 2.*. You can downgrade your PyTorch version to 1.13.1."
+        )
+
     if isinstance(weights, torch.nn.Module):
         weights = weights.state_dict()
 

diff --git a/optimum/neuron/utils/testing_utils.py b/optimum/neuron/utils/testing_utils.py
@@ -16,7 +16,10 @@
 
 import unittest
 
+from packaging import version
+
 from .import_utils import is_neuron_available, is_neuronx_available
+from .version_utils import get_torch_version
 
 
 def requires_neuron(test_case):
@@ -33,6 +36,12 @@ def requires_neuron_or_neuronx(test_case):
     )(test_case)
 
 
+def requires_pytorch_1_13(test_case):
+    return unittest.skipUnless(
+        version.parse(get_torch_version()) < version.parse("2.0.0"), "test requires PyTorch < 2.0.0"
+    )(test_case)
+
+
 def is_trainium_test(test_case):
     test_case = requires_neuronx(test_case)
     try:

diff --git a/setup.py b/setup.py
@@ -56,10 +56,10 @@
     "neuronx": [
         "wheel",
         "neuronx-cc==2.13.66.0",
-        "torch-neuronx==1.13.1.1.14.0",
+        "torch-neuronx==2.1.2.2.1.0",
         "transformers-neuronx==0.10.0.21",
-        "torch==1.13.1.*",
-        "torchvision==0.14.*",
+        "torch==2.1.2.*",
+        "torchvision==0.16.*",
         "neuronx_distributed==0.7.0",
     ],
     "diffusers": ["diffusers ~= 0.26.1", "peft"],

diff --git a/tests/inference/test_modeling.py b/tests/inference/test_modeling.py
@@ -49,7 +49,7 @@
     pipeline,
 )
 from optimum.neuron.utils import NEURON_FILE_NAME, is_neuron_available, is_neuronx_available
-from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
+from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx, requires_pytorch_1_13
 from optimum.utils import (
     CONFIG_NAME,
     logging,
@@ -150,6 +150,7 @@ def test_save_compiler_intermediary_files(self):
             os.listdir(save_path)
             self.assertTrue(os.path.exists(neff_path))
 
+    @requires_pytorch_1_13
     @requires_neuronx
     def test_decouple_weights_neff_and_replace_weight(self):
         with tempfile.TemporaryDirectory() as tempdir:
@@ -404,7 +405,6 @@ def test_sentence_transformers_dyn_bs(self, model_arch):
     @parameterized.expand(["clip"], skip_on_empty=True)
     @requires_neuronx
     def test_sentence_transformers_clip(self, model_arch):
-
         # Neuron model with dynamic batching
         model_id = SENTENCE_TRANSFORMERS_MODEL_NAMES[model_arch]
         input_shapes = {

diff --git a/text-generation-inference/Dockerfile b/text-generation-inference/Dockerfile
@@ -82,6 +82,7 @@ RUN apt-get update -y \
  && apt-get install -y --no-install-recommends \
     gnupg2 \
     wget \
+    python3-dev \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean
 
@@ -102,7 +103,7 @@ ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"
 
 RUN pip3 install \
     neuronx-cc==2.13.66.0 \
-    torch-neuronx==1.13.1.1.14.0 \
+    torch-neuronx==2.1.2.2.1.0 \
     transformers-neuronx==0.10.0.21 \
     --extra-index-url=https://pip.repos.neuron.amazonaws.com
 

diff --git a/text-generation-inference/integration-tests/test_gpt2.py b/text-generation-inference/integration-tests/test_gpt2.py
@@ -76,7 +76,7 @@ async def test_model_single_request(tgi_client):
         seed=42,
         decoder_input_details=True,
     )
-    assert "The purpose of the current post is" in response.generated_text
+    assert "A lot of researchers are trying to explain what it is" in response.generated_text
 
 
 @pytest.mark.asyncio

diff --git a/text-generation-inference/tests/helpers.py b/text-generation-inference/tests/helpers.py
@@ -15,7 +15,7 @@ def create_request(
     top_k: int = 50,
     top_p: float = 0.9,
     temperature: float = 1.0,
-    seed: int = 0,
+    seed: int = 42,
     repetition_penalty: float = 1.0,
 ):
     parameters = NextTokenChooserParameters(

diff --git a/text-generation-inference/tests/test_gpt2.py b/text-generation-inference/tests/test_gpt2.py
@@ -45,8 +45,8 @@ def test_info(model_path):
         ],
         [
             "It was a bright cold day in April, and the clocks were striking thirteen.",
-            198,
-            "\n",
+            383,
+            " The",
             True,
         ],
     ],
@@ -69,7 +69,7 @@ def test_prefill(input_text, token_id, token_text, do_sample, batch_size, model_
         [
             "It was a bright cold day in April, and the clocks were striking thirteen.",
             20,
-            "\n\nAt 11:45 a.m. a small group of friends gathered outside the hotel to",
+            " The sun was set just three miles south of the city. There were no houses, just shops,",
             True,
         ],
     ],

diff --git a/text-generation-inference/tests/test_llama.py b/text-generation-inference/tests/test_llama.py
@@ -69,7 +69,7 @@ def test_prefill(input_text, token_id, token_text, do_sample, batch_size, model_
         [
             "It was a bright cold day in April, and the clocks were striking thirteen.",
             20,
-            " It was time to return to the village of Uckfield to face the fury of the evil",
+            " It was the start of the new spring, a time when nature woke from her winter slumber",
             True,
         ],
     ],
-Original file line number
+Diff line change
@@ Expand Up / @@ -45,8 +45,8 @@ def test_info(model_path): @@
             ],
             [
                 "It was a bright cold day in April, and the clocks were striking thirteen.",
-                198,
-                "\n",
+                383,
+                " The",
                 True,
             ],
         ],
@@ Expand All @@
             [
                 "It was a bright cold day in April, and the clocks were striking thirteen.",
                 20,
-                "\n\nAt 11:45 a.m. a small group of friends gathered outside the hotel to",
+                " The sun was set just three miles south of the city. There were no houses, just shops,",
                 True,
             ],
         ],
@@ Expand Down @@