pytorch
diff --git a/‎.circleci/config.yml
Lines changed: 9 additions & 9 deletions b/‎.circleci/config.yml
Lines changed: 9 additions & 9 deletions
diff --git a/‎.github/workflows/docgen.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/docgen.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/docker_builder.yml
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/docker_builder.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎WORKSPACE
Lines changed: 5 additions & 5 deletions b/‎WORKSPACE
Lines changed: 5 additions & 5 deletions
diff --git a/‎dev_dep_versions.yml
Lines changed: 1 addition & 1 deletion b/‎dev_dep_versions.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎docker/README.md
Lines changed: 2 additions & 2 deletions b/‎docker/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎docker/dist-build.sh
Lines changed: 2 additions & 2 deletions b/‎docker/dist-build.sh
Lines changed: 2 additions & 2 deletions
diff --git a/‎docsrc/index.rst
Lines changed: 1 addition & 0 deletions b/‎docsrc/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docsrc/tutorials/images/majestic_castle.png
494 KB b/‎docsrc/tutorials/images/majestic_castle.png
494 KB
diff --git a/‎examples/dynamo/README.rst
Lines changed: 1 addition & 0 deletions b/‎examples/dynamo/README.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/dynamo/torch_compile_stable_diffusion.py
Lines changed: 55 additions & 0 deletions b/‎examples/dynamo/torch_compile_stable_diffusion.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
Lines changed: 1 addition & 0 deletions b/‎py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
Lines changed: 1 addition & 12 deletions b/‎py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py
Lines changed: 1 addition & 12 deletions
@@ -109,7 +109,7 @@ commands:
             sudo docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
 
   install-cudnn:
-    description: "Install CUDNN 8.8.1"
+    description: "Install CUDNN 8.9.5"
     parameters:
       os:
         type: string
@@ -119,7 +119,7 @@ commands:
         default: "x86_64"
       cudnn-version:
         type: string
-        default: "8.8.1.3"
+        default: "8.9.5.30"
       cuda-version:
         type: string
         default: "cuda12.0"
@@ -198,7 +198,7 @@ commands:
         default: "cuda12.0"
       cudnn-version:
         type: string
-        default: "8.8.1.3"
+        default: "8.9.5.30"
       trt-version-short:
         type: string
         default: "8.6.1"
@@ -246,7 +246,7 @@ commands:
           default: "8.6.1"
         cudnn-version-long:
           type: string
-          default: "8.8.1.3"
+          default: "8.9.5.30"
       steps:
         - run:
             name: Set up python environment
@@ -269,10 +269,10 @@ commands:
         default: "0.16.0.dev20230703+cu121"
       torch-build-index:
         type: string
-        default: "https://download.pytorch.org/whl/nightly/cu121"
+        default: "https://download.pytorch.org/whl/cu121"
       torchvision-build-index:
         type: string
-        default: "https://download.pytorch.org/whl/nightly/cu121"
+        default: "https://download.pytorch.org/whl/cu121"
     steps:
       - run:
           name: Install Torch
@@ -1457,10 +1457,10 @@ parameters:
     default: "0.16.0.dev20230703+cu121"
   torch-build-index:
     type: string
-    default: "https://download.pytorch.org/whl/nightly/cu121"
+    default: "https://download.pytorch.org/whl/cu121"
   cudnn-version:
     type: string
-    default: "8.8.1.3"
+    default: "8.9.5.30"
   trt-version-short:
     type: string
     default: "8.6.1"
@@ -1483,7 +1483,7 @@ parameters:
     default: "https://download.pytorch.org/whl/cu117"
   cudnn-version-legacy:
     type: string
-    default: "8.8.1.3"
+    default: "8.9.5.30"
   trt-version-short-legacy:
     type: string
     default: "8.6.1"
 
@@ -30,14 +30,14 @@ jobs:
       - name: Install base deps
         run: |
           python3 -m pip install pip --upgrade
-          python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu121
+          python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/cu121
           ./packaging/pre_build_script.sh
       - name: Get HEAD SHA
         id: vars
         run: echo "sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
       - name: Build Python Package
         run: |
-          python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu121
+          python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/cu121
       - name: Generate New Docs
         run: |
           cd docsrc
 
@@ -6,6 +6,7 @@ on:
     branches:
       - main
       - nightly
+      - release/2.1
 
 # If pushes to main are made in rapid succession,
 # cancel existing docker builds and use newer commits
 
@@ -118,7 +118,7 @@ These are the following dependencies used to verify the testcases. Torch-TensorR
 - Bazel 6.2.1
 - Libtorch 2.1.0
 - CUDA 12.1
-- cuDNN 8.8.1
+- cuDNN 8.9.5
 - TensorRT 8.6.1
 
 ## Prebuilt Binaries and Wheel files
 
@@ -54,14 +54,14 @@ http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.0%2Bcu121.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.1.0%2Bcu121.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
@@ -71,10 +71,10 @@ http_archive(
 http_archive(
     name = "cudnn",
     build_file = "@//third_party/cudnn/archive:BUILD",
-    sha256 = "79d77a769c7e7175abc7b5c2ed5c494148c0618a864138722c887f95c623777c",
-    strip_prefix = "cudnn-linux-x86_64-8.8.1.3_cuda12-archive",
+    sha256 = "2a2eb89a2ab51071151c6082f1e816c702167a711a9372f9f73a7b5c4b06e01a",
+    strip_prefix = "cudnn-linux-x86_64-8.9.5.30_cuda12-archive",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/cudnn/secure/8.8.1/local_installers/12.0/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz",
+        "https://developer.nvidia.com/downloads/compute/cudnn/secure/8.9.5/local_installers/12.x/cudnn-linux-x86_64-8.9.5.30_cuda12-archive.tar.xz",
     ],
 )
 
 
@@ -1,3 +1,3 @@
 __cuda_version__: "12.1"
-__cudnn_version__: "8.8"
+__cudnn_version__: "8.9"
 __tensorrt_version__: "8.6"
@@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch
 
 ### Instructions
 
-- The example below uses CUDNN 8.8 and TensorRT 8.6
+- The example below uses CUDNN 8.9 and TensorRT 8.6
 - See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.
 
 > From root of Torch-TensorRT repo
 
 Build:
 ```
-DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.8 -f docker/Dockerfile -t torch_tensorrt:latest .
+DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.9 -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
 
 Run:
 
@@ -3,9 +3,9 @@
 TOP_DIR=$(cd $(dirname $0); pwd)/..
 
 if [[ -z "${USE_CXX11}" ]]; then
-    BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -w dist"
+    BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/cu121 -w dist"
 else
-    BUILD_CMD="python -m pip wheel . --config-setting="--build-option=--use-cxx11-abi" --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -w dist"
+    BUILD_CMD="python -m pip wheel . --config-setting="--build-option=--use-cxx11-abi" --extra-index-url https://download.pytorch.org/whl/cu121 -w dist"
 fi
 
 # TensorRT restricts our pip version
 
@@ -81,6 +81,7 @@ Tutorials
    tutorials/_rendered_examples/dynamo/torch_compile_resnet_example
    tutorials/_rendered_examples/dynamo/torch_compile_transformers_example
    tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage
+   tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion
 
 Python API Documentation
 ------------------------
 
@@ -9,3 +9,4 @@ a number of ways you can leverage this backend to accelerate inference.
 * :ref:`torch_compile_resnet`: Compiling a ResNet model using the Torch Compile Frontend for ``torch_tensorrt.compile``
 * :ref:`torch_compile_transformer`: Compiling a Transformer model using ``torch.compile``
 * :ref:`torch_compile_advanced_usage`: Advanced usage including making a custom backend to use directly with the ``torch.compile`` API
+* :ref:`torch_compile_stable_diffusion`: Compiling a Stable Diffusion model using ``torch.compile``
@@ -0,0 +1,55 @@
+"""
+.. _torch_compile_stable_diffusion:
+
+Torch Compile Stable Diffusion
+======================================================
+
+This interactive script is intended as a sample of the Torch-TensorRT workflow with ``torch.compile`` on a Stable Diffusion model. A sample output is featured below:
+
+.. image:: /tutorials/images/majestic_castle.png
+   :width: 512px
+   :height: 512px
+   :scale: 50 %
+   :align: right
+"""
+
+# %%
+# Imports and Model Definition
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+import torch
+from diffusers import DiffusionPipeline
+
+import torch_tensorrt
+
+model_id = "CompVis/stable-diffusion-v1-4"
+device = "cuda:0"
+
+# Instantiate Stable Diffusion Pipeline with FP16 weights
+pipe = DiffusionPipeline.from_pretrained(
+    model_id, revision="fp16", torch_dtype=torch.float16
+)
+pipe = pipe.to(device)
+
+backend = "torch_tensorrt"
+
+# Optimize the UNet portion with Torch-TensorRT
+pipe.unet = torch.compile(
+    pipe.unet,
+    backend=backend,
+    options={
+        "truncate_long_and_double": True,
+        "precision": torch.float16,
+    },
+    dynamic=False,
+)
+
+# %%
+# Inference
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+prompt = "a majestic castle in the clouds"
+image = pipe(prompt).images[0]
+
+image.save("images/majestic_castle.png")
+image.show()
@@ -153,6 +153,7 @@
     aten.transpose.int,
     aten.tril.default,
     aten.triu.default,
+    aten.unbind,
     aten.unfold,
     aten.unfold_backward,
     aten.unfold_copy,
 
@@ -2,22 +2,11 @@
 from typing import Any, Sequence
 
 import torch
-from torch_tensorrt._utils import sanitized_torch_version
+from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
 from torch_tensorrt.dynamo.lowering.passes.pass_utils import (
     clean_up_graph_after_modifications,
 )
 
-from packaging import version
-
-# Modify import location of utilities based on Torch version
-if version.parse(sanitized_torch_version()) < version.parse("2.1.1"):
-    from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
-else:
-    from torch._inductor.constant_folding import (
-        ConstantFolder,
-        replace_node_with_constant,
-    )
-
 logger = logging.getLogger(__name__)