pytorch · lanluo-nvidia · Sep 19, 2025 · Sep 8, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/.github/scripts/filter-matrix.py b/.github/scripts/filter-matrix.py
@@ -8,7 +8,7 @@
 
 # currently we don't support python 3.13t because tensorrt does not support 3.13t
 disabled_python_versions: List[str] = ["3.13t", "3.14", "3.14t"]
-disabled_cuda_versions: List[str] = ["cu130"]
+disabled_cuda_versions: List[str] = []
 
 # jetpack 6.2 only officially supports python 3.10 and cu126
 jetpack_python_versions: List[str] = ["3.10"]

diff --git a/MODULE.bazel b/MODULE.bazel
@@ -22,9 +22,6 @@ git_override(
     remote = "https://github.com/narendasan/rules_pkg",
 )
 
-local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "local_repository")
-
-
 new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
 
 # External dependency for torch_tensorrt if you already have precompiled binaries.
@@ -39,7 +36,7 @@ new_local_repository(
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda-12.9/",
+    path = "/usr/local/cuda-13.0/",
 )
 
 # for Jetson
@@ -68,7 +65,7 @@ http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu129/libtorch-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu130/libtorch-shared-with-deps-latest.zip"],
 )
 
 # on the aarch64 platform you can get libtorch via either local or wheel file

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -2,9 +2,9 @@
 
 # Base image starts with CUDA
 #TODO: cuda version
-ARG BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
+ARG BASE_IMG=nvidia/cuda:13.0.0-devel-ubuntu22.04
 FROM ${BASE_IMG} as base
-ENV BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
+ENV BASE_IMG=nvidia/cuda:13.0.0-devel-ubuntu22.04
 
 ARG TENSORRT_VERSION
 ENV TENSORRT_VERSION=${TENSORRT_VERSION}

diff --git a/docker/dist-build.sh b/docker/dist-build.sh
@@ -4,7 +4,7 @@ set -x
 
 TOP_DIR=$(cd $(dirname $0); pwd)/..
 
-BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/nightly/cu129 -w dist"
+BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/nightly/cu130 -w dist"
 
 # TensorRT restricts our pip version
 cd ${TOP_DIR} \

diff --git a/examples/apps/flux_demo.py b/examples/apps/flux_demo.py
@@ -121,7 +121,7 @@ def forward_loop(mod):
 
     settings = {
         "strict": False,
-        "allow_complex_guards_as_runtime_asserts": True,
+        "prefer_deferred_runtime_asserts_over_guards": True,
         "enabled_precisions": enabled_precisions,
         "truncate_double": True,
         "min_block_size": 1,

diff --git a/examples/dynamo/torch_export_flux_dev.py b/examples/dynamo/torch_export_flux_dev.py
@@ -92,7 +92,7 @@
     kwargs=dummy_inputs,
     dynamic_shapes=dynamic_shapes,
     strict=False,
-    allow_complex_guards_as_runtime_asserts=True,
+    prefer_deferred_runtime_asserts_over_guards=True,
 )
 
 # %%

diff --git a/examples/dynamo/weight_streaming_example.py b/examples/dynamo/weight_streaming_example.py
@@ -65,7 +65,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
                 kwargs={"position_ids": position_ids},
                 dynamic_shapes=({1: seq_len}, {1: seq_len}),
                 strict=False,
-                allow_complex_guards_as_runtime_asserts=True,
+                prefer_deferred_runtime_asserts_over_guards=True,
             )
 
     return ep

diff --git a/py/requirements.txt b/py/requirements.txt
@@ -1,7 +1,7 @@
 numpy
 packaging
 pybind11==2.6.2
---extra-index-url https://download.pytorch.org/whl/nightly/cu129
+--extra-index-url https://download.pytorch.org/whl/nightly/cu130
 torch>=2.9.0.dev,<2.10.0
 --extra-index-url https://pypi.ngc.nvidia.com
 pyyaml

diff --git a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py
@@ -68,7 +68,7 @@ def __init__(
         use_python_runtime: bool = _defaults.USE_PYTHON_RUNTIME,
         immutable_weights: bool = False,
         strict: bool = True,
-        allow_complex_guards_as_runtime_asserts: bool = False,
+        prefer_deferred_runtime_asserts_over_guards: bool = False,
         weight_streaming_budget: Optional[int] = None,
         enabled_precisions: Optional[Set[Union[torch.dtype, dtype]]] = None,
         **kwargs: Any,
@@ -134,8 +134,8 @@ def __init__(
         self.kwarg_inputs: dict[str, Any] = {}
         self.additional_settings = kwargs
         self.strict = strict
-        self.allow_complex_guards_as_runtime_asserts = (
-            allow_complex_guards_as_runtime_asserts
+        self.prefer_deferred_runtime_asserts_over_guards = (
+            prefer_deferred_runtime_asserts_over_guards
         )
         self.use_python_runtime = use_python_runtime
         self.trt_device = to_torch_tensorrt_device(device)
@@ -312,14 +312,14 @@ def refit_gm(self) -> None:
     def get_exported_program(self) -> torch.export.ExportedProgram:
 
         def export_fn() -> torch.export.ExportedProgram:
-            if self.allow_complex_guards_as_runtime_asserts:
+            if self.prefer_deferred_runtime_asserts_over_guards:
                 return _export(
                     self.original_model,
                     self.arg_inputs,
                     kwargs=self.kwarg_inputs,
                     dynamic_shapes=self._get_total_dynamic_shapes(),
                     strict=self.strict,
-                    allow_complex_guards_as_runtime_asserts=self.allow_complex_guards_as_runtime_asserts,
+                    prefer_deferred_runtime_asserts_over_guards=self.prefer_deferred_runtime_asserts_over_guards,
                 )
             else:
                 return torch.export.export(

diff --git a/pyproject.toml b/pyproject.toml
@@ -100,14 +100,19 @@ index-strategy = "unsafe-best-match"
 
 [tool.uv.sources]
 torch = [
-    { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
+    { index = "pytorch-nightly-cu130", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
     { index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" },
 ]
 torchvision = [
-    { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
+    { index = "pytorch-nightly-cu130", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
     { index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" },
 ]
 
+[[tool.uv.index]]
+name = "pytorch-nightly-cu130"
+url = "https://download.pytorch.org/whl/nightly/cu130"
+explicit = false
+
 [[tool.uv.index]]
 name = "pytorch-nightly-cu129"
 url = "https://download.pytorch.org/whl/nightly/cu129"

diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py
@@ -856,7 +856,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
                         (inputs,),
                         dynamic_shapes=({1: seq_len},),
                         strict=False,
-                        allow_complex_guards_as_runtime_asserts=True,
+                        prefer_deferred_runtime_asserts_over_guards=True,
                     )
 
             return ep

diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt
@@ -13,6 +13,6 @@ nvidia-modelopt[all]; python_version >'3.9' and python_version <'3.13'
 # flashinfer-python is not supported for python version 3.13 or higher
 # flashinfer-python is broken on python 3.9 at the moment, so skip it for now
 flashinfer-python; python_version >'3.9' and python_version <'3.13'
---extra-index-url https://download.pytorch.org/whl/nightly/cu129
+--extra-index-url https://download.pytorch.org/whl/nightly/cu130
 torchvision>=0.24.0.dev,<0.25.0
 timm>=1.0.3
diff --git a/tools/llm/test_llama_components.py b/tools/llm/test_llama_components.py
@@ -79,7 +79,7 @@ def test_llama_attention(args):
         args=(hidden_states, position_embeddings, None),
         dynamic_shapes=dynamic_shapes,
         strict=False,
-        allow_complex_guards_as_runtime_asserts=True,
+        prefer_deferred_runtime_asserts_over_guards=True,
     )
 
     with torch_tensorrt.logging.debug() if args.debug else nullcontext():
@@ -463,7 +463,7 @@ def test_llama_model(args):
         kwargs=kwarg_inputs,
         dynamic_shapes=dynamic_shapes,
         strict=False,
-        allow_complex_guards_as_runtime_asserts=True,
+        prefer_deferred_runtime_asserts_over_guards=True,
     )
 
     with torch_tensorrt.logging.debug() if args.debug else nullcontext():

diff --git a/tools/llm/utils.py b/tools/llm/utils.py
@@ -41,7 +41,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
                 kwargs={"position_ids": position_ids},
                 dynamic_shapes=({1: seq_len}, {1: seq_len}),
                 strict=False,
-                allow_complex_guards_as_runtime_asserts=True,
+                prefer_deferred_runtime_asserts_over_guards=True,
             )
 
     return ep

diff --git a/tools/perf/utils.py b/tools/perf/utils.py
@@ -228,7 +228,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
                 (inputs,),
                 dynamic_shapes=({1: seq_len},),
                 strict=False,
-                allow_complex_guards_as_runtime_asserts=True,
+                prefer_deferred_runtime_asserts_over_guards=True,
             )
 
     return ep