Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/scripts/filter-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

# currently we don't support python 3.13t because tensorrt does not support 3.13t
disabled_python_versions: List[str] = ["3.13t", "3.14", "3.14t"]
disabled_cuda_versions: List[str] = ["cu130"]
disabled_cuda_versions: List[str] = []

# jetpack 6.2 only officially supports python 3.10 and cu126
jetpack_python_versions: List[str] = ["3.10"]
Expand Down
7 changes: 2 additions & 5 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ git_override(
remote = "https://github.com/narendasan/rules_pkg",
)

local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "local_repository")


new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")

# External dependency for torch_tensorrt if you already have precompiled binaries.
Expand All @@ -39,7 +36,7 @@ new_local_repository(
new_local_repository(
name = "cuda",
build_file = "@//third_party/cuda:BUILD",
path = "/usr/local/cuda-12.9/",
path = "/usr/local/cuda-13.0/",
)

# for Jetson
Expand Down Expand Up @@ -68,7 +65,7 @@ http_archive(
name = "libtorch",
build_file = "@//third_party/libtorch:BUILD",
strip_prefix = "libtorch",
urls = ["https://download.pytorch.org/libtorch/nightly/cu129/libtorch-shared-with-deps-latest.zip"],
urls = ["https://download.pytorch.org/libtorch/nightly/cu130/libtorch-shared-with-deps-latest.zip"],
)

# on aarch64 platforms you can get libtorch either locally or from a wheel file
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

# Base image starts with CUDA
#TODO: cuda version
ARG BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
ARG BASE_IMG=nvidia/cuda:13.0.0-devel-ubuntu22.04
FROM ${BASE_IMG} as base
ENV BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
ENV BASE_IMG=nvidia/cuda:13.0.0-devel-ubuntu22.04

ARG TENSORRT_VERSION
ENV TENSORRT_VERSION=${TENSORRT_VERSION}
Expand Down
2 changes: 1 addition & 1 deletion docker/dist-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -x

TOP_DIR=$(cd $(dirname $0); pwd)/..

BUILD_CMD="python -m pip wheel . --extra-index-url https://download.pytorch.org/whl/nightly/cu129 -w dist"
BUILD_CMD="python -m pip wheel . --extra-index-url https://download.pytorch.org/whl/nightly/cu130 -w dist"

# TensorRT restricts our pip version
cd ${TOP_DIR} \
Expand Down
2 changes: 1 addition & 1 deletion examples/apps/flux_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def forward_loop(mod):

settings = {
"strict": False,
"allow_complex_guards_as_runtime_asserts": True,
"prefer_deferred_runtime_asserts_over_guards": True,
"enabled_precisions": enabled_precisions,
"truncate_double": True,
"min_block_size": 1,
Expand Down
2 changes: 1 addition & 1 deletion examples/dynamo/torch_export_flux_dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
kwargs=dummy_inputs,
dynamic_shapes=dynamic_shapes,
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

# %%
Expand Down
2 changes: 1 addition & 1 deletion examples/dynamo/weight_streaming_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
kwargs={"position_ids": position_ids},
dynamic_shapes=({1: seq_len}, {1: seq_len}),
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

return ep
Expand Down
2 changes: 1 addition & 1 deletion py/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
numpy
packaging
pybind11==2.6.2
--extra-index-url https://download.pytorch.org/whl/nightly/cu129
--extra-index-url https://download.pytorch.org/whl/nightly/cu130
torch>=2.9.0.dev,<2.10.0
--extra-index-url https://pypi.ngc.nvidia.com
pyyaml
Expand Down
10 changes: 5 additions & 5 deletions py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def __init__(
use_python_runtime: bool = _defaults.USE_PYTHON_RUNTIME,
immutable_weights: bool = False,
strict: bool = True,
allow_complex_guards_as_runtime_asserts: bool = False,
prefer_deferred_runtime_asserts_over_guards: bool = False,
weight_streaming_budget: Optional[int] = None,
enabled_precisions: Optional[Set[Union[torch.dtype, dtype]]] = None,
**kwargs: Any,
Expand Down Expand Up @@ -134,8 +134,8 @@ def __init__(
self.kwarg_inputs: dict[str, Any] = {}
self.additional_settings = kwargs
self.strict = strict
self.allow_complex_guards_as_runtime_asserts = (
allow_complex_guards_as_runtime_asserts
self.prefer_deferred_runtime_asserts_over_guards = (
prefer_deferred_runtime_asserts_over_guards
)
self.use_python_runtime = use_python_runtime
self.trt_device = to_torch_tensorrt_device(device)
Expand Down Expand Up @@ -312,14 +312,14 @@ def refit_gm(self) -> None:
def get_exported_program(self) -> torch.export.ExportedProgram:

def export_fn() -> torch.export.ExportedProgram:
if self.allow_complex_guards_as_runtime_asserts:
if self.prefer_deferred_runtime_asserts_over_guards:
return _export(
self.original_model,
self.arg_inputs,
kwargs=self.kwarg_inputs,
dynamic_shapes=self._get_total_dynamic_shapes(),
strict=self.strict,
allow_complex_guards_as_runtime_asserts=self.allow_complex_guards_as_runtime_asserts,
prefer_deferred_runtime_asserts_over_guards=self.prefer_deferred_runtime_asserts_over_guards,
)
else:
return torch.export.export(
Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,19 @@ index-strategy = "unsafe-best-match"

[tool.uv.sources]
torch = [
{ index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
{ index = "pytorch-nightly-cu130", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
{ index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" },
]
torchvision = [
{ index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
{ index = "pytorch-nightly-cu130", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" },
{ index = "jetson-containers", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release" },
]

[[tool.uv.index]]
name = "pytorch-nightly-cu130"
url = "https://download.pytorch.org/whl/nightly/cu130"
explicit = false

[[tool.uv.index]]
name = "pytorch-nightly-cu129"
url = "https://download.pytorch.org/whl/nightly/cu129"
Expand Down
2 changes: 1 addition & 1 deletion tests/py/dynamo/models/test_engine_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
(inputs,),
dynamic_shapes=({1: seq_len},),
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

return ep
Expand Down
2 changes: 1 addition & 1 deletion tests/py/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ nvidia-modelopt[all]; python_version >'3.9' and python_version <'3.13'
# flashinfer-python is not supported for python version 3.13 or higher
# flashinfer-python is broken on python 3.9 at the moment, so skip it for now
flashinfer-python; python_version >'3.9' and python_version <'3.13'
--extra-index-url https://download.pytorch.org/whl/nightly/cu129
--extra-index-url https://download.pytorch.org/whl/nightly/cu130
torchvision>=0.24.0.dev,<0.25.0
timm>=1.0.3
4 changes: 2 additions & 2 deletions tools/llm/test_llama_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_llama_attention(args):
args=(hidden_states, position_embeddings, None),
dynamic_shapes=dynamic_shapes,
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

with torch_tensorrt.logging.debug() if args.debug else nullcontext():
Expand Down Expand Up @@ -463,7 +463,7 @@ def test_llama_model(args):
kwargs=kwarg_inputs,
dynamic_shapes=dynamic_shapes,
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

with torch_tensorrt.logging.debug() if args.debug else nullcontext():
Expand Down
2 changes: 1 addition & 1 deletion tools/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
kwargs={"position_ids": position_ids},
dynamic_shapes=({1: seq_len}, {1: seq_len}),
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

return ep
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def export_llm(model, inputs, min_seq_len=1, max_seq_len=16):
(inputs,),
dynamic_shapes=({1: seq_len},),
strict=False,
allow_complex_guards_as_runtime_asserts=True,
prefer_deferred_runtime_asserts_over_guards=True,
)

return ep
Expand Down
Loading