resolve comments

pytorch · lanluo-nvidia · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024
commit 8fc89dc31e15c6feca2a2fe5746b0fa409c92553
diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py
@@ -12,7 +12,6 @@
 from torch_tensorrt._features import ENABLED_FEATURES
 from torch_tensorrt._Input import Input
 from torch_tensorrt.dynamo import _defaults
-from torch_tensorrt.dynamo._exporter import replace_execute_engine_no_op_node
 from torch_tensorrt.fx import InputTensorSpec
 from torch_tensorrt.fx.lower import compile as fx_compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -34,6 +33,9 @@
     from torch_tensorrt.dynamo._compiler import (
         cross_compile_for_windows as dynamo_cross_compile_for_windows,
     )
+    from torch_tensorrt.dynamo._compiler import (
+        load_cross_compiled_exported_program as dynamo_load_cross_compiled_exported_program,
+    )
     from torch_tensorrt.dynamo._compiler import (
         save_cross_compiled_exported_program as dynamo_save_cross_compiled_exported_program,
     )
@@ -530,28 +532,7 @@ def load_cross_compiled_exported_program(file_path: str = "") -> Any:
     Raises:
         ValueError: If the api is not called in windows or there is no file or the file is a valid ExportedProgram file
     """
-    if not file_path:
-        raise ValueError("File path cannot be empty. Please provide a valid file path")
-
-    if platform.system() != "Windows" or platform.machine() != "AMD64":
-        raise ValueError(
-            "cross runtime compiled model for windows can only be loaded in Windows system"
-        )
-
-    try:
-        logger.debug(f"Loading the provided file {file_path} using torch.export.load()")
-        # TODO: think about how to handle the torch.jit.load route?
-        exp_program = torch.export.load(file_path)
-    except Exception as e:
-        logger.info(
-            f"Loading the provided file {file_path} via torch.export.load() failed with the following error: {e}",
-            exc_info=True,
-        )
-        raise ValueError(
-            f"cross_load the file {file_path} doesn't correspond to a valid ExportedProgram. Please verify the file path."
-        )
-
-    return replace_execute_engine_no_op_node(exp_program)
+    return dynamo_load_cross_compiled_exported_program(file_path)
 
 
 def load(file_path: str = "") -> Any:

diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
@@ -20,6 +20,7 @@
     parse_non_trt_nodes,
 )
 from torch_tensorrt.dynamo._engine_cache import BaseEngineCache, DiskEngineCache
+from torch_tensorrt.dynamo._exporter import replace_execute_engine_no_op_node
 from torch_tensorrt.dynamo.conversion import (
     CompilationSettings,
     UnsupportedOperatorException,
@@ -83,8 +84,6 @@ def cross_compile_for_windows(
     dryrun: bool = _defaults.DRYRUN,
     hardware_compatible: bool = _defaults.HARDWARE_COMPATIBLE,
     timing_cache_path: str = _defaults.TIMING_CACHE_PATH,
-    engine_cache_dir: str = _defaults.ENGINE_CACHE_DIR,
-    engine_cache_size: int = _defaults.ENGINE_CACHE_SIZE,
     use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING,
     use_fp32_acc: bool = _defaults.USE_FP32_ACC,
     enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING,
@@ -993,3 +992,37 @@ def save_cross_compiled_exported_program(
     exp_program = export(gm, cross_compile_flag=True)
     torch.export.save(exp_program, file_path)
     logger.debug(f"successfully saved the module for windows at {file_path}")
+
+
+def load_cross_compiled_exported_program(file_path: str = "") -> Any:
+    """
+    Load an ExportedProgram file on Windows that was previously cross-compiled on Linux
+
+    Arguments:
+        file_path (str): Path to file on the disk
+
+    Raises:
+        ValueError: If the API is not called on Windows, no file path is given, or the file is not a valid ExportedProgram file
+    """
+    if not file_path:
+        raise ValueError("File path cannot be empty. Please provide a valid file path")
+
+    if platform.system() != "Windows" or platform.machine() != "AMD64":
+        raise ValueError(
+            "cross runtime compiled model for windows can only be loaded in Windows system"
+        )
+
+    try:
+        logger.debug(f"Loading the provided file {file_path} using torch.export.load()")
+        # TODO: think about how to handle the torch.jit.load route?
+        exp_program = torch.export.load(file_path)
+    except Exception as e:
+        logger.info(
+            f"Loading the provided file {file_path} via torch.export.load() failed with the following error: {e}",
+            exc_info=True,
+        )
+        raise ValueError(
+            f"cross_load the file {file_path} doesn't correspond to a valid ExportedProgram. Please verify the file path."
+        )
+
+    return replace_execute_engine_no_op_node(exp_program)
diff --git a/py/torch_tensorrt/dynamo/_exporter.py b/py/torch_tensorrt/dynamo/_exporter.py
@@ -1,7 +1,7 @@
 import base64
 import copy
 import operator
-from typing import Any, Dict, Sequence, Tuple, cast, Optional
+from typing import Any, Dict, Optional, Sequence, Tuple, cast
 
 import torch
 from torch._guards import detect_fake_mode
@@ -438,7 +438,11 @@ def inline_trt_modules_for_windows(gm: torch.fx.GraphModule) -> torch.fx.GraphMo
         trt_module_node = trt_module_node[0]
         assert trt_module_node.args
 
-        num_outputs = len(trt_module.output_shapes)
+        if "val" not in trt_module_node.meta:
+            raise ValueError(
+                f"trt_module_node: {trt_module_node.name} does not have the metadata which should be set during dynamo compile_module step."
+            )
+        num_outputs = len(trt_module_node.meta["val"])
         # Insert a call_function node to perform inference on TRT engine
         with gm.graph.inserting_before(trt_module_node):
             engine_info = trt_module._pack_engine_info()
@@ -449,20 +453,8 @@ def inline_trt_modules_for_windows(gm: torch.fx.GraphModule) -> torch.fx.GraphMo
                 torch.ops.tensorrt.no_op_placeholder_for_execute_engine.default,
                 (trt_module_node.args, *engine_info),
             )
-
-            trt_node.meta["val"] = []
             assert num_outputs > 0
-            # Generate meta data for TRT node (a FakeTensor with corresponding output shape)
-            for idx in range(num_outputs):
-                trt_node.meta["val"].append(
-                    cast(
-                        FakeTensor,
-                        torch.empty_strided(
-                            tuple(trt_module.output_shapes[idx]),
-                            tuple([1] * len(trt_module.output_shapes[idx])),
-                        ),
-                    )
-                )
+            trt_node.meta["val"] = trt_module_node.meta["val"]
 
         if num_outputs == 1:
             # Insert getitem nodes as outputs (for export serialization to work)

diff --git a/tests/py/dynamo/runtime/test_003_cross_compile_for_windows.py b/tests/py/dynamo/runtime/test_003_cross_compile_for_windows.py
@@ -26,9 +26,13 @@ def forward(self, a, b):
         model = Add().eval().cuda()
         inputs = [torch.randn(2, 3).cuda(), torch.randn(2, 3).cuda()]
         trt_ep_path = os.path.join(tempfile.gettempdir(), "trt.ep")
+        compile_spec = {
+            "inputs": inputs,
+            "min_block_size": 1,
+        }
         try:
             torch_tensorrt.cross_compile_for_windows(
-                model, file_path=trt_ep_path, inputs=inputs
+                model, file_path=trt_ep_path, **compile_spec
             )
         except Exception as e:
             pytest.fail(f"unexpected exception raised: {e}")
@@ -47,9 +51,13 @@ def forward(self, a, b):
         inputs = (torch.randn(2, 3).cuda(), torch.randn(2, 3).cuda())
         trt_ep_path = os.path.join(tempfile.gettempdir(), "trt.ep")
         exp_program = torch.export.export(model, inputs)
+        compile_spec = {
+            "inputs": inputs,
+            "min_block_size": 1,
+        }
         try:
             trt_gm = torch_tensorrt.dynamo.cross_compile_for_windows(
-                exp_program, inputs=inputs
+                exp_program, **compile_spec
             )
             torch_tensorrt.dynamo.save_cross_compiled_exported_program(
                 trt_gm, file_path=trt_ep_path