
Commit 220ee6a

refactor: Enable require_full_compilation in Dynamo
1 parent de70b64

File tree

6 files changed (+58, -5 lines)

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 1 addition & 0 deletions
@@ -10,3 +10,4 @@
 OPTIMIZATION_LEVEL = None
 USE_PYTHON_RUNTIME = None
 TRUNCATE_LONG_AND_DOUBLE = False
+REQUIRE_FULL_COMPILATION = False

py/torch_tensorrt/dynamo/_settings.py

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,7 @@
     OPTIMIZATION_LEVEL,
     USE_PYTHON_RUNTIME,
     TRUNCATE_LONG_AND_DOUBLE,
+    REQUIRE_FULL_COMPILATION,
 )


@@ -28,3 +29,4 @@ class CompilationSettings:
     optimization_level: Optional[int] = OPTIMIZATION_LEVEL
     use_python_runtime: Optional[bool] = USE_PYTHON_RUNTIME
     truncate_long_and_double: bool = TRUNCATE_LONG_AND_DOUBLE
+    require_full_compilation: bool = REQUIRE_FULL_COMPILATION
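For reference, the new field behaves like the other dataclass fields; a minimal sketch (in practice the settings object is assembled by compile() rather than built by hand):

from torch_tensorrt.dynamo._settings import CompilationSettings

# Omitting the argument falls back to REQUIRE_FULL_COMPILATION (False).
settings = CompilationSettings(require_full_compilation=True)
print(settings.require_full_compilation)  # True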

py/torch_tensorrt/dynamo/backend/backends.py

Lines changed: 1 addition & 0 deletions
@@ -123,6 +123,7 @@ def _compile_module(
         verbose=settings.debug,
         min_block_size=settings.min_block_size,
         torch_executed_ops=settings.torch_executed_ops,
+        require_full_compilation=settings.require_full_compilation,
     )

     # Store TRT replicas of Torch subgraphs

py/torch_tensorrt/dynamo/compile.py

Lines changed: 5 additions & 2 deletions
@@ -31,6 +31,7 @@
     OPTIMIZATION_LEVEL,
     USE_PYTHON_RUNTIME,
     TRUNCATE_LONG_AND_DOUBLE,
+    REQUIRE_FULL_COMPILATION,
 )


@@ -55,7 +56,7 @@ def compile(
     dla_global_dram_size=536870912,
     calibrator=None,
     truncate_long_and_double=TRUNCATE_LONG_AND_DOUBLE,
-    require_full_compilation=False,
+    require_full_compilation=REQUIRE_FULL_COMPILATION,
     min_block_size=MIN_BLOCK_SIZE,
     torch_executed_ops=[],
     torch_executed_modules=[],
@@ -73,7 +74,8 @@ def compile(
         "The Dynamo backend is an experimental feature, for which only the "
         + "following arguments are supported: "
         + "{enabled_precisions, debug, workspace_size, min_block_size, "
-        + "torch_executed_ops, pass_through_build_failures}"
+        + "max_aux_streams, version_compatible, optimization_level, "
+        + "torch_executed_ops, pass_through_build_failures, require_full_compilation}"
     )

     if not isinstance(inputs, collections.abc.Sequence):
@@ -111,6 +113,7 @@ def compile(
         "optimization_level": optimization_level,
         "use_python_runtime": use_python_runtime,
         "truncate_long_and_double": truncate_long_and_double,
+        "require_full_compilation": require_full_compilation,
     }

     settings = CompilationSettings(**compilation_options)
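For context, a sketch of how a caller reaches this path; the toy module, input shapes, and positional model argument are illustrative assumptions, not part of this commit:

import torch
from torch_tensorrt.dynamo.compile import compile as dynamo_compile

class TwoAdds(torch.nn.Module):  # hypothetical toy model
    def forward(self, x, y):
        return torch.ops.aten.add.Tensor(torch.ops.aten.add.Tensor(x, y), y)

model = TwoAdds().eval().cuda()
inputs = [torch.randn((2, 4)).cuda(), torch.randn((2, 4)).cuda()]

# With the flag enabled, partitioning raises an error if any operator in the
# graph lacks a TensorRT converter, instead of leaving it to run in Torch.
trt_module = dynamo_compile(
    model,
    inputs=inputs,
    enabled_precisions={torch.float},
    require_full_compilation=True,
)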

py/torch_tensorrt/dynamo/lowering/_partition.py

Lines changed: 17 additions & 3 deletions
@@ -4,7 +4,7 @@
 import torch

 from torch_tensorrt.dynamo.lowering import SUBSTITUTION_REGISTRY
-from torch_tensorrt.dynamo._defaults import MIN_BLOCK_SIZE
+from torch_tensorrt.dynamo._defaults import MIN_BLOCK_SIZE, REQUIRE_FULL_COMPILATION
 from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner, Partition
 from torch.fx.graph_module import GraphModule
 from torch.fx.node import _get_qualified_name
@@ -45,6 +45,7 @@ def __init__(
             Sequence[str]
         ] = DEFAULT_SINGLE_NODE_PARTITIONS,
         min_block_size=MIN_BLOCK_SIZE,
+        require_full_compilation=REQUIRE_FULL_COMPILATION,
     ) -> None:
         super().__init__(
             graph_module,
@@ -55,6 +56,7 @@ def __init__(
         )

         self.min_block_size = min_block_size
+        self.require_full_compilation = require_full_compilation

     def propose_partitions(self) -> List[Partition]:
         # Propose partitions using the default, then refine the results
@@ -66,6 +68,11 @@ def propose_partitions(self) -> List[Partition]:
             self.operator_support, "unsupported_operators", True
         )

+        if not full_support and self.require_full_compilation:
+            raise AssertionError(
+                "require_full_compilation=True was specified, but model is not fully supported"
+            )
+
         # For each partition, determine whether or not the number of computational operators
         # exceeds the threshold, and if not, remove that partition
         partitions_to_remove = {}
@@ -93,7 +100,7 @@ def propose_partitions(self) -> List[Partition]:
             if (
                 compute_node_count < self.min_block_size
                 and not exempted_partition
-                and not full_support
+                and not (full_support and self.require_full_compilation)
             ):
                 partitions_to_remove[id] = compute_node_count

@@ -178,6 +185,7 @@ def partition(
     verbose: bool = True,
     min_block_size: int = MIN_BLOCK_SIZE,
     torch_executed_ops: Sequence[str] = set(),
+    require_full_compilation: bool = REQUIRE_FULL_COMPILATION,
 ) -> torch.fx.GraphModule:
     """Partition an FX GraphModule with aten ops into TRT engines
     Partitioning is based on converter operator support
@@ -187,11 +195,17 @@ def partition(
         verbose: Bool representing whether to print operator support
         min_block_size: Minimum number of operators per TRT-Engine Block
         torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
+        require_full_compilation: Whether to require that all operators be run in TRT
     Returns:
         torch.fx.GraphModule
     """
     supported_ops = TorchTensorRTOperatorSupport(torch_executed_ops=torch_executed_ops)
-    partitioner = TRTPartitioner(gm, supported_ops, min_block_size=min_block_size)
+    partitioner = TRTPartitioner(
+        gm,
+        supported_ops,
+        min_block_size=min_block_size,
+        require_full_compilation=require_full_compilation,
+    )

     # Determine partitions based on user specifications and operator support
     # Then, fuse partitions and display overview of supported/unsupported operators
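A rough sketch of the resulting partition() behavior; the "unsupported" operator below is a hypothetical stand-in, since actual converter coverage depends on the installed Torch-TensorRT version:

import torch
from copy import deepcopy
from torch_tensorrt.dynamo.lowering._partition import partition

class AddThenUnknown(torch.nn.Module):
    def forward(self, x, y):
        added = torch.ops.aten.add.Tensor(x, y)
        # Hypothetical stand-in for an operator without a TensorRT converter.
        return torch.ops.aten.erfinv.default(added)

gm = torch.fx.symbolic_trace(AddThenUnknown())

# Default: nodes without converters are simply left to execute in Torch.
partitioned = partition(deepcopy(gm))

# New behavior: an AssertionError is raised up front when full compilation is
# required but the graph contains unsupported operators.
try:
    partition(deepcopy(gm), require_full_compilation=True)
except AssertionError as err:
    print(err)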

tests/py/dynamo/backend/test_partitioning.py

Lines changed: 32 additions & 0 deletions
@@ -7,6 +7,38 @@


 class TestPartitioning(TestCase):
+    def test_partition_fully_supported_one_op(self):
+        class FullySupportedOneOp(torch.nn.Module):
+            def __init__(self, *args, **kwargs) -> None:
+                super().__init__(*args, **kwargs)
+
+            def forward(self, x, y):
+                return torch.ops.aten.add.Tensor(x, y)
+
+        fx_graph = torch.fx.symbolic_trace(FullySupportedOneOp())
+        partitioned_graph = partition(deepcopy(fx_graph))
+        self.assertEquals(
+            len(list(partitioned_graph.named_children())),
+            0,
+            "Single operators should not be segmented",
+        )
+
+    def test_partition_fully_supported_one_op_require_full_compilation(self):
+        class FullySupportedOneOp(torch.nn.Module):
+            def __init__(self, *args, **kwargs) -> None:
+                super().__init__(*args, **kwargs)
+
+            def forward(self, x, y):
+                return torch.ops.aten.add.Tensor(x, y)
+
+        fx_graph = torch.fx.symbolic_trace(FullySupportedOneOp())
+        partitioned_graph = partition(deepcopy(fx_graph), require_full_compilation=True)
+        self.assertEquals(
+            len(list(partitioned_graph.named_children())),
+            1,
+            "Single operators can be segmented if full compilation is required",
+        )
+
     def test_partition_fully_supported_multi_op(self):
         class FullySupportedMultiOp(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None: