
Commit fa320e2

feat: Add ATen lowering pass system
- Add documentation, testing, and lowering pass management systems for ATen lowering passes
1 parent e0a7525 commit fa320e2

File tree: 11 files changed (+306, −50 lines)

docsrc/index.rst (+1)

@@ -73,6 +73,7 @@ Tutorials
    tutorials/_rendered_examples/dynamo/torch_compile_resnet_example
    tutorials/_rendered_examples/dynamo/torch_compile_transformers_example
    tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage
+   tutorials/_rendered_examples/dynamo/dynamo_aten_lowering_passes

 Python API Documenation
 ------------------------

examples/dynamo/README.rst (+1)

@@ -9,3 +9,4 @@ a number of ways you can leverage this backend to accelerate inference.
 * :ref:`torch_compile_resnet`: Compiling a ResNet model using the Torch Compile Frontend for ``torch_tensorrt.compile``
 * :ref:`torch_compile_transformer`: Compiling a Transformer model using ``torch.compile``
 * :ref:`torch_compile_advanced_usage`: Advanced usage including making a custom backend to use directly with the ``torch.compile`` API
+* :ref:`dynamo_aten_lowering_passes`: Custom modifications of a graph of ATen operators via lowering passes
New file (+97 lines; path not shown in this view): the dynamo_aten_lowering_passes example script

"""
.. _dynamo_aten_lowering_passes:

Dynamo ATen Lowering Passes
======================================================

This interactive script is intended as an overview of the process by which ATen lowering passes are written and used."""

# %%
# 1. Lowering Pass Function
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# An ATen lowering pass function in Torch-TRT must satisfy two requirements:
# - The function must take as input a single `torch.fx.GraphModule` and return the lowered
# `torch.fx.GraphModule`
# - The function must leave the graph in a valid and invoke-able state, including performing any
# necessary linting and recompilation
#
# See below for an example of a lowering pass which repairs graphs that have inputs which are
# also outputs, a disallowed configuration for TRT Engines.

# %%
import logging

import torch

logger = logging.getLogger(__name__)


def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Repair scenarios where inputs are also outputs of the graph

    TRT does not allow such cases, so we insert a clone (identity) layer
    """
    modified_graph = False

    # Extract graph placeholder Tensors
    placeholders = [
        node
        for node in gm.graph.nodes
        if (
            node.op == "placeholder"
            and isinstance(node.type, type)
            and issubclass(node.type, torch.Tensor)
        )
    ]

    for placeholder in placeholders:
        # If any placeholder has any users which are direct graph outputs
        if len(placeholder.users) >= 1 and any(
            user.op == "output" for user in placeholder.users
        ):
            modified_graph = True

            # Get direct graph outputs which are direct uses of placeholders
            direct_outputs = [user for user in placeholder.users if user.op == "output"]

            # Insert clone node for placeholder to ensure placeholder is not a direct output
            with gm.graph.inserting_after(placeholder):
                cloned_placeholder = gm.graph.call_function(
                    torch.ops.aten.clone.default,
                    args=(placeholder,),
                )

            # Replace placeholder as output with cloned version
            for output in direct_outputs:
                output.replace_input_with(placeholder, cloned_placeholder)

    # If the graph was modified, clean up the graph and ensure it is up-to-date
    if modified_graph:
        gm.graph.eliminate_dead_code()
        gm.graph.lint()
        gm.recompile()
        logger.debug(f"Graph after repair_input_as_output:\n{gm.graph}")

    return gm


# %%
# 2. Lowering Pass Registration
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# To add a lowering pass, use the convenience function `add_lowering_pass` in the module
# `torch_tensorrt.dynamo.lowering.passes`. See below for an example:

# %%
from torch_tensorrt.dynamo.lowering.passes import add_lowering_pass

add_lowering_pass(repair_input_as_output)

# %%
# 3. Apply Available Lowering Passes
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# To apply all lowering passes to a graph, the convenience function `apply_lowering_passes` in the module
# `torch_tensorrt.dynamo.lowering.passes` can be used. This function is automatically invoked in the Torch-TRT Dynamo
# paths. Additionally, the graph after each modifying pass is logged in the debug logs for Torch-TRT runs.
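
Note: step 3 of the example ends without a code block, so a minimal end-to-end sketch may help show how the registered pass takes effect during compilation. The `InputAsOutput` module, tensor shapes, and compile options below are illustrative assumptions, not part of this commit; the only requirement is that `add_lowering_pass(repair_input_as_output)` has already run, as above.

# Minimal usage sketch (assumes a CUDA-capable GPU with TensorRT installed)
import torch


class InputAsOutput(torch.nn.Module):
    def forward(self, x: torch.Tensor):
        # Returning the input directly is exactly the pattern repair_input_as_output fixes
        return x, x + 1


model = InputAsOutput().eval().cuda()
inputs = [torch.randn(4, 4).cuda()]

# apply_lowering_passes (and therefore the registered pass) is invoked automatically
# inside the Torch-TRT Dynamo backend; the graph after each modifying pass appears
# in the debug logs.
optimized = torch.compile(model, backend="torch_tensorrt", options={"debug": True})
optimized(*inputs)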

py/torch_tensorrt/dynamo/backend/backends.py (+4, −45)

@@ -7,27 +7,15 @@
 import torch
 import torch._dynamo as td
 import torch.utils._pytree as pytree
-import torch_tensorrt
 from torch._dynamo.utils import detect_fake_mode
 from torch._functorch.aot_autograd import _aot_export_function
 from torch._ops import OpOverload
 from torch_tensorrt.dynamo import CompilationSettings
 from torch_tensorrt.dynamo.compile import compile_module
-from torch_tensorrt.dynamo.lowering._decompositions import get_decompositions
+from torch_tensorrt.dynamo.lowering import apply_lowering_passes, get_decompositions
 from torch_tensorrt.dynamo.lowering._pre_aot_lowering import pre_aot_substitutions
 from torch_tensorrt.dynamo.utils import parse_dynamo_kwargs
 
-from packaging import version
-
-# Modify import location of utilities based on Torch version
-if version.parse(torch_tensorrt.sanitized_torch_version()) <= version.parse("2.1.0"):
-    from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
-else:
-    from torch._inductor.constant_folding import (
-        ConstantFolder,
-        replace_node_with_constant,
-    )
-
 logger = logging.getLogger(__name__)
 
@@ -86,7 +74,7 @@ def _pretraced_backend(
         fake_mode, "allow_non_fake_inputs", True
     ), fake_mode:
         # Invoke AOTAutograd to translate operators to aten
-        graph_module = aot_export_for_compile(
+        gm = aot_export_for_compile(
             gm,
             sample_inputs,
             decompositions=get_decompositions(
@@ -96,10 +84,10 @@ def _pretraced_backend(
 
         logger.debug("Post-AOT Autograd graph:\n" + str(gm.graph))
 
-        constant_fold(graph_module)
+        gm = apply_lowering_passes(gm)
 
         trt_compiled = compile_module(
-            graph_module,
+            gm,
             sample_inputs,
             settings=settings,
         )
@@ -123,35 +111,6 @@ def _pretraced_backend(
         raise
 
 
-@torch.utils._python_dispatch._disable_current_modes()  # type: ignore
-def constant_fold(gm: torch.fx.GraphModule) -> Any:
-    """Adapted from:
-    https://github.com/pytorch/pytorch/blob/3a79621c9dce17f77fbddc06aab21f6bc477f313/torch/_inductor/freezing.py#L178-L197
-
-    Folds constants in the graph module, not skipping constructors
-
-    Modifies the graph in-place and replaces node with constants
-    """
-    cf = ConstantFolder(gm, skip_constructors=False)
-    cf.run()
-
-    for node, constant in cf.node_replacements.items():
-        replace_node_with_constant(gm, node, constant)
-
-    erased_params = []
-    for node in gm.graph.nodes:
-        if node.op == "get_attr" and len(node.users) == 0:
-            delattr(gm, node.target)
-            erased_params.append(node)
-
-    for node in erased_params:
-        gm.graph.erase_node(node)
-
-    gm.graph.eliminate_dead_code()
-    gm.graph.lint()
-    gm.recompile()
-
-
 def aot_export_for_compile(
     func: torch.fx.GraphModule,
     args: Sequence[torch.Tensor],

py/torch_tensorrt/dynamo/lowering/__init__.py (+1)

@@ -2,4 +2,5 @@
 from ._fusers import *  # noqa: F401
 from ._pre_aot_lowering import SUBSTITUTION_REGISTRY  # noqa: F401
 from ._pre_aot_lowering import register_substitution  # noqa: F401
+from .passes import add_lowering_pass, apply_lowering_passes
 from .substitutions import *  # noqa: F401
New file (+27 lines; path not shown in this view): the lowering pass registry

from typing import Callable

import torch
from torch.fx.passes.pass_manager import PassManager

from .constant_folding import constant_fold
from .repair_input_as_output import repair_input_as_output

ATEN_LOWERING_PASSES = PassManager.build_from_passlist(
    [
        constant_fold,
        repair_input_as_output,
    ]
)


def add_lowering_pass(
    lowering_pass: Callable[[torch.fx.GraphModule], torch.fx.GraphModule]
) -> None:
    """Adds a lowering pass to the registry"""
    ATEN_LOWERING_PASSES.add_pass(lowering_pass)
    return


def apply_lowering_passes(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Applies the lowering passes to a graph module, returns the modified GraphModule"""
    return ATEN_LOWERING_PASSES(gm)
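
For illustration, a short sketch of how this registry behaves. It assumes `PassManager.passes` exposes the ordered pass list (as in `torch.fx.passes.pass_manager`); the `no_op_pass` below is a hypothetical placeholder, not part of this commit.

import torch

from torch_tensorrt.dynamo.lowering import add_lowering_pass, apply_lowering_passes
from torch_tensorrt.dynamo.lowering.passes import ATEN_LOWERING_PASSES


def no_op_pass(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Hypothetical pass: returns the graph unchanged."""
    return gm


# New passes are appended, so they run after constant_fold and repair_input_as_output
add_lowering_pass(no_op_pass)
print([p.__name__ for p in ATEN_LOWERING_PASSES.passes])

# apply_lowering_passes(gm) then calls each registered pass, in order, on a GraphModule

Using a PassManager keeps the pass ordering explicit and lets user-registered passes compose with the built-in constant folding and input/output repair passes.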
New file (+56 lines; path not shown in this view): the constant folding pass

import logging

import torch
import torch_tensorrt

from packaging import version

# Modify import location of utilities based on Torch version
if version.parse(torch_tensorrt.sanitized_torch_version()) <= version.parse("2.1.0"):
    from torch._inductor.freezing import ConstantFolder, replace_node_with_constant
else:
    from torch._inductor.constant_folding import (
        ConstantFolder,
        replace_node_with_constant,
    )

logger = logging.getLogger(__name__)


@torch.utils._python_dispatch._disable_current_modes()  # type: ignore
def constant_fold(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Adapted from:
    https://github.com/pytorch/pytorch/blob/3a79621c9dce17f77fbddc06aab21f6bc477f313/torch/_inductor/freezing.py#L178-L197

    Folds constants in the graph module, not skipping constructors

    Modifies the graph in-place and replaces node with constants
    """
    cf = ConstantFolder(gm, skip_constructors=False)
    cf.run()

    for node, constant in cf.node_replacements.items():
        replace_node_with_constant(gm, node, constant)

    erased_params = []
    for node in gm.graph.nodes:
        # If get_attr node has no users, mark it for deletion
        if node.op == "get_attr" and len(node.users) == 0:
            # If the node's parameter is not a parameter of any other node, remove it
            if not any(
                other.target == node.target for other in gm.graph.nodes if other != node
            ):
                delattr(gm, node.target)
            erased_params.append(node)

    # Remove unused nodes from the graph
    for node in erased_params:
        gm.graph.erase_node(node)

    gm.graph.eliminate_dead_code()
    gm.graph.lint()
    gm.recompile()

    logger.debug(f"Graph after constant folding:\n{gm.graph}")

    return gm
New file (+53 lines; path not shown in this view): the input-as-output repair pass

import logging

import torch

logger = logging.getLogger(__name__)


def repair_input_as_output(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Repair scenarios where inputs are also outputs of the graph

    TRT does not allow such cases, so we insert a clone (identity) layer
    """
    modified_graph = False

    # Extract graph placeholder Tensors
    placeholders = [
        node
        for node in gm.graph.nodes
        if (
            node.op == "placeholder"
            and isinstance(node.type, type)
            and issubclass(node.type, torch.Tensor)
        )
    ]

    for placeholder in placeholders:
        # If any placeholder has any users which are direct graph outputs
        if len(placeholder.users) >= 1 and any(
            user.op == "output" for user in placeholder.users
        ):
            modified_graph = True

            # Get direct graph outputs which are direct uses of placeholders
            direct_outputs = [user for user in placeholder.users if user.op == "output"]

            # Insert clone node for placeholder to ensure placeholder is not a direct output
            with gm.graph.inserting_after(placeholder):
                cloned_placeholder = gm.graph.call_function(
                    torch.ops.aten.clone.default,
                    args=(placeholder,),
                )

            # Replace placeholder as output with cloned version
            for output in direct_outputs:
                output.replace_input_with(placeholder, cloned_placeholder)

    if modified_graph:
        gm.graph.eliminate_dead_code()
        gm.graph.lint()
        gm.recompile()
        logger.debug(f"Graph after repair_input_as_output:\n{gm.graph}")

    return gm
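
The commit message mentions a testing system, but the test files are not included in this view. As a minimal sketch, the pass can be exercised directly on a symbolically traced graph; the `InputAsOutput` module and tensor shapes here are illustrative, and the import relies on the re-export in the passes package `__init__` shown above.

import torch

from torch_tensorrt.dynamo.lowering.passes import repair_input_as_output


class InputAsOutput(torch.nn.Module):
    def forward(self, x: torch.Tensor):
        # The first return value is the input itself, which TRT engines disallow
        return x, x + 1


gm = torch.fx.symbolic_trace(InputAsOutput())
gm = repair_input_as_output(gm)

# After the pass, no graph output is a bare placeholder (a clone was inserted)
output_node = [n for n in gm.graph.nodes if n.op == "output"][0]
assert all(arg.op != "placeholder" for arg in output_node.args[0])

# Behavior is unchanged: the first output still equals the input
x = torch.ones(2, 3)
assert torch.equal(gm(x)[0], x)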

setup.py (+2)

@@ -392,6 +392,7 @@ def run(self):
     "torch_tensorrt.dynamo.conversion.impl.unary",
     "torch_tensorrt.dynamo.lowering",
     "torch_tensorrt.dynamo.lowering.substitutions",
+    "torch_tensorrt.dynamo.lowering.passes",
     "torch_tensorrt.dynamo.partitioning",
     "torch_tensorrt.dynamo.runtime",
     "torch_tensorrt.dynamo.tools",
@@ -419,6 +420,7 @@ def run(self):
     "torch_tensorrt.dynamo.conversion.impl.unary": "py/torch_tensorrt/dynamo/conversion/impl/unary",
     "torch_tensorrt.dynamo.lowering": "py/torch_tensorrt/dynamo/lowering",
     "torch_tensorrt.dynamo.lowering.substitutions": "py/torch_tensorrt/dynamo/lowering/substitutions",
+    "torch_tensorrt.dynamo.lowering.passes": "py/torch_tensorrt/dynamo/lowering/passes",
     "torch_tensorrt.dynamo.partitioning": "py/torch_tensorrt/dynamo/partitioning",
     "torch_tensorrt.dynamo.runtime": "py/torch_tensorrt/dynamo/runtime",
     "torch_tensorrt.dynamo.tools": "py/torch_tensorrt/dynamo/tools",
