example with mv2 (#64)

kirklandsign · facebook-github-bot · commit da1f29b1a933 · 2023-08-23T20:44:14.000-07:00
Summary: Pull Request resolved: #64 Adding export example for XNNPACK delegated models, also adding to executor runner to run Reviewed By: guangy10 Differential Revision: D48371417 fbshipit-source-id: 836e49c020aec880799fdd635b6c71f6145a0536
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
@@ -9,8 +9,8 @@
 import os
 from typing import Any
 
-from examples.models import MODEL_NAME_TO_MODEL
-from examples.quantization.example import QUANT_MODEL_NAME_TO_MODEL
+from examples.models import MODEL_NAME_TO_MODEL, MODEL_NAME_TO_OPTIONS
+from executorch.examples.models.models import MODEL_NAME_TO_OPTIONS
 
 BUILD_TOOLS = [
     "buck2",
@@ -39,7 +39,9 @@ def export_models_for_ci() -> None:
     # https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs
     models = {"include": []}
     for name in MODEL_NAME_TO_MODEL.keys():
-        quantization = name in QUANT_MODEL_NAME_TO_MODEL
+        quantization = (
+            name in MODEL_NAME_TO_OPTIONS and MODEL_NAME_TO_OPTIONS[name].quantization
+        )
         for build_tool in BUILD_TOOLS:
             models["include"].append(
                 {"build-tool": build_tool, "model": name, "quantization": quantization}
diff --git a/examples/README.md b/examples/README.md
@@ -7,6 +7,7 @@ It also includes a list of modules, from a simple `Add` to a full model like `Mo
 ## Directory structure
 ```bash
 examples
+├── backend         # Contains examples for exporting delegate models and running them using custom executor runners
 ├── custom_ops      # Contains examples to register custom operators into PyTorch as well as register its kernels into Executorch runtime
 ├── executor_runner # This is an example C++ wrapper around the ET runtime
 ├── export          # Python helper scripts to illustrate export workflow
@@ -71,6 +72,9 @@ you can also find the valid quantized example models by running:
 buck2 run executorch/examples/quantization:example -- --help
 ```
 
+## XNNPACK Backend
+Please see [Backend README](backend/README) for XNNPACK quantization, export, and run workflow.
+
 ## Dependencies
 
 Various models listed in this directory have dependencies on some other packages, e.g. torchvision, torchaudio.
diff --git a/examples/backend/README b/examples/backend/README
@@ -0,0 +1,22 @@
+This README gives some examples on backend-specific model workflow.
+
+# XNNPACK Backend
+
+[XNNPACK](https://github.com/google/XNNPACK) is a library of optimized neural network inference operators for ARM and x86 platforms. Our delegate
+lowers models to run using these highly optimized CPU operators. You can try out lowering and running some example
+models using the following command:
+
+```
+python3 -m examples.backend.xnnpack_examples --model_name="mv2" --delegate
+# For quantized model
+python3 -m examples.backend.xnnpack_examples --model_name="mv2" --quantize --delegate
+```
+
+This will produce a `.pte` model file (its name starts with `mv2_xnnpack`) that can be run using XNNPACK's operators. This will also print out
+the lowered graph, showing what parts of the models have been lowered to XNNPACK via executorch_call_delegate.
+
+You can run the model by running:
+
+```
+buck2 run examples/backend:xnn_executor_runner -- --model_name="mv2"
+```
diff --git a/examples/backend/TARGETS b/examples/backend/TARGETS
@@ -0,0 +1,19 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
+
+runtime.python_binary(
+    name = "xnnpack_examples",
+    main_src = "xnnpack_examples.py",
+    deps = [
+        "//caffe2:torch",
+        "//executorch/backends/xnnpack:xnnpack_preprocess",
+        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
+        "//executorch/examples/models:models",
+        "//executorch/examples/quantization:quant_utils",
+        "//executorch/exir/backend:backend_api",
+    ],
+)
diff --git a/examples/backend/targets.bzl b/examples/backend/targets.bzl
@@ -0,0 +1,21 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_oss_build_kwargs", "runtime")
+
+def define_common_targets():
+    """Defines targets that should be shared between fbcode and xplat.
+
+    The directory containing this targets.bzl file should also contain both
+    TARGETS and BUCK files that call this function.
+    """
+
+    # Executor runner binary with no sources of its own: it only pulls in the runner lib, the XNNPACK backend and all portable kernels.
+    runtime.cxx_binary(
+        name = "xnn_executor_runner",
+        srcs = [],
+        deps = [
+            "//executorch/examples/executor_runner:executor_runner_lib",
+            "//executorch/backends/xnnpack:xnnpack_backend",
+            "//executorch/kernels/portable:generated_lib_all_ops",
+        ],
+        define_static_target = True,
+        **get_oss_build_kwargs()
+    )
diff --git a/examples/backend/xnnpack_examples.py b/examples/backend/xnnpack_examples.py
@@ -0,0 +1,92 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Example script for exporting simple models to flatbuffer
+
+import argparse
+import logging
+
+import executorch.exir as exir
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
+    XnnpackFloatingPointPartitioner,
+    XnnpackQuantizedPartitioner2,
+)
+from executorch.exir.backend.backend_api import to_backend
+
+from ..models import MODEL_NAME_TO_MODEL, MODEL_NAME_TO_OPTIONS
+from ..quantization.utils import quantize
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+if __name__ == "__main__":
+    # The root logger defaults to WARNING; configure INFO so the graph dumps below are shown.
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m",
+        "--model_name",
+        required=True,
+        help=f"Provide model name. Valid ones: {list(MODEL_NAME_TO_OPTIONS.keys())}",
+    )
+    parser.add_argument(
+        "-q",
+        "--quantize",
+        action="store_true",
+        help="Flag for producing quantized or floating-point model",
+    )
+    # NOTE: store_true combined with default=True makes this always True; the guard below cannot fire yet.
+    parser.add_argument(
+        "-d",
+        "--delegate",
+        action="store_true",
+        default=True,
+        help="Flag for producing XNNPACK delegated model",
+    )
+
+    args = parser.parse_args()
+
+    if not args.delegate:
+        raise NotImplementedError(
+            "T161880157: Quantization-only without delegation is not supported yet"
+        )
+
+    if args.model_name not in MODEL_NAME_TO_OPTIONS:
+        raise RuntimeError(
+            f"Model {args.model_name} is not a valid name. or not quantizable right now, "
+            "please contact executorch team if you want to learn why or how to support "
+            "quantization for the requested model. "
+            f"Available models are {list(MODEL_NAME_TO_OPTIONS.keys())}."
+        )
+
+    model, example_inputs = MODEL_NAME_TO_MODEL[args.model_name]()
+    model = model.eval()
+
+    partitioner = XnnpackFloatingPointPartitioner
+    if args.quantize:
+        logger.info("Quantizing Model...")
+        model = quantize(model, example_inputs)
+        # TODO(T161849167): Partitioner will eventually be a single partitioner for both fp32 and quantized models
+        partitioner = XnnpackQuantizedPartitioner2
+
+    # TODO(T161852812): use export.utils.export_to_edge Delegate implementation is currently on an unlifted graph.
+    # It will eventually be changed to a lifted graph, in which _unlift=False,
+    edge = exir.capture(
+        model, example_inputs, exir.CaptureConfig(enable_aot=True, _unlift=True)
+    ).to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
+    logger.info(f"Exported graph:\n{edge.exported_program.graph}")
+
+    edge.exported_program = to_backend(edge.exported_program, partitioner)
+    logger.info(f"Lowered graph:\n{edge.exported_program.graph}")
+
+    exec_prog = edge.to_executorch()
+    # Writes "<model>_xnnpack.pte", or "<model>_xnnpack_quantize.pte" when --quantize is given.
+    quant_tag = "_quantize" if args.quantize else ""
+    filename = f"{args.model_name}_xnnpack{quant_tag}.pte"
+    logger.info(f"Saving exported program to {filename}.")
+    with open(filename, "wb") as f:
+        f.write(exec_prog.buffer)
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .models import MODEL_NAME_TO_MODEL
+from .models import MODEL_NAME_TO_MODEL, MODEL_NAME_TO_OPTIONS
 
-__all__ = [
-    MODEL_NAME_TO_MODEL,
-]
+__all__ = ["MODEL_NAME_TO_MODEL", "MODEL_NAME_TO_OPTIONS"]  # entries must be name strings, not the objects
diff --git a/examples/models/models.py b/examples/models/models.py
@@ -7,6 +7,8 @@
 # @file models.py
 # Simple models for demonstration purposes.
 
+from dataclasses import dataclass
+
 from typing import Any, Tuple
 
 import torch
@@ -140,3 +142,17 @@ def gen_resnet50_model_and_inputs() -> Tuple[torch.nn.Module, Any]:
     "resnet18": gen_resnet18_model_and_inputs,
     "resnet50": gen_resnet50_model_and_inputs,
 }
+
+
+@dataclass
+class OptimizationOptions(object):  # per-model flags; instances are the values of MODEL_NAME_TO_OPTIONS
+    quantization: bool  # read by the CI model-gathering script to fill the "quantization" matrix field
+    xnnpack_delegation: bool  # presumably: the model can be lowered to the XNNPACK delegate (TODO confirm)
+
+
+MODEL_NAME_TO_OPTIONS = {
+    "linear": OptimizationOptions(True, True),
+    "add": OptimizationOptions(True, True),
+    "add_mul": OptimizationOptions(True, True),
+    "mv2": OptimizationOptions(True, True),
+}
diff --git a/examples/quantization/TARGETS b/examples/quantization/TARGETS
@@ -5,8 +5,22 @@ runtime.python_binary(
     main_src = "example.py",
     preload_deps = ["//executorch/kernels/quantized:aot_lib"],
     deps = [
+        ":quant_utils",
         "//caffe2:torch",
         "//executorch/examples/export:export_example",
         "//executorch/examples/models:models",
     ],
 )
+
+runtime.python_library(
+    name = "quant_utils",
+    srcs = [
+        "utils.py",
+    ],
+    visibility = [
+        "//executorch/examples/...",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
diff --git a/examples/quantization/example.py b/examples/quantization/example.py
@@ -27,30 +27,9 @@
 
 from ..export.export_example import export_to_pte
 
-from ..models import MODEL_NAME_TO_MODEL
+from ..models import MODEL_NAME_TO_MODEL, MODEL_NAME_TO_OPTIONS
 
-# Note: for mv3, the mul op is not supported in XNNPACKQuantizer, that could be supported soon
-QUANT_MODEL_NAME_TO_MODEL = {
-    name: MODEL_NAME_TO_MODEL[name] for name in ["linear", "add", "add_mul", "mv2"]
-}
-
-
-def quantize(model_name, model, example_inputs):
-    """This is the official recommended flow for quantization in pytorch 2.0 export"""
-    m = model.eval()
-    m = export.capture_pre_autograd_graph(m, copy.deepcopy(example_inputs))
-    print("original model:", m)
-    quantizer = XNNPACKQuantizer()
-    # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
-    operator_config = get_symmetric_quantization_config(is_per_channel=False)
-    quantizer.set_global(operator_config)
-    m = prepare_pt2e(m, quantizer)
-    # calibration
-    m(*example_inputs)
-    m = convert_pt2e(m)
-    print("quantized model:", m)
-    # make sure we can export to flat buffer
-    export_to_pte(model_name, m, copy.deepcopy(example_inputs))
+from .utils import quantize
 
 
 def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_inputs):
@@ -102,7 +81,7 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
         "-m",
         "--model_name",
         required=True,
-        help=f"Provide model name. Valid ones: {list(QUANT_MODEL_NAME_TO_MODEL.keys())}",
+        help=f"Provide model name. Valid ones: {list(MODEL_NAME_TO_OPTIONS.keys())}",
     )
     parser.add_argument(
         "-ve",
@@ -122,12 +101,12 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
     args = parser.parse_args()
     if args.so_library:
         torch.ops.load_library(args.so_library)
-    if not args.verify and args.model_name not in QUANT_MODEL_NAME_TO_MODEL:
+    if not args.verify and args.model_name not in MODEL_NAME_TO_OPTIONS:
         raise RuntimeError(
             f"Model {args.model_name} is not a valid name. or not quantizable right now, "
             "please contact executorch team if you want to learn why or how to support "
             "quantization for the requested model"
-            f"Available models are {list(QUANT_MODEL_NAME_TO_MODEL.keys())}."
+            f"Available models are {list(MODEL_NAME_TO_OPTIONS.keys())}."
         )
 
     model, example_inputs = MODEL_NAME_TO_MODEL[args.model_name]()
@@ -137,5 +116,6 @@ def verify_xnnpack_quantizer_matching_fx_quant_model(model_name, model, example_
             args.model_name, model, example_inputs
         )
 
-    quantize(args.model_name, model, example_inputs)
+    quantized_model = quantize(model, example_inputs)
+    export_to_pte(args.model_name, quantized_model, copy.deepcopy(example_inputs))
     print("finished")
diff --git a/examples/quantization/utils.py b/examples/quantization/utils.py
@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import copy
+
+import torch._export as export
+from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
+from torch.ao.quantization.quantizer.xnnpack_quantizer import (
+    get_symmetric_quantization_config,
+    XNNPACKQuantizer,
+)
+
+
+def quantize(model, example_inputs):
+    """Quantize `model` with XNNPACKQuantizer using the recommended PyTorch 2.0 export flow; return the converted module."""
+    m = model.eval()
+    m = export.capture_pre_autograd_graph(m, copy.deepcopy(example_inputs))
+    print("original model:", m)
+    quantizer = XNNPACKQuantizer()
+    # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
+    operator_config = get_symmetric_quantization_config(is_per_channel=False)
+    quantizer.set_global(operator_config)
+    m = prepare_pt2e(m, quantizer)
+    # calibration: a single forward pass over example_inputs
+    m(*example_inputs)
+    m = convert_pt2e(m)
+    print("quantized model:", m)
+    # exporting the quantized module (e.g. to a .pte file) is left to the caller
+    return m