example #64

Closed · wants to merge 6 commits
2 changes: 1 addition & 1 deletion backends/xnnpack/operators/__init__.py

@@ -30,6 +30,7 @@
     op_minimum,
     op_multiply,
     op_negate,
+    op_permute,
     op_prelu,
     op_quantize_per_tensor,
     op_relu,
@@ -42,7 +43,6 @@
     op_squeeze,
     op_static_constant_pad,
     op_static_resize_bilinear_2d,
-    op_static_transpose,
     op_sub,
     op_to_copy,
 )
@@ -20,7 +20,7 @@


 @register_node_visitor
-class StaticTransposeVisitor(NodeVisitor):
+class PermuteVisitor(NodeVisitor):
     target = "aten.permute_copy.default"

     def __init__(self, *args) -> None:
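
With the visitor renamed to target aten.permute_copy.default directly (and the matching skip entry removed below), here is a minimal sketch of a module that would now hit this path; the module and shapes are hypothetical:

import torch

class Permute(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x.permute(...) is captured as aten.permute_copy.default,
        # the target registered by PermuteVisitor above
        return x.permute(0, 2, 1)

example_inputs = (torch.randn(1, 4, 8),)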
9 changes: 0 additions & 9 deletions backends/xnnpack/operators/op_skip_ops.py

@@ -113,12 +113,3 @@ class OpSymSizeInt(OpSkipOps):
     """

     target = "sym_size.int"
-
-
-@register_node_visitor
-class OpPermuteCopyDefault(OpSkipOps):
-    """
-    do nothing if node is permute_copy.default
-    """
-
-    target = "aten.permute_copy.default"
5 changes: 5 additions & 0 deletions backends/xnnpack/runtime/XNNCompiler.cpp

@@ -1517,6 +1517,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
   if (!executor->qinputs_.empty() && flatbuffer_graph->xnodes()->size() > 0 &&
       flatbuffer_graph->xnodes()->Get(0)->xnode_union_type() ==
           fb_xnnpack::XNodeUnion::XNNFullyConnected) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     // This delegate is for DQLinear which supports dynamic input shapes
     if (executor->getNumInputs() < 1 || executor->getNumOutputs() != 1) {
       ET_LOG(
@@ -1525,6 +1526,10 @@
       return Error::NotSupported;
     }
     executor->setNeedsResizeOutput();
+#else
+    ET_LOG(Error, "DQ Linear is not supported");
+    return Error::NotSupported;
+#endif
   }

   return err;
7 changes: 7 additions & 0 deletions backends/xnnpack/runtime/XNNExecutor.cpp

@@ -7,7 +7,9 @@
  */

 #include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
 #include <executorch/backends/xnnpack/runtime/utils/utils.h>
+#endif

 namespace torch {
 namespace executor {
@@ -17,6 +19,7 @@ namespace delegate {
 Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
   auto qinput_pair = qinputs_.find(id);
   if (qinput_pair != qinputs_.end()) {
+#ifdef ENABLE_DYNAMIC_QUANTIZATION
     auto qinput = qinput_pair->second;
     // dq the input and copy it in to qinput
     float input_min, input_max;
@@ -60,6 +63,10 @@
          {static_cast<float>(input_qparam.scale),
           static_cast<int8_t>(input_qparam.zero_point)},
          batch_size});
+#else
+    ET_LOG(Error, "Dynamic Quantization is not supported");
+    return Error::NotSupported;
+#endif
   } else {
     externals_.emplace_back(xnn_external_value{id, input->mutable_data_ptr()});
   }
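
For intuition, a hedged Python sketch of the per-tensor dynamic quantization this path applies to the delegate input when ENABLE_DYNAMIC_QUANTIZATION is defined; the actual qparam selection lives in the runtime utils header and may differ in detail:

import torch

def dynamic_quantize_per_tensor(x: torch.Tensor):
    # Pick scale/zero_point from the observed range (anchored at 0),
    # then quantize to int8; a sketch of the runtime's choose-qparams
    # plus quantize step.
    x_min = min(x.min().item(), 0.0)
    x_max = max(x.max().item(), 0.0)
    scale = (x_max - x_min) / 255.0 or 1.0  # guard against all-zero input
    zero_point = int(round(-128 - x_min / scale))
    q = torch.clamp(torch.round(x / scale) + zero_point, -128, 127)
    return q.to(torch.int8), scale, zero_point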
1 change: 1 addition & 0 deletions backends/xnnpack/targets.bzl

@@ -65,6 +65,7 @@ def define_common_targets():
         "//executorch/extension/pybindings/...",
         "@EXECUTORCH_CLIENTS",
     ],
+    preprocessor_flags = [] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"],
     deps = [
         third_party_dep("XNNPACK"),
         ":xnnpack_schema",
13 changes: 13 additions & 0 deletions backends/xnnpack/test/TARGETS

@@ -123,3 +123,16 @@ python_unittest(
         "//executorch/backends/xnnpack/test/tester:tester",
     ],
 )
+
+python_unittest(
+    name = "test_xnnpack_models",
+    srcs = glob([
+        "models/*.py",
+    ]),
+    deps = [
+        "//caffe2:torch",
+        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
+        "//executorch/backends/xnnpack/test/tester:tester",
+        "//pytorch/vision:torchvision",
+    ],
+)
72 changes: 72 additions & 0 deletions backends/xnnpack/test/models/mobilenet_v2.py
@@ -0,0 +1,72 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest

import torch
import torchvision.models as models
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackQuantizedPartitioner2,
)
from executorch.backends.xnnpack.test.tester import Partition, Tester
from executorch.backends.xnnpack.test.tester.tester import Export
from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights


class TestXNNPACKMobileNetV2(unittest.TestCase):
    export_stage = Export(get_xnnpack_capture_config(enable_aot=True))

    mv2 = models.__dict__["mobilenet_v2"](weights=MobileNet_V2_Weights.DEFAULT)
    mv2 = mv2.eval()
    model_inputs = (torch.ones(1, 3, 224, 224),)

    all_operators = {
        "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
        "executorch_exir_dialects_edge__ops_aten_add_Tensor",
        "executorch_exir_dialects_edge__ops_aten_permute_copy_default",
        "executorch_exir_dialects_edge__ops_aten_addmm_default",
        "executorch_exir_dialects_edge__ops_aten_mean_dim",
        "executorch_exir_dialects_edge__ops_aten_hardtanh_default",
        "executorch_exir_dialects_edge__ops_aten_convolution_default",
    }

    def test_fp32(self):
        (
            Tester(self.mv2, self.model_inputs)
            .export(self.export_stage)
            .to_edge()
            .check(list(self.all_operators))
            .partition()
            .check(["torch.ops.executorch_call_delegate"])
            .check_not(list(self.all_operators))
            .to_executorch()
            .serialize()
            .run_method()
            .compare_outputs()
        )

    def test_qs8_pt2e(self):
        # Quantization fuses away batchnorm, so it is no longer in the graph
        ops_after_quantization = self.all_operators - {
            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
        }

        (
            Tester(self.mv2, self.model_inputs)
            .quantize2()
            .export(self.export_stage)
            .to_edge()
            .check(list(ops_after_quantization))
            .partition(Partition(partitioner=XnnpackQuantizedPartitioner2))
            .check(["torch.ops.executorch_call_delegate"])
            .check_not(list(ops_after_quantization))
            .to_executorch()
            .serialize()
            .run_method()
            .compare_outputs()
        )
2 changes: 1 addition & 1 deletion backends/xnnpack/test/ops/add.py

@@ -75,9 +75,9 @@ def test_add_quantized_pt2e(self):

         (
             Tester(add_module, model_inputs)
+            .quantize2()
             .export()
             .check_count({"torch.ops.aten.add.Tensor": 4})
-            .quantize2()
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .check_count({"executorch_exir_dialects_edge__ops_aten_add_Tensor": 4})
20 changes: 10 additions & 10 deletions backends/xnnpack/test/tester/tester.py

@@ -10,6 +10,7 @@
 from typing import Any, Dict, List, Optional, Tuple

 import torch
+import torch._export as export
 from executorch import exir
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
     XnnpackFloatingPointPartitioner,
@@ -145,23 +146,23 @@

         self.quantizer.set_global(self.quantization_config)

-        self.converted_program = None
+        self.converted_graph = None

     def run(
-        self, artifact: ExirExportedProgram, inputs: Optional[Tuple[torch.Tensor]]
+        self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
     ) -> None:
-        prepared = prepare_pt2e(artifact.exported_program.graph_module, self.quantizer)
+        captured_graph = export.capture_pre_autograd_graph(artifact, inputs)
+        prepared = prepare_pt2e(captured_graph, self.quantizer)
         converted = convert_pt2e(prepared)
-        artifact.exported_program._graph_module = converted
-        self.converted_program = artifact
+        self.converted_graph = converted

     @property
-    def artifact(self) -> ExirExportedProgram:
-        return self.converted_program
+    def artifact(self) -> torch.fx.GraphModule:
+        return self.converted_graph

     @property
     def graph_module(self) -> str:
-        return self.converted_program.exported_program.graph_module
+        return self.converted_graph


 @register_stage
@@ -274,12 +275,11 @@ def __init__(
         self.inputs = inputs
         self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
         self.pipeline = {
+            self._stage_name(Quantize2): [self._stage_name(Export)],
             self._stage_name(Quantize): [self._stage_name(Export)],
             self._stage_name(Export): [
-                self._stage_name(Quantize2),
                 self._stage_name(ToEdge),
             ],
-            self._stage_name(Quantize2): [self._stage_name(ToEdge)],
             self._stage_name(ToEdge): [self._stage_name(Partition)],
             # TODO Make this Stage optional
             self._stage_name(Partition): [self._stage_name(ToExecutorch)],
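
After this reordering, quantize2() runs on the eager module before export(), since its run() now captures the module itself via capture_pre_autograd_graph. A hedged sketch of the two legal pipelines, using a toy module (hypothetical, but the Tester calls mirror the tests above):

import torch
from executorch.backends.xnnpack.test.tester import Tester

class Add(torch.nn.Module):
    def forward(self, x):
        return x + x

inputs = (torch.randn(1, 4),)

# fp32: Export -> ToEdge -> Partition -> ToExecutorch
Tester(Add(), inputs).export().to_edge().partition().to_executorch()

# quantized: Quantize2 -> Export -> ToEdge -> Partition -> ToExecutorch
Tester(Add(), inputs).quantize2().export().to_edge().partition().to_executorch()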
6 changes: 5 additions & 1 deletion backends/xnnpack/utils/configs.py

@@ -36,4 +36,8 @@ def get_xnnpack_capture_config(dynamic_shape=False, enable_aot: Optional[bool] =
     if enable_aot is None:
         return CaptureConfig(enable_dynamic_shape=dynamic_shape)
     else:
-        return CaptureConfig(enable_dynamic_shape=dynamic_shape, enable_aot=enable_aot)
+        return CaptureConfig(
+            enable_dynamic_shape=dynamic_shape,
+            enable_aot=enable_aot,
+            _unlift=enable_aot,
+        )
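
A brief sketch of how the updated helper behaves; the assertion simply restates the change above:

from executorch.backends.xnnpack.utils.configs import get_xnnpack_capture_config

# enable_aot unset: only dynamic-shape capture is configured
cfg = get_xnnpack_capture_config(dynamic_shape=True)

# enable_aot=True now also sets _unlift=True, matching the
# exir.capture(..., CaptureConfig(enable_aot=True, _unlift=True)) call
# in the lowering example below
aot_cfg = get_xnnpack_capture_config(enable_aot=True)
assert aot_cfg.enable_aot and aot_cfg._unlift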
13 changes: 13 additions & 0 deletions examples/backend/TARGETS
@@ -0,0 +1,13 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_binary(
    name = "xnnpack_lowering_examples",
    main_src = "xnnpack_lowering_examples.py",
    deps = [
        "//caffe2:torch",
        "//executorch/backends/xnnpack:xnnpack_preprocess",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
        "//executorch/examples/models:models",
        "//executorch/exir/backend:backend_api",
    ],
)
100 changes: 100 additions & 0 deletions examples/backend/xnnpack_lowering_examples.py
@@ -0,0 +1,100 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Example script for exporting simple models to flatbuffer

import argparse
import copy

import executorch.exir as exir
import torch._export as export
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
    XnnpackQuantizedPartitioner2,
)
from executorch.exir.backend.backend_api import to_backend, validation_disabled

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)

from ..models import MODEL_NAME_TO_MODEL

# Note: for mv3, the mul op is not yet supported by XNNPACKQuantizer; support may be added soon
XNNPACK_MODEL_NAME_TO_MODEL = {
    name: MODEL_NAME_TO_MODEL[name] for name in ["linear", "add", "add_mul", "mv2"]
}


def quantize(model, example_inputs):
    """This is the official recommended flow for quantization in pytorch 2.0 export"""
    m = model.eval()
    m = export.capture_pre_autograd_graph(m, copy.deepcopy(example_inputs))
    quantizer = XNNPACKQuantizer()
    # if we set is_per_channel to True, we also need to add out_variant of
    # quantize_per_channel/dequantize_per_channel
    operator_config = get_symmetric_quantization_config(is_per_channel=False)
    quantizer.set_global(operator_config)
    m = prepare_pt2e(m, quantizer)
    # calibration
    m(*example_inputs)
    m = convert_pt2e(m)
    return m
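
# Hypothetical usage of the helper above, for one of the registered models:
#
#   model, example_inputs = MODEL_NAME_TO_MODEL["add"]()
#   quantized = quantize(model, example_inputs)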


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--model_name",
        required=True,
        help=f"Provide model name. Valid ones: {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}",
    )
    parser.add_argument(
        "-q",
        "--quantize",
        action="store_true",
        required=False,
        default=False,
        help="Flag for producing a quantized (rather than floating-point) model",
    )
    args = parser.parse_args()

    if args.model_name not in XNNPACK_MODEL_NAME_TO_MODEL:
        raise RuntimeError(
            f"Model {args.model_name} is not a valid name or is not quantizable right now. "
            "Please contact the ExecuTorch team if you want to learn why or how to support "
            "quantization for the requested model. "
            f"Available models are {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}."
        )

    model, example_inputs = MODEL_NAME_TO_MODEL[args.model_name]()
    model = model.eval()

    partitioner = XnnpackFloatingPointPartitioner
    if args.quantize:
        print("Quantizing Model...")
        model = quantize(model, example_inputs)
        # Partitioner will eventually be a single partitioner for both fp32 and quantized models
        partitioner = XnnpackQuantizedPartitioner2

    edge = exir.capture(
        model, example_inputs, exir.CaptureConfig(enable_aot=True, _unlift=True)
    ).to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
    print("Exported graph:\n", edge.exported_program.graph)

    with validation_disabled():
        edge.exported_program = to_backend(edge.exported_program, partitioner)
    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer
    quant_tag = "_quantize" if args.quantize else ""
    filename = f"xnnpack_{args.model_name}{quant_tag}.pte"
    print(f"Saving exported program to {filename}.")
    with open(filename, "wb") as f:
        f.write(buffer)
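
For reference, a hedged sketch of how the example might be invoked; the module path is assumed from the TARGETS entry above, and the output filename follows the code:

# Hypothetical invocation (run from the repository root so the relative
# import of ..models resolves); flags match the argparse definition above:
#
#   python -m executorch.examples.backend.xnnpack_lowering_examples -m mv2
#   python -m executorch.examples.backend.xnnpack_lowering_examples -m mv2 -q
#
# The quantized run writes xnnpack_mv2_quantize.pte to the current directory.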