Skip to content

Fix Long Term Quant Testing #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/xnnpack/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
op_minimum,
op_multiply,
op_negate,
op_permute,
op_prelu,
op_quantize_per_tensor,
op_relu,
Expand All @@ -42,7 +43,6 @@
op_squeeze,
op_static_constant_pad,
op_static_resize_bilinear_2d,
op_static_transpose,
op_sub,
op_to_copy,
)
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


@register_node_visitor
class StaticTransposeVisitor(NodeVisitor):
class PermuteVisitor(NodeVisitor):
target = "aten.permute_copy.default"

def __init__(self, *args) -> None:
Expand Down
9 changes: 0 additions & 9 deletions backends/xnnpack/operators/op_skip_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,3 @@ class OpSymSizeInt(OpSkipOps):
"""

target = "sym_size.int"


@register_node_visitor
class OpPermuteCopyDefault(OpSkipOps):
"""
do nothing if node is permute_copy.default
"""

target = "aten.permute_copy.default"
5 changes: 5 additions & 0 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,7 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
if (!executor->qinputs_.empty() && flatbuffer_graph->xnodes()->size() > 0 &&
flatbuffer_graph->xnodes()->Get(0)->xnode_union_type() ==
fb_xnnpack::XNodeUnion::XNNFullyConnected) {
#ifdef ENABLE_DYNAMIC_QUANTIZATION
// This delegate is for DQLinear which supports dynamic input shapes
if (executor->getNumInputs() < 1 || executor->getNumOutputs() != 1) {
ET_LOG(
Expand All @@ -1525,6 +1526,10 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
return Error::NotSupported;
}
executor->setNeedsResizeOutput();
#else
ET_LOG(Error, "DQ Linear is not supported");
return Error::NotSupported;
#endif
}

return err;
Expand Down
7 changes: 7 additions & 0 deletions backends/xnnpack/runtime/XNNExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
*/

#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
#ifdef ENABLE_DYNAMIC_QUANTIZATION
#include <executorch/backends/xnnpack/runtime/utils/utils.h>
#endif

namespace torch {
namespace executor {
Expand All @@ -17,6 +19,7 @@ namespace delegate {
Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
auto qinput_pair = qinputs_.find(id);
if (qinput_pair != qinputs_.end()) {
#ifdef ENABLE_DYNAMIC_QUANTIZATION
auto qinput = qinput_pair->second;
// dq the input and copy it in to qinput
float input_min, input_max;
Expand Down Expand Up @@ -60,6 +63,10 @@ Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
{static_cast<float>(input_qparam.scale),
static_cast<int8_t>(input_qparam.zero_point)},
batch_size});
#else
ET_LOG(Error, "Dynamic Quantization is not supported");
return Error::NotSupported;
#endif
} else {
externals_.emplace_back(xnn_external_value{id, input->mutable_data_ptr()});
}
Expand Down
1 change: 1 addition & 0 deletions backends/xnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def define_common_targets():
"//executorch/extension/pybindings/...",
"@EXECUTORCH_CLIENTS",
],
preprocessor_flags = [] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"],
deps = [
third_party_dep("XNNPACK"),
":xnnpack_schema",
Expand Down
2 changes: 1 addition & 1 deletion backends/xnnpack/test/ops/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ def test_add_quantized_pt2e(self):

(
Tester(add_module, model_inputs)
.quantize2()
.export()
.check_count({"torch.ops.aten.add.Tensor": 4})
.quantize2()
.check(["torch.ops.quantized_decomposed"])
.to_edge()
.check_count({"executorch_exir_dialects_edge__ops_aten_add_Tensor": 4})
Expand Down
20 changes: 10 additions & 10 deletions backends/xnnpack/test/tester/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Any, Dict, List, Optional, Tuple

import torch
import torch._export as export
from executorch import exir
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
XnnpackFloatingPointPartitioner,
Expand Down Expand Up @@ -145,23 +146,23 @@ def __init__(

self.quantizer.set_global(self.quantization_config)

self.converted_program = None
self.converted_graph = None

def run(
self, artifact: ExirExportedProgram, inputs: Optional[Tuple[torch.Tensor]]
self, artifact: torch.nn.Module, inputs: Optional[Tuple[torch.Tensor]]
) -> None:
prepared = prepare_pt2e(artifact.exported_program.graph_module, self.quantizer)
captured_graph = export.capture_pre_autograd_graph(artifact, inputs)
prepared = prepare_pt2e(captured_graph, self.quantizer)
converted = convert_pt2e(prepared)
artifact.exported_program._graph_module = converted
self.converted_program = artifact
self.converted_graph = converted

@property
def artifact(self) -> ExirExportedProgram:
return self.converted_program
def artifact(self) -> torch.fx.GraphModule:
return self.converted_graph

@property
def graph_module(self) -> str:
return self.converted_program.exported_program.graph_module
return self.converted_graph


@register_stage
Expand Down Expand Up @@ -274,12 +275,11 @@ def __init__(
self.inputs = inputs
self.stages: Dict[str, Stage] = OrderedDict.fromkeys(list(_stages_.keys()))
self.pipeline = {
self._stage_name(Quantize2): [self._stage_name(Export)],
self._stage_name(Quantize): [self._stage_name(Export)],
self._stage_name(Export): [
self._stage_name(Quantize2),
self._stage_name(ToEdge),
],
self._stage_name(Quantize2): [self._stage_name(ToEdge)],
self._stage_name(ToEdge): [self._stage_name(Partition)],
# TODO Make this Stage optional
self._stage_name(Partition): [self._stage_name(ToExecutorch)],
Expand Down