From c001f597f571dcbc06273367c683b716203f10c3 Mon Sep 17 00:00:00 2001 From: Matthias Cremon Date: Tue, 7 May 2024 12:01:39 -0700 Subject: [PATCH] Cadence - Move primary code to backends folder (#3353) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/3353 See design and discussion in https://docs.google.com/document/d/1HPDTbN07WXB9PCdezmvSs_0Yy89D57F1pHHAF9UAgGg/edit#heading=h.828btb3wp67h. Previous folder structure: ``` executorch └── examples ├── aot ├── kernels ├── ops ├── tests ├── third-party/hifi4-nnlib └── utils ``` New folder structure: ``` executorch ├── backends │ └── cadence │ ├── aot │ ├── ops_registration │ ├── tests │ ├── utils │ ├── hifi │ │ ├── kernels │ │ ├── operators │ │ └── third-party │ │ └── nnlib │ └── [other cadence DSP families] │ ├── kernels │ ├── operators │ └── third-party │ └── [any required lib] └── examples └── cadence ├── models └── operators ``` Reviewed By: tarun292, cccclai Differential Revision: D56577399 fbshipit-source-id: a19d7d689b286c0da2ef533a17e5e66ee1eb8a26 --- {examples => backends}/cadence/CMakeLists.txt | 4 +- backends/cadence/README.md | 30 +++++++++++ backends/cadence/aot/__init__.py | 0 .../cadence/aot/compiler.py | 0 .../cadence/aot/export_example.py | 41 +++++++++++---- .../cadence/aot}/functions.yaml | 0 .../cadence/aot/ops_registrations.py | 0 .../cadence/aot/quantizer.py | 0 {examples => backends}/cadence/aot/utils.py | 0 {examples => backends}/cadence/cadence.cmake | 0 .../cadence/executor_runner.cpp | 1 + .../cadence/hifi}/kernels/CMakeLists.txt | 2 +- .../cadence/hifi}/kernels/kernels.cpp | 0 .../cadence/hifi}/kernels/kernels.h | 0 .../cadence/hifi/operators}/CMakeLists.txt | 36 ++++++------- .../hifi/operators}/dequantize_per_tensor.cpp | 0 .../cadence/hifi/operators}/op_add.cpp | 0 .../cadence/hifi/operators}/op_embedding.cpp | 0 .../cadence/hifi/operators}/op_full.cpp | 0 
.../cadence/hifi/operators}/op_view_copy.cpp | 0 .../hifi/operators}/quantize_per_tensor.cpp | 0 .../hifi/operators}/quantized_conv_out.cpp | 0 .../hifi/operators}/quantized_layer_norm.cpp | 0 .../hifi/operators}/quantized_linear_out.cpp | 0 .../hifi/operators}/quantized_relu_out.cpp | 0 .../hifi/third-party/nnlib}/license.txt | 0 .../nnlib}/matmul_asym8uxasym8u_asym8u.cpp | 0 .../matmul_asym8uxasym8u_asym8u_macros.h | 0 .../nnlib}/xa_nnlib_matmul_unroll_macros.h | 0 .../cadence/utils/gen_header.py | 0 .../cadence/utils/post_compilation.py | 0 docs/source/build-run-xtensa.md | 52 ++++++++++++------- .../rnnt_predictor.py} | 4 +- .../quantized_conv1d_op.py} | 6 +-- .../quantized_linear_op.py} | 6 +-- 35 files changed, 122 insertions(+), 60 deletions(-) rename {examples => backends}/cadence/CMakeLists.txt (97%) create mode 100644 backends/cadence/README.md create mode 100644 backends/cadence/aot/__init__.py rename {examples => backends}/cadence/aot/compiler.py (100%) rename {examples => backends}/cadence/aot/export_example.py (66%) rename {examples/cadence/ops => backends/cadence/aot}/functions.yaml (100%) rename examples/cadence/aot/meta_registrations.py => backends/cadence/aot/ops_registrations.py (100%) rename {examples => backends}/cadence/aot/quantizer.py (100%) rename {examples => backends}/cadence/aot/utils.py (100%) rename {examples => backends}/cadence/cadence.cmake (100%) rename {examples => backends}/cadence/executor_runner.cpp (99%) rename {examples/cadence => backends/cadence/hifi}/kernels/CMakeLists.txt (86%) rename {examples/cadence => backends/cadence/hifi}/kernels/kernels.cpp (100%) rename {examples/cadence => backends/cadence/hifi}/kernels/kernels.h (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/CMakeLists.txt (69%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/dequantize_per_tensor.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_add.cpp (100%) rename {examples/cadence/ops => 
backends/cadence/hifi/operators}/op_embedding.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_full.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_view_copy.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantize_per_tensor.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_conv_out.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_layer_norm.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_linear_out.cpp (100%) rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_relu_out.cpp (100%) rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/license.txt (100%) rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/matmul_asym8uxasym8u_asym8u.cpp (100%) rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/matmul_asym8uxasym8u_asym8u_macros.h (100%) rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/xa_nnlib_matmul_unroll_macros.h (100%) rename {examples => backends}/cadence/utils/gen_header.py (100%) rename {examples => backends}/cadence/utils/post_compilation.py (100%) rename examples/cadence/{tests/rnnt_predictor_quantized_example.py => models/rnnt_predictor.py} (92%) rename examples/cadence/{tests/quantized_conv1d_example.py => operators/quantized_conv1d_op.py} (90%) rename examples/cadence/{tests/quantized_linear_example.py => operators/quantized_linear_op.py} (88%) diff --git a/examples/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt similarity index 97% rename from examples/cadence/CMakeLists.txt rename to backends/cadence/CMakeLists.txt index 06f79df8f0..f1d5ccbd2e 100644 --- a/examples/cadence/CMakeLists.txt +++ b/backends/cadence/CMakeLists.txt @@ -94,8 +94,8 @@ set_property( 
"${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/extension/runner_util/libextension_runner_util.a" ) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ops) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/operators) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/kernels) # Generate the model header file add_custom_command( diff --git a/backends/cadence/README.md b/backends/cadence/README.md new file mode 100644 index 0000000000..867dbe31db --- /dev/null +++ b/backends/cadence/README.md @@ -0,0 +1,30 @@ +# Cadence DSP Backends + +## Supported DSPs (in progress) +- HiFi Audio +- ... + +## Tutorial + +Please follow the [tutorial](https://pytorch.org/executorch/main/build-run-xtensa.html) for more information on how to run models on Cadence/Xtensa DSPs. + +## Directory Structure + +``` +executorch +├── backends +│ └── cadence +│ ├── aot +│ ├── ops_registration +│ ├── tests +│ ├── utils +│ └── hifi +│ ├── kernels +│ ├── operators +│ └── third-party +│ └── nnlib +└── examples + └── cadence + ├── models + └── operators +``` diff --git a/backends/cadence/aot/__init__.py b/backends/cadence/aot/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py similarity index 100% rename from examples/cadence/aot/compiler.py rename to backends/cadence/aot/compiler.py diff --git a/examples/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py similarity index 66% rename from examples/cadence/aot/export_example.py rename to backends/cadence/aot/export_example.py index 07783af51d..e95a39b5a3 100644 --- a/examples/cadence/aot/export_example.py +++ b/backends/cadence/aot/export_example.py @@ -8,20 +8,23 @@ import logging -from .meta_registrations import * # noqa +from executorch.backends.cadence.aot.ops_registrations import * # noqa -from torch._export import capture_pre_autograd_graph -from torch.ao.quantization.quantize_pt2e import 
convert_pt2e, prepare_pt2e - -from ...portable.utils import save_pte_program +import os +from typing import Any, Tuple -from .compiler import export_to_edge -from .quantizer import ( +from executorch.backends.cadence.aot.compiler import export_to_edge +from executorch.backends.cadence.aot.quantizer import ( CadenceBaseQuantizer, QuantFusion, ReplacePT2DequantWithCadenceDequant, ReplacePT2QuantWithCadenceQuant, ) +from executorch.exir import ExecutorchProgramManager +from torch import nn +from torch._export import capture_pre_autograd_graph +from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e + from .utils import print_ops_info @@ -29,7 +32,25 @@ logging.basicConfig(level=logging.INFO, format=FORMAT) -def export_model(model, example_inputs): +def _save_pte_program( + prog: ExecutorchProgramManager, model_name: str, output_dir: str = "" +) -> None: + if model_name.endswith(".pte"): + filename = model_name + else: + filename = os.path.join(output_dir, f"{model_name}.pte") + + try: + with open(filename, "wb") as file: + prog.write_to_file(file) + logging.info(f"Saved exported program to {filename}") + except Exception as e: + logging.error(f"Error while saving to {filename}: {e}") + + +def export_model( + model: nn.Module, example_inputs: Tuple[Any], file_name: str = "CadenceDemoModel" +): # Quantizer quantizer = CadenceBaseQuantizer() @@ -70,5 +91,5 @@ def export_model(model, example_inputs): cadence_prog_manager.exported_program().graph_module, ) - # Save the program as CadenceDemoModel.pte - save_pte_program(exec_prog, "CadenceDemoModel") + # Save the program as (default name is CadenceDemoModel.pte) + _save_pte_program(exec_prog, file_name) diff --git a/examples/cadence/ops/functions.yaml b/backends/cadence/aot/functions.yaml similarity index 100% rename from examples/cadence/ops/functions.yaml rename to backends/cadence/aot/functions.yaml diff --git a/examples/cadence/aot/meta_registrations.py b/backends/cadence/aot/ops_registrations.py 
similarity index 100% rename from examples/cadence/aot/meta_registrations.py rename to backends/cadence/aot/ops_registrations.py diff --git a/examples/cadence/aot/quantizer.py b/backends/cadence/aot/quantizer.py similarity index 100% rename from examples/cadence/aot/quantizer.py rename to backends/cadence/aot/quantizer.py diff --git a/examples/cadence/aot/utils.py b/backends/cadence/aot/utils.py similarity index 100% rename from examples/cadence/aot/utils.py rename to backends/cadence/aot/utils.py diff --git a/examples/cadence/cadence.cmake b/backends/cadence/cadence.cmake similarity index 100% rename from examples/cadence/cadence.cmake rename to backends/cadence/cadence.cmake diff --git a/examples/cadence/executor_runner.cpp b/backends/cadence/executor_runner.cpp similarity index 99% rename from examples/cadence/executor_runner.cpp rename to backends/cadence/executor_runner.cpp index 7144f84507..0769aeccb7 100644 --- a/examples/cadence/executor_runner.cpp +++ b/backends/cadence/executor_runner.cpp @@ -24,6 +24,7 @@ #include "pin_mux.h" #include +// patternlint-disable executorch-cpp-nostdinc #include #include diff --git a/examples/cadence/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt similarity index 86% rename from examples/cadence/kernels/CMakeLists.txt rename to backends/cadence/hifi/kernels/CMakeLists.txt index 9bff7a234c..9d4d456d8b 100644 --- a/examples/cadence/kernels/CMakeLists.txt +++ b/backends/cadence/hifi/kernels/CMakeLists.txt @@ -8,7 +8,7 @@ add_library( cadence_kernels kernels.cpp - ${EXECUTORCH_ROOT}/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp + ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp ) target_include_directories( diff --git a/examples/cadence/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp similarity index 100% rename from examples/cadence/kernels/kernels.cpp rename to backends/cadence/hifi/kernels/kernels.cpp diff --git 
a/examples/cadence/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h similarity index 100% rename from examples/cadence/kernels/kernels.h rename to backends/cadence/hifi/kernels/kernels.h diff --git a/examples/cadence/ops/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt similarity index 69% rename from examples/cadence/ops/CMakeLists.txt rename to backends/cadence/hifi/operators/CMakeLists.txt index 0ddf75d593..4a8e942896 100644 --- a/examples/cadence/ops/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -32,8 +32,7 @@ set(_aten_ops__srcs "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp" - "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp" -) + "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp") add_library(aten_ops_cadence ${_aten_ops__srcs}) target_link_libraries(aten_ops_cadence PUBLIC executorch) target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels) @@ -41,22 +40,18 @@ target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels) # Let files say "include ". set(_common_include_directories ${EXECUTORCH_ROOT}/..) -target_include_directories( - aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR} - ${_common_include_directories} -) +target_include_directories(aten_ops_cadence PUBLIC ${ROOT_DIR}/.. + ${CMAKE_BINARY_DIR} + ${_common_include_directories}) # Custom ops that are needed to run the test model. add_library( - custom_ops - "quantized_linear_out.cpp" "quantized_conv_out.cpp" "quantized_relu_out.cpp" - "quantized_layer_norm.cpp" "quantize_per_tensor.cpp" - "dequantize_per_tensor.cpp" -) -target_include_directories( - custom_ops PUBLIC ${ROOT_DIR}/.. 
${CMAKE_BINARY_DIR} - ${_common_include_directories} -) + custom_ops "quantized_linear_out.cpp" "quantized_conv_out.cpp" + "quantized_relu_out.cpp" "quantized_layer_norm.cpp" + "quantize_per_tensor.cpp" "dequantize_per_tensor.cpp") +target_include_directories(custom_ops PUBLIC ${ROOT_DIR}/.. + ${CMAKE_BINARY_DIR} + ${_common_include_directories}) target_link_libraries(custom_ops PUBLIC executorch) target_link_libraries(custom_ops PRIVATE cadence_kernels) @@ -65,14 +60,15 @@ target_link_libraries(custom_ops PRIVATE cadence_kernels) # Executorch (for runtime). Here select all ops in functions.yaml gen_selected_ops( LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML - "${CMAKE_CURRENT_LIST_DIR}/functions.yaml" + "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions.yaml" "" "" ) generate_bindings_for_kernels( - LIB_NAME "cadence_ops_lib" FUNCTIONS_YAML - ${CMAKE_CURRENT_SOURCE_DIR}/functions.yaml + LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML + FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions.yaml ) message("Generated files ${gen_command_sources}") gen_operators_lib( - LIB_NAME "cadence_ops_lib" KERNEL_LIBS custom_ops DEPS aten_ops_cadence -) + LIB_NAME "cadence_ops_lib" + KERNEL_LIBS custom_ops + DEPS aten_ops_cadence) diff --git a/examples/cadence/ops/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp similarity index 100% rename from examples/cadence/ops/dequantize_per_tensor.cpp rename to backends/cadence/hifi/operators/dequantize_per_tensor.cpp diff --git a/examples/cadence/ops/op_add.cpp b/backends/cadence/hifi/operators/op_add.cpp similarity index 100% rename from examples/cadence/ops/op_add.cpp rename to backends/cadence/hifi/operators/op_add.cpp diff --git a/examples/cadence/ops/op_embedding.cpp b/backends/cadence/hifi/operators/op_embedding.cpp similarity index 100% rename from examples/cadence/ops/op_embedding.cpp rename to backends/cadence/hifi/operators/op_embedding.cpp diff --git a/examples/cadence/ops/op_full.cpp 
b/backends/cadence/hifi/operators/op_full.cpp similarity index 100% rename from examples/cadence/ops/op_full.cpp rename to backends/cadence/hifi/operators/op_full.cpp diff --git a/examples/cadence/ops/op_view_copy.cpp b/backends/cadence/hifi/operators/op_view_copy.cpp similarity index 100% rename from examples/cadence/ops/op_view_copy.cpp rename to backends/cadence/hifi/operators/op_view_copy.cpp diff --git a/examples/cadence/ops/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp similarity index 100% rename from examples/cadence/ops/quantize_per_tensor.cpp rename to backends/cadence/hifi/operators/quantize_per_tensor.cpp diff --git a/examples/cadence/ops/quantized_conv_out.cpp b/backends/cadence/hifi/operators/quantized_conv_out.cpp similarity index 100% rename from examples/cadence/ops/quantized_conv_out.cpp rename to backends/cadence/hifi/operators/quantized_conv_out.cpp diff --git a/examples/cadence/ops/quantized_layer_norm.cpp b/backends/cadence/hifi/operators/quantized_layer_norm.cpp similarity index 100% rename from examples/cadence/ops/quantized_layer_norm.cpp rename to backends/cadence/hifi/operators/quantized_layer_norm.cpp diff --git a/examples/cadence/ops/quantized_linear_out.cpp b/backends/cadence/hifi/operators/quantized_linear_out.cpp similarity index 100% rename from examples/cadence/ops/quantized_linear_out.cpp rename to backends/cadence/hifi/operators/quantized_linear_out.cpp diff --git a/examples/cadence/ops/quantized_relu_out.cpp b/backends/cadence/hifi/operators/quantized_relu_out.cpp similarity index 100% rename from examples/cadence/ops/quantized_relu_out.cpp rename to backends/cadence/hifi/operators/quantized_relu_out.cpp diff --git a/examples/cadence/third-party/nnlib-hifi4/license.txt b/backends/cadence/hifi/third-party/nnlib/license.txt similarity index 100% rename from examples/cadence/third-party/nnlib-hifi4/license.txt rename to backends/cadence/hifi/third-party/nnlib/license.txt diff --git 
a/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp similarity index 100% rename from examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp rename to backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp diff --git a/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u_macros.h b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u_macros.h similarity index 100% rename from examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u_macros.h rename to backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u_macros.h diff --git a/examples/cadence/third-party/nnlib-hifi4/xa_nnlib_matmul_unroll_macros.h b/backends/cadence/hifi/third-party/nnlib/xa_nnlib_matmul_unroll_macros.h similarity index 100% rename from examples/cadence/third-party/nnlib-hifi4/xa_nnlib_matmul_unroll_macros.h rename to backends/cadence/hifi/third-party/nnlib/xa_nnlib_matmul_unroll_macros.h diff --git a/examples/cadence/utils/gen_header.py b/backends/cadence/utils/gen_header.py similarity index 100% rename from examples/cadence/utils/gen_header.py rename to backends/cadence/utils/gen_header.py diff --git a/examples/cadence/utils/post_compilation.py b/backends/cadence/utils/post_compilation.py similarity index 100% rename from examples/cadence/utils/post_compilation.py rename to backends/cadence/utils/post_compilation.py diff --git a/docs/source/build-run-xtensa.md b/docs/source/build-run-xtensa.md index 17fd6049f9..e46f52b682 100644 --- a/docs/source/build-run-xtensa.md +++ b/docs/source/build-run-xtensa.md @@ -64,18 +64,32 @@ Step 2. 
Make sure you have completed the ExecuTorch setup tutorials linked to at The working tree is: ``` -examples/cadence/ -├── aot -├── kernels -├── ops -├── tests -├── third-party -└── utils +executorch +├── backends +│ └── cadence +│ ├── aot +│ ├── ops_registration +│ ├── tests +│ ├── utils +│ ├── hifi +│ │ ├── kernels +│ │ ├── operators +│ │ └── third-party +│ │ └── nnlib +│ └── [other cadence DSP families] +│ ├── kernels +│ ├── operators +│ └── third-party +│ └── [any required lib] +└── examples + └── cadence + ├── models + └── operators ``` ***AoT (Ahead-of-Time) Components***: -The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/meta_registrations.py) and have corresponding implemetations in the other folders. +The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/quantizer.py)). 
Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [ops_registrations.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/ops_registrations.py) and have corresponding implementations in the other folders. ***Operators***: @@ -101,27 +115,27 @@ python3 -m examples.portable.scripts.export --model_name="add" ***Quantized Operators***: The other, more complex model are custom operators, including: - - a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models. - - a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models. + - a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/operators/quantized_linear_op.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models. + - a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/operators/quantized_conv1d_op.py#L36). Convolutions are important in wake word and many denoising models. -In both cases the generated file is called `XtensaDemoModel.pte`. 
+In both cases the generated file is called `CadenceDemoModel.pte`. ```bash cd executorch -python3 -m examples.cadence.tests.quantized__example +python3 -m examples.cadence.operators.quantized__op ``` ***Small Model: RNNT predictor***: The torchaudio [RNNT-emformer](https://pytorch.org/audio/stable/tutorials/online_asr_tutorial.html) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels: an encoder, a predictor and a joiner. -The predictor is a sequence of basic ops (embedding, ReLU, linear, layer norm) and can be exported using: +The [predictor](https://github.com/pytorch/executorch/blob/main/examples/cadence/models/rnnt_predictor.py) is a sequence of basic ops (embedding, ReLU, linear, layer norm) and can be exported using: ```bash cd executorch -python3 -m examples.cadence.tests.rnnt_predictor_quantized_example +python3 -m examples.cadence.models.rnnt_predictor ``` -The generated file is called `XtensaDemoModel.pte`. +The generated file is called `CadenceDemoModel.pte`. ### Runtime @@ -150,7 +164,7 @@ In order to run the CMake build, you need the path to the following: cd executorch rm -rf cmake-out # prebuild and install executorch library -cmake -DCMAKE_TOOLCHAIN_FILE=/examples/cadence/cadence.cmake \ +cmake -DCMAKE_TOOLCHAIN_FILE=/backends/cadence/cadence.cmake \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DCMAKE_BUILD_TYPE=Debug \ -DPYTHON_EXECUTABLE=python3 \ @@ -163,10 +177,10 @@ cmake -DCMAKE_TOOLCHAIN_FILE=/examples/cadence/cadence.cmake -DFLATC_EXECUTABLE="$(which flatc)" \ -Bcmake-out . 
-cmake --build cmake-out -j8 --target install --config Debug +cmake --build cmake-out -j --target install --config Debug # build cadence runner cmake -DCMAKE_BUILD_TYPE=Debug \ - -DCMAKE_TOOLCHAIN_FILE=/examples/cadence/cadence.cmake \ + -DCMAKE_TOOLCHAIN_FILE=/backends/cadence/cadence.cmake \ -DCMAKE_PREFIX_PATH=/cmake-out \ -DMODEL_PATH= \ -DNXP_SDK_ROOT_DIR= -DEXECUTORCH_BUILD_FLATC=0 \ @@ -212,6 +226,6 @@ First 20 elements of output 0 In this tutorial, you have learned how to export a quantized operation, build the ExecuTorch runtime and run this model on the Xtensa HiFi4 DSP chip. -The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/cadence/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/cadence/kernels). +The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/backends/cadence/hifi/operators) and [kernels](https://github.com/pytorch/executorch/blob/main/backends/cadence/hifi/kernels). Other models can be created following the same structure, always assuming that operators and kernels are available. 
diff --git a/examples/cadence/tests/rnnt_predictor_quantized_example.py b/examples/cadence/models/rnnt_predictor.py similarity index 92% rename from examples/cadence/tests/rnnt_predictor_quantized_example.py rename to examples/cadence/models/rnnt_predictor.py index fd94f48f88..d83f593d3e 100644 --- a/examples/cadence/tests/rnnt_predictor_quantized_example.py +++ b/examples/cadence/models/rnnt_predictor.py @@ -10,11 +10,11 @@ import torch -from ..aot.meta_registrations import * # noqa +from executorch.backends.cadence.aot.ops_registrations import * # noqa from typing import Tuple -from ..aot.export_example import export_model +from executorch.backends.cadence.aot.export_example import export_model FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" diff --git a/examples/cadence/tests/quantized_conv1d_example.py b/examples/cadence/operators/quantized_conv1d_op.py similarity index 90% rename from examples/cadence/tests/quantized_conv1d_example.py rename to examples/cadence/operators/quantized_conv1d_op.py index 47ec4e9b66..3247cb690d 100644 --- a/examples/cadence/tests/quantized_conv1d_example.py +++ b/examples/cadence/operators/quantized_conv1d_op.py @@ -8,11 +8,11 @@ import logging -from ..aot.meta_registrations import * # noqa - import torch -from ..aot.export_example import export_model +from executorch.backends.cadence.aot.ops_registrations import * # noqa + +from executorch.backends.cadence.aot.export_example import export_model FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s" diff --git a/examples/cadence/tests/quantized_linear_example.py b/examples/cadence/operators/quantized_linear_op.py similarity index 88% rename from examples/cadence/tests/quantized_linear_example.py rename to examples/cadence/operators/quantized_linear_op.py index 916b684173..583359cdc6 100644 --- a/examples/cadence/tests/quantized_linear_example.py +++ b/examples/cadence/operators/quantized_linear_op.py @@ -8,11 +8,11 @@ import logging 
-from ..aot.meta_registrations import * # noqa - import torch -from ..aot.export_example import export_model +from executorch.backends.cadence.aot.ops_registrations import * # noqa + +from executorch.backends.cadence.aot.export_example import export_model FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"