From c001f597f571dcbc06273367c683b716203f10c3 Mon Sep 17 00:00:00 2001
From: Matthias Cremon <matthiascremon@meta.com>
Date: Tue, 7 May 2024 12:01:39 -0700
Subject: [PATCH] Cadence - Move primary code to backends folder (#3353)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/3353

See design and discussion in https://docs.google.com/document/d/1HPDTbN07WXB9PCdezmvSs_0Yy89D57F1pHHAF9UAgGg/edit#heading=h.828btb3wp67h.

Previous folder structure:
```
executorch
└── examples
    └── cadence
        ├── aot
        ├── kernels
        ├── ops
        ├── tests
        ├── third-party/nnlib-hifi4
        └── utils
```

New folder structure:
```
executorch
├── backends
│   └── cadence
│       ├── aot
│       ├── ops_registration
│       ├── tests
│       ├── utils
│       ├── hifi
│       │   ├── kernels
│       │   ├── operators
│       │   └── third-party
│       │       └── nnlib
│       └── [other cadence DSP families]
│           ├── kernels
│           ├── operators
│           └── third-party
│               └── [any required lib]
└── examples
    └── cadence
        ├── models
        └── operators
```

Reviewed By: tarun292, cccclai

Differential Revision: D56577399

fbshipit-source-id: a19d7d689b286c0da2ef533a17e5e66ee1eb8a26
---
 {examples => backends}/cadence/CMakeLists.txt |  4 +-
 backends/cadence/README.md                    | 30 +++++++++++
 backends/cadence/aot/__init__.py              |  0
 .../cadence/aot/compiler.py                   |  0
 .../cadence/aot/export_example.py             | 41 +++++++++++----
 .../cadence/aot}/functions.yaml               |  0
 .../cadence/aot/ops_registrations.py          |  0
 .../cadence/aot/quantizer.py                  |  0
 {examples => backends}/cadence/aot/utils.py   |  0
 {examples => backends}/cadence/cadence.cmake  |  0
 .../cadence/executor_runner.cpp               |  1 +
 .../cadence/hifi}/kernels/CMakeLists.txt      |  2 +-
 .../cadence/hifi}/kernels/kernels.cpp         |  0
 .../cadence/hifi}/kernels/kernels.h           |  0
 .../cadence/hifi/operators}/CMakeLists.txt    | 36 ++++++-------
 .../hifi/operators}/dequantize_per_tensor.cpp |  0
 .../cadence/hifi/operators}/op_add.cpp        |  0
 .../cadence/hifi/operators}/op_embedding.cpp  |  0
 .../cadence/hifi/operators}/op_full.cpp       |  0
 .../cadence/hifi/operators}/op_view_copy.cpp  |  0
 .../hifi/operators}/quantize_per_tensor.cpp   |  0
 .../hifi/operators}/quantized_conv_out.cpp    |  0
 .../hifi/operators}/quantized_layer_norm.cpp  |  0
 .../hifi/operators}/quantized_linear_out.cpp  |  0
 .../hifi/operators}/quantized_relu_out.cpp    |  0
 .../hifi/third-party/nnlib}/license.txt       |  0
 .../nnlib}/matmul_asym8uxasym8u_asym8u.cpp    |  0
 .../matmul_asym8uxasym8u_asym8u_macros.h      |  0
 .../nnlib}/xa_nnlib_matmul_unroll_macros.h    |  0
 .../cadence/utils/gen_header.py               |  0
 .../cadence/utils/post_compilation.py         |  0
 docs/source/build-run-xtensa.md               | 52 ++++++++++++-------
 .../rnnt_predictor.py}                        |  4 +-
 .../quantized_conv1d_op.py}                   |  6 +--
 .../quantized_linear_op.py}                   |  6 +--
 35 files changed, 122 insertions(+), 60 deletions(-)
 rename {examples => backends}/cadence/CMakeLists.txt (97%)
 create mode 100644 backends/cadence/README.md
 create mode 100644 backends/cadence/aot/__init__.py
 rename {examples => backends}/cadence/aot/compiler.py (100%)
 rename {examples => backends}/cadence/aot/export_example.py (66%)
 rename {examples/cadence/ops => backends/cadence/aot}/functions.yaml (100%)
 rename examples/cadence/aot/meta_registrations.py => backends/cadence/aot/ops_registrations.py (100%)
 rename {examples => backends}/cadence/aot/quantizer.py (100%)
 rename {examples => backends}/cadence/aot/utils.py (100%)
 rename {examples => backends}/cadence/cadence.cmake (100%)
 rename {examples => backends}/cadence/executor_runner.cpp (99%)
 rename {examples/cadence => backends/cadence/hifi}/kernels/CMakeLists.txt (86%)
 rename {examples/cadence => backends/cadence/hifi}/kernels/kernels.cpp (100%)
 rename {examples/cadence => backends/cadence/hifi}/kernels/kernels.h (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/CMakeLists.txt (69%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/dequantize_per_tensor.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_add.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_embedding.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_full.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/op_view_copy.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantize_per_tensor.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_conv_out.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_layer_norm.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_linear_out.cpp (100%)
 rename {examples/cadence/ops => backends/cadence/hifi/operators}/quantized_relu_out.cpp (100%)
 rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/license.txt (100%)
 rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/matmul_asym8uxasym8u_asym8u.cpp (100%)
 rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/matmul_asym8uxasym8u_asym8u_macros.h (100%)
 rename {examples/cadence/third-party/nnlib-hifi4 => backends/cadence/hifi/third-party/nnlib}/xa_nnlib_matmul_unroll_macros.h (100%)
 rename {examples => backends}/cadence/utils/gen_header.py (100%)
 rename {examples => backends}/cadence/utils/post_compilation.py (100%)
 rename examples/cadence/{tests/rnnt_predictor_quantized_example.py => models/rnnt_predictor.py} (92%)
 rename examples/cadence/{tests/quantized_conv1d_example.py => operators/quantized_conv1d_op.py} (90%)
 rename examples/cadence/{tests/quantized_linear_example.py => operators/quantized_linear_op.py} (88%)

diff --git a/examples/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt
similarity index 97%
rename from examples/cadence/CMakeLists.txt
rename to backends/cadence/CMakeLists.txt
index 06f79df8f0..f1d5ccbd2e 100644
--- a/examples/cadence/CMakeLists.txt
+++ b/backends/cadence/CMakeLists.txt
@@ -94,8 +94,8 @@ set_property(
     "${CMAKE_CURRENT_LIST_DIR}/../../cmake-out/extension/runner_util/libextension_runner_util.a"
 )
 
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ops)
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hifi/kernels)
 
 # Generate the model header file
 add_custom_command(
diff --git a/backends/cadence/README.md b/backends/cadence/README.md
new file mode 100644
index 0000000000..867dbe31db
--- /dev/null
+++ b/backends/cadence/README.md
@@ -0,0 +1,30 @@
+# Cadence DSP Backends
+
+## Supported DSPs (in progress)
+- HiFi Audio
+- ...
+
+## Tutorial
+
+Please follow the [tutorial](https://pytorch.org/executorch/main/build-run-xtensa.html) for more information on how to run models on Cadence/Xtensa DSPs.
+
+## Directory Structure
+
+```
+executorch
+├── backends
+│   └── cadence
+│       ├── aot
+│       ├── ops_registration
+│       ├── tests
+│       ├── utils
+│       └── hifi
+│           ├── kernels
+│           ├── operators
+│           └── third-party
+│               └── nnlib
+└── examples
+    └── cadence
+        ├── models
+        └── operators
+```
diff --git a/backends/cadence/aot/__init__.py b/backends/cadence/aot/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
similarity index 100%
rename from examples/cadence/aot/compiler.py
rename to backends/cadence/aot/compiler.py
diff --git a/examples/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py
similarity index 66%
rename from examples/cadence/aot/export_example.py
rename to backends/cadence/aot/export_example.py
index 07783af51d..e95a39b5a3 100644
--- a/examples/cadence/aot/export_example.py
+++ b/backends/cadence/aot/export_example.py
@@ -8,20 +8,23 @@
 
 import logging
 
-from .meta_registrations import *  # noqa
+from executorch.backends.cadence.aot.ops_registrations import *  # noqa
 
-from torch._export import capture_pre_autograd_graph
-from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
-
-from ...portable.utils import save_pte_program
+import os
+from typing import Any, Tuple
 
-from .compiler import export_to_edge
-from .quantizer import (
+from executorch.backends.cadence.aot.compiler import export_to_edge
+from executorch.backends.cadence.aot.quantizer import (
     CadenceBaseQuantizer,
     QuantFusion,
     ReplacePT2DequantWithCadenceDequant,
     ReplacePT2QuantWithCadenceQuant,
 )
+from executorch.exir import ExecutorchProgramManager
+from torch import nn
+from torch._export import capture_pre_autograd_graph
+from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
+
 from .utils import print_ops_info
 
 
@@ -29,7 +32,25 @@
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 
 
-def export_model(model, example_inputs):
+def _save_pte_program(
+    prog: ExecutorchProgramManager, model_name: str, output_dir: str = ""
+) -> None:
+    if model_name.endswith(".pte"):
+        filename = model_name
+    else:
+        filename = os.path.join(output_dir, f"{model_name}.pte")
+
+    try:
+        with open(filename, "wb") as file:
+            prog.write_to_file(file)
+            logging.info(f"Saved exported program to {filename}")
+    except Exception as e:
+        logging.error(f"Error while saving to {filename}: {e}")
+
+
+def export_model(
+    model: nn.Module, example_inputs: Tuple[Any], file_name: str = "CadenceDemoModel"
+):
     # Quantizer
     quantizer = CadenceBaseQuantizer()
 
@@ -70,5 +91,5 @@ def export_model(model, example_inputs):
         cadence_prog_manager.exported_program().graph_module,
     )
 
-    # Save the program as CadenceDemoModel.pte
-    save_pte_program(exec_prog, "CadenceDemoModel")
+    # Save the program to disk (default file name is CadenceDemoModel.pte)
+    _save_pte_program(exec_prog, file_name)
diff --git a/examples/cadence/ops/functions.yaml b/backends/cadence/aot/functions.yaml
similarity index 100%
rename from examples/cadence/ops/functions.yaml
rename to backends/cadence/aot/functions.yaml
diff --git a/examples/cadence/aot/meta_registrations.py b/backends/cadence/aot/ops_registrations.py
similarity index 100%
rename from examples/cadence/aot/meta_registrations.py
rename to backends/cadence/aot/ops_registrations.py
diff --git a/examples/cadence/aot/quantizer.py b/backends/cadence/aot/quantizer.py
similarity index 100%
rename from examples/cadence/aot/quantizer.py
rename to backends/cadence/aot/quantizer.py
diff --git a/examples/cadence/aot/utils.py b/backends/cadence/aot/utils.py
similarity index 100%
rename from examples/cadence/aot/utils.py
rename to backends/cadence/aot/utils.py
diff --git a/examples/cadence/cadence.cmake b/backends/cadence/cadence.cmake
similarity index 100%
rename from examples/cadence/cadence.cmake
rename to backends/cadence/cadence.cmake
diff --git a/examples/cadence/executor_runner.cpp b/backends/cadence/executor_runner.cpp
similarity index 99%
rename from examples/cadence/executor_runner.cpp
rename to backends/cadence/executor_runner.cpp
index 7144f84507..0769aeccb7 100644
--- a/examples/cadence/executor_runner.cpp
+++ b/backends/cadence/executor_runner.cpp
@@ -24,6 +24,7 @@
 #include "pin_mux.h"
 
 #include <memory>
+// patternlint-disable executorch-cpp-nostdinc
 #include <vector>
 
 #include <executorch/extension/data_loader/buffer_data_loader.h>
diff --git a/examples/cadence/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt
similarity index 86%
rename from examples/cadence/kernels/CMakeLists.txt
rename to backends/cadence/hifi/kernels/CMakeLists.txt
index 9bff7a234c..9d4d456d8b 100644
--- a/examples/cadence/kernels/CMakeLists.txt
+++ b/backends/cadence/hifi/kernels/CMakeLists.txt
@@ -8,7 +8,7 @@
 add_library(
   cadence_kernels
   kernels.cpp
-  ${EXECUTORCH_ROOT}/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp
+  ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
 )
 
 target_include_directories(
diff --git a/examples/cadence/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
similarity index 100%
rename from examples/cadence/kernels/kernels.cpp
rename to backends/cadence/hifi/kernels/kernels.cpp
diff --git a/examples/cadence/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h
similarity index 100%
rename from examples/cadence/kernels/kernels.h
rename to backends/cadence/hifi/kernels/kernels.h
diff --git a/examples/cadence/ops/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt
similarity index 69%
rename from examples/cadence/ops/CMakeLists.txt
rename to backends/cadence/hifi/operators/CMakeLists.txt
index 0ddf75d593..4a8e942896 100644
--- a/examples/cadence/ops/CMakeLists.txt
+++ b/backends/cadence/hifi/operators/CMakeLists.txt
@@ -32,8 +32,7 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
-    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
-)
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp")
 add_library(aten_ops_cadence ${_aten_ops__srcs})
 target_link_libraries(aten_ops_cadence PUBLIC executorch)
 target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
@@ -41,22 +40,18 @@ target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
-target_include_directories(
-  aten_ops_cadence PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
-                          ${_common_include_directories}
-)
+target_include_directories(aten_ops_cadence PUBLIC ${ROOT_DIR}/..
+                                                ${CMAKE_BINARY_DIR}
+                                                ${_common_include_directories})
 
 # Custom ops that are needed to run the test model.
 add_library(
-  custom_ops
-  "quantized_linear_out.cpp" "quantized_conv_out.cpp" "quantized_relu_out.cpp"
-  "quantized_layer_norm.cpp" "quantize_per_tensor.cpp"
-  "dequantize_per_tensor.cpp"
-)
-target_include_directories(
-  custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
-                    ${_common_include_directories}
-)
+  custom_ops "quantized_linear_out.cpp" "quantized_conv_out.cpp"
+  "quantized_relu_out.cpp" "quantized_layer_norm.cpp"
+  "quantize_per_tensor.cpp" "dequantize_per_tensor.cpp")
+target_include_directories(custom_ops PUBLIC ${ROOT_DIR}/..
+                                             ${CMAKE_BINARY_DIR}
+                                             ${_common_include_directories})
 
 target_link_libraries(custom_ops PUBLIC executorch)
 target_link_libraries(custom_ops PRIVATE cadence_kernels)
@@ -65,14 +60,15 @@ target_link_libraries(custom_ops PRIVATE cadence_kernels)
 # Executorch (for runtime). Here select all ops in functions.yaml
 gen_selected_ops(
   LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
-  "${CMAKE_CURRENT_LIST_DIR}/functions.yaml"
+  "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions.yaml" "" ""
 )
 generate_bindings_for_kernels(
-  LIB_NAME "cadence_ops_lib" FUNCTIONS_YAML
-  ${CMAKE_CURRENT_SOURCE_DIR}/functions.yaml
+  LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions.yaml
 )
 message("Generated files ${gen_command_sources}")
 
 gen_operators_lib(
-  LIB_NAME "cadence_ops_lib" KERNEL_LIBS custom_ops DEPS aten_ops_cadence
-)
+  LIB_NAME "cadence_ops_lib"
+  KERNEL_LIBS custom_ops
+  DEPS aten_ops_cadence)
diff --git a/examples/cadence/ops/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp
similarity index 100%
rename from examples/cadence/ops/dequantize_per_tensor.cpp
rename to backends/cadence/hifi/operators/dequantize_per_tensor.cpp
diff --git a/examples/cadence/ops/op_add.cpp b/backends/cadence/hifi/operators/op_add.cpp
similarity index 100%
rename from examples/cadence/ops/op_add.cpp
rename to backends/cadence/hifi/operators/op_add.cpp
diff --git a/examples/cadence/ops/op_embedding.cpp b/backends/cadence/hifi/operators/op_embedding.cpp
similarity index 100%
rename from examples/cadence/ops/op_embedding.cpp
rename to backends/cadence/hifi/operators/op_embedding.cpp
diff --git a/examples/cadence/ops/op_full.cpp b/backends/cadence/hifi/operators/op_full.cpp
similarity index 100%
rename from examples/cadence/ops/op_full.cpp
rename to backends/cadence/hifi/operators/op_full.cpp
diff --git a/examples/cadence/ops/op_view_copy.cpp b/backends/cadence/hifi/operators/op_view_copy.cpp
similarity index 100%
rename from examples/cadence/ops/op_view_copy.cpp
rename to backends/cadence/hifi/operators/op_view_copy.cpp
diff --git a/examples/cadence/ops/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp
similarity index 100%
rename from examples/cadence/ops/quantize_per_tensor.cpp
rename to backends/cadence/hifi/operators/quantize_per_tensor.cpp
diff --git a/examples/cadence/ops/quantized_conv_out.cpp b/backends/cadence/hifi/operators/quantized_conv_out.cpp
similarity index 100%
rename from examples/cadence/ops/quantized_conv_out.cpp
rename to backends/cadence/hifi/operators/quantized_conv_out.cpp
diff --git a/examples/cadence/ops/quantized_layer_norm.cpp b/backends/cadence/hifi/operators/quantized_layer_norm.cpp
similarity index 100%
rename from examples/cadence/ops/quantized_layer_norm.cpp
rename to backends/cadence/hifi/operators/quantized_layer_norm.cpp
diff --git a/examples/cadence/ops/quantized_linear_out.cpp b/backends/cadence/hifi/operators/quantized_linear_out.cpp
similarity index 100%
rename from examples/cadence/ops/quantized_linear_out.cpp
rename to backends/cadence/hifi/operators/quantized_linear_out.cpp
diff --git a/examples/cadence/ops/quantized_relu_out.cpp b/backends/cadence/hifi/operators/quantized_relu_out.cpp
similarity index 100%
rename from examples/cadence/ops/quantized_relu_out.cpp
rename to backends/cadence/hifi/operators/quantized_relu_out.cpp
diff --git a/examples/cadence/third-party/nnlib-hifi4/license.txt b/backends/cadence/hifi/third-party/nnlib/license.txt
similarity index 100%
rename from examples/cadence/third-party/nnlib-hifi4/license.txt
rename to backends/cadence/hifi/third-party/nnlib/license.txt
diff --git a/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
similarity index 100%
rename from examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u.cpp
rename to backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
diff --git a/examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u_macros.h b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u_macros.h
similarity index 100%
rename from examples/cadence/third-party/nnlib-hifi4/matmul_asym8uxasym8u_asym8u_macros.h
rename to backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u_macros.h
diff --git a/examples/cadence/third-party/nnlib-hifi4/xa_nnlib_matmul_unroll_macros.h b/backends/cadence/hifi/third-party/nnlib/xa_nnlib_matmul_unroll_macros.h
similarity index 100%
rename from examples/cadence/third-party/nnlib-hifi4/xa_nnlib_matmul_unroll_macros.h
rename to backends/cadence/hifi/third-party/nnlib/xa_nnlib_matmul_unroll_macros.h
diff --git a/examples/cadence/utils/gen_header.py b/backends/cadence/utils/gen_header.py
similarity index 100%
rename from examples/cadence/utils/gen_header.py
rename to backends/cadence/utils/gen_header.py
diff --git a/examples/cadence/utils/post_compilation.py b/backends/cadence/utils/post_compilation.py
similarity index 100%
rename from examples/cadence/utils/post_compilation.py
rename to backends/cadence/utils/post_compilation.py
diff --git a/docs/source/build-run-xtensa.md b/docs/source/build-run-xtensa.md
index 17fd6049f9..e46f52b682 100644
--- a/docs/source/build-run-xtensa.md
+++ b/docs/source/build-run-xtensa.md
@@ -64,18 +64,32 @@ Step 2. Make sure you have completed the ExecuTorch setup tutorials linked to at
 The working tree is:
 
 ```
-examples/cadence/
-├── aot
-├── kernels
-├── ops
-├── tests
-├── third-party
-└── utils
+executorch
+├── backends
+│   └── cadence
+│       ├── aot
+│       ├── ops_registration
+│       ├── tests
+│       ├── utils
+│       ├── hifi
+│       │   ├── kernels
+│       │   ├── operators
+│       │   └── third-party
+│       │       └── nnlib
+│       └── [other cadence DSP families]
+│           ├── kernels
+│           ├── operators
+│           └── third-party
+│               └── [any required lib]
+└── examples
+    └── cadence
+        ├── models
+        └── operators
 ```
 
 ***AoT (Ahead-of-Time) Components***:
 
-The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [meta_registrations.py](https://github.com/pytorch/executorch/blob/main/examples/cadence/aot/meta_registrations.py) and have corresponding implemetations in the other folders.
+The AoT folder contains all of the python scripts and functions needed to export the model to an ExecuTorch `.pte` file. In our case, [export_example.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/export_example.py) is an API that takes a model (nn.Module) and representative inputs and runs it through the quantizer (from [quantizer.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/quantizer.py)). Then a few compiler passes, also defined in [quantizer.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/quantizer.py), will replace operators with custom ones that are supported and optimized on the chip. Any operator needed to compute things should be defined in [ops_registrations.py](https://github.com/pytorch/executorch/blob/main/backends/cadence/aot/ops_registrations.py) and have corresponding implementations in the other folders.
 
 ***Operators***:
 
@@ -101,27 +115,27 @@ python3 -m examples.portable.scripts.export --model_name="add"
 ***Quantized Operators***:
 
 The other, more complex model are custom operators, including:
-  - a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_linear_example.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
-  - a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/tests/quantized_conv1d_example.py#L36). Convolutions are important in wake word and many denoising models.
+  - a quantized [linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/operators/quantized_linear_op.py#L28). Linear is the backbone of most Automatic Speech Recognition (ASR) models.
+  - a quantized [conv1d](https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html) operation. The model is defined [here](https://github.com/pytorch/executorch/blob/main/examples/cadence/operators/quantized_conv1d_op.py#L36). Convolutions are important in wake word and many denoising models.
 
-In both cases the generated file is called `XtensaDemoModel.pte`.
+In both cases the generated file is called `CadenceDemoModel.pte`.
 
 ```bash
 cd executorch
-python3 -m examples.cadence.tests.quantized_<linear,conv1d>_example
+python3 -m examples.cadence.operators.quantized_<linear,conv1d>_op
 ```
 
 ***Small Model: RNNT predictor***:
 
 The torchaudio [RNNT-emformer](https://pytorch.org/audio/stable/tutorials/online_asr_tutorial.html) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels: an encoder, a predictor and a joiner.
-The predictor is a sequence of basic ops (embedding, ReLU, linear, layer norm) and can be exported using:
+The [predictor](https://github.com/pytorch/executorch/blob/main/examples/cadence/models/rnnt_predictor.py) is a sequence of basic ops (embedding, ReLU, linear, layer norm) and can be exported using:
 
 ```bash
 cd executorch
-python3 -m examples.cadence.tests.rnnt_predictor_quantized_example
+python3 -m examples.cadence.models.rnnt_predictor
 ```
 
-The generated file is called `XtensaDemoModel.pte`.
+The generated file is called `CadenceDemoModel.pte`.
 
 ### Runtime
 
@@ -150,7 +164,7 @@ In order to run the CMake build, you need the path to the following:
 cd executorch
 rm -rf cmake-out
 # prebuild and install executorch library
-cmake -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
+cmake -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/backends/cadence/cadence.cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Debug \
     -DPYTHON_EXECUTABLE=python3 \
@@ -163,10 +177,10 @@ cmake -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake
     -DFLATC_EXECUTABLE="$(which flatc)" \
     -Bcmake-out .
 
-cmake --build cmake-out -j8 --target install --config Debug
+cmake --build cmake-out -j<num_cores> --target install --config Debug
 # build cadence runner
 cmake -DCMAKE_BUILD_TYPE=Debug \
-    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/examples/cadence/cadence.cmake \
+    -DCMAKE_TOOLCHAIN_FILE=<path_to_executorch>/backends/cadence/cadence.cmake \
     -DCMAKE_PREFIX_PATH=<path_to_executorch>/cmake-out \
     -DMODEL_PATH=<path_to_program_file_generated_in_previous_step> \
     -DNXP_SDK_ROOT_DIR=<path_to_nxp_sdk_root> -DEXECUTORCH_BUILD_FLATC=0 \
@@ -212,6 +226,6 @@ First 20 elements of output 0
 
 In this tutorial, you have learned how to export a quantized operation, build the ExecuTorch runtime and run this model on the Xtensa HiFi4 DSP chip.
 
-The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/examples/cadence/ops) and [kernels](https://github.com/pytorch/executorch/blob/main/examples/cadence/kernels).
+The (quantized linear) model in this tutorial is a typical operation appearing in ASR models, and can be extended to a complete ASR model by creating the model as a new test and adding the needed operators/kernels to [operators](https://github.com/pytorch/executorch/blob/main/backends/cadence/hifi/operators) and [kernels](https://github.com/pytorch/executorch/blob/main/backends/cadence/hifi/kernels).
 
 Other models can be created following the same structure, always assuming that operators and kernels are available.
diff --git a/examples/cadence/tests/rnnt_predictor_quantized_example.py b/examples/cadence/models/rnnt_predictor.py
similarity index 92%
rename from examples/cadence/tests/rnnt_predictor_quantized_example.py
rename to examples/cadence/models/rnnt_predictor.py
index fd94f48f88..d83f593d3e 100644
--- a/examples/cadence/tests/rnnt_predictor_quantized_example.py
+++ b/examples/cadence/models/rnnt_predictor.py
@@ -10,11 +10,11 @@
 
 import torch
 
-from ..aot.meta_registrations import *  # noqa
+from executorch.backends.cadence.aot.ops_registrations import *  # noqa
 
 from typing import Tuple
 
-from ..aot.export_example import export_model
+from executorch.backends.cadence.aot.export_example import export_model
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
diff --git a/examples/cadence/tests/quantized_conv1d_example.py b/examples/cadence/operators/quantized_conv1d_op.py
similarity index 90%
rename from examples/cadence/tests/quantized_conv1d_example.py
rename to examples/cadence/operators/quantized_conv1d_op.py
index 47ec4e9b66..3247cb690d 100644
--- a/examples/cadence/tests/quantized_conv1d_example.py
+++ b/examples/cadence/operators/quantized_conv1d_op.py
@@ -8,11 +8,11 @@
 
 import logging
 
-from ..aot.meta_registrations import *  # noqa
-
 import torch
 
-from ..aot.export_example import export_model
+from executorch.backends.cadence.aot.ops_registrations import *  # noqa
+
+from executorch.backends.cadence.aot.export_example import export_model
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
diff --git a/examples/cadence/tests/quantized_linear_example.py b/examples/cadence/operators/quantized_linear_op.py
similarity index 88%
rename from examples/cadence/tests/quantized_linear_example.py
rename to examples/cadence/operators/quantized_linear_op.py
index 916b684173..583359cdc6 100644
--- a/examples/cadence/tests/quantized_linear_example.py
+++ b/examples/cadence/operators/quantized_linear_op.py
@@ -8,11 +8,11 @@
 
 import logging
 
-from ..aot.meta_registrations import *  # noqa
-
 import torch
 
-from ..aot.export_example import export_model
+from executorch.backends.cadence.aot.ops_registrations import *  # noqa
+
+from executorch.backends.cadence.aot.export_example import export_model
 
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"