Arm backend: Remove build_quantized_ops_aot_lib.sh #10350

Merged · 6 commits · Apr 28, 2025
13 changes: 13 additions & 0 deletions backends/arm/quantizer/__init__.py
@@ -13,3 +13,16 @@

# Used in tests
from .arm_quantizer_utils import is_annotated # noqa

# Load quantized ops library.
try:
    import executorch.extension.pybindings.portable_lib
    import executorch.kernels.quantized  # noqa
except ImportError:
    import logging

    logging.info(
        "Failed to load portable_lib and quantized_aot_lib. To run quantized kernels AOT, either build "
        "ExecuTorch with pybindings, or load your own custom-built op library using torch.ops.load_library."
    )
    del logging
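
If this fallback is hit (for example when ExecuTorch was installed without pybindings), the quantized kernels can still be registered by hand before AOT compilation, as the log message suggests. A minimal sketch; the library path is illustrative and depends on where you built it (the pre-PR scripts placed it under `cmake-out-aot-lib/kernels/quantized/`):

```python
import torch

# Illustrative path to a self-built quantized ops shared library
# (.so on Linux, .dylib on macOS, .dll on Windows).
torch.ops.load_library(
    "cmake-out-aot-lib/kernels/quantized/libquantized_ops_aot_lib.so"
)
```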
54 changes: 0 additions & 54 deletions backends/arm/scripts/build_quantized_ops_aot_lib.sh

This file was deleted.

34 changes: 1 addition & 33 deletions backends/arm/test/conftest.py
@@ -5,10 +5,8 @@

import logging
import os
import platform
import random
import shutil
import subprocess
import sys
from typing import Any

@@ -81,8 +79,7 @@ def try_addoption(*args, **kwargs):


def pytest_sessionstart(session):
    if not session.config.option.collectonly:
        _load_libquantized_ops_aot_lib()
    pass


def pytest_sessionfinish(session, exitstatus):
@@ -172,32 +169,3 @@ def get_option(option: str) -> Any | None:
    if option in pytest._test_options:  # type: ignore[attr-defined]
        return pytest._test_options[option]  # type: ignore[attr-defined]
    return None


def _load_libquantized_ops_aot_lib():
    """
    Find and load the libquantized_ops_aot_lib shared library.
    """
    so_ext = {
        "Darwin": "dylib",
        "Linux": "so",
        "Windows": "dll",
    }.get(platform.system(), None)

    find_lib_cmd = [
        "find",
        "cmake-out-aot-lib",
        "-name",
        f"libquantized_ops_aot_lib.{so_ext}",
    ]

    res = subprocess.run(find_lib_cmd, capture_output=True)
    if res.returncode == 0:
        library_path = res.stdout.decode().strip()
        import torch

        torch.ops.load_library(library_path)
    else:
        raise RuntimeError(
            f"Did not find libquantized_ops_aot_lib.{so_ext} in cmake-out-aot-lib. Did you build it?"
        )
2 changes: 0 additions & 2 deletions backends/arm/test/setup_testing.sh
@@ -52,8 +52,6 @@ function build_semihosting_executorch_runner() {
find ${build_test_dir} -name "arm_executor_runner"
}

cd $et_root_dir && backends/arm/scripts/build_quantized_ops_aot_lib.sh

# Use most optimal system_configs for testing
build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded

3 changes: 0 additions & 3 deletions backends/arm/test/test_arm_baremetal.sh
@@ -74,9 +74,6 @@ test_pytest() { # Test ops and other things

./examples/models/llama3_2_vision/install_requirements.sh

cd "${et_root_dir}"
backends/arm/scripts/build_quantized_ops_aot_lib.sh

# Run arm baremetal pytest tests without FVP
pytest --verbose --color=yes --numprocesses=auto backends/arm/test/
echo "${TEST_SUITE_NAME}: PASS"
21 changes: 0 additions & 21 deletions backends/arm/test/test_model.py
@@ -5,7 +5,6 @@

import argparse
import os
import platform
import subprocess
import sys

@@ -113,14 +112,6 @@ def build_libs(et_build_root: str, script_path: str):
"--portable_kernels=aten::_softmax.out",
]
)
run_external_cmd(
[
"bash",
os.path.join(script_path, "build_quantized_ops_aot_lib.sh"),
f"--et_build_root={et_build_root}",
"--build_type=Release",
]
)


def build_pte(
@@ -132,17 +123,6 @@
    build_output: str,
    no_intermediate: bool,
):
    soext = {"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(
        platform.system(), None
    )
    solibs_path = os.path.join(
        et_build_root,
        "cmake-out-aot-lib",
        "kernels",
        "quantized",
        f"libquantized_ops_aot_lib.{soext}",
    )
    solibs = f"--so_library={solibs_path}"

intermediate = ""
if not no_intermediate:
Expand All @@ -162,7 +142,6 @@ def build_pte(
f"--output={build_output}",
f"--system_config={system_config}",
f"--memory_mode={memory_mode}",
solibs,
]
)

3 changes: 0 additions & 3 deletions docs/source/backends-arm-ethos-u.md
@@ -17,9 +17,6 @@ To compile for the NPUs, the Ethos-U Vela compiler is needed. A target-specific

These dependencies can easily be downloaded using the script `examples/arm/setup.sh`.

To work with quantized models, build the quantize_ops_aot library that contains kernels for quantization and dequantization. This can be done with the script
`backends/arm/scripts/build_quantized_ops_aot_lib.sh`.

## Using the Arm Ethos-U backend
The example below demonstrates the lowering process of a MobileNet V2 model from torchvision for an Ethos-U55 target. Since the model is a floating point model, first quantize it using the `EthosUQuantizer`. Then, pass an instance of the `EthosUPartitioner` to `to_edge_transform_and_lower`. Both the quantizer and the partitioner need a compilation specification created using `ArmCompileSpecBuilder`.
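
A condensed sketch of that flow follows. The PT2E entry points (`torch.export.export_for_training`, `prepare_pt2e`, `convert_pt2e`) are standard PyTorch; the Arm import paths, the `ethosu_compile_spec` builder call, and the quantization-config helper are assumptions for illustration and should be checked against the ExecuTorch sources:

```python
import torch
from torchvision.models import mobilenet_v2

# Assumed import paths -- verify against the ExecuTorch source tree.
from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
from executorch.backends.arm.quantizer.arm_quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

model = mobilenet_v2(weights="DEFAULT").eval()
example_inputs = (torch.randn(1, 3, 224, 224),)

# One compile spec drives both the quantizer and the partitioner.
compile_spec = ArmCompileSpecBuilder().ethosu_compile_spec("ethos-u55-128").build()

# Quantize the floating point model with the PT2E flow.
quantizer = EthosUQuantizer(compile_spec)
quantizer.set_global(get_symmetric_quantization_config())
exported = torch.export.export_for_training(model, example_inputs)
prepared = prepare_pt2e(exported.module(), quantizer)
prepared(*example_inputs)  # calibration run
quantized = convert_pt2e(prepared)

# Lower the quantized program to the Ethos-U delegate.
program = torch.export.export_for_training(quantized, example_inputs)
edge = to_edge_transform_and_lower(
    program, partitioner=[EthosUPartitioner(compile_spec)]
)
executorch_program = edge.to_executorch()
```

Sharing a single compile spec between the quantizer and the partitioner keeps the annotation and partitioning stages consistent for the chosen target.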

25 changes: 3 additions & 22 deletions docs/source/tutorial-arm-ethos-u.md
@@ -244,28 +244,9 @@ python3 -m examples.arm.aot_arm_compiler --model_name="add" --delegate
```

### Delegated Quantized Workflow
Before generating the `.pte` file for delegated quantized networks like MobileNetV2, you need to build the `quantized_ops_aot_lib`.

You can just run the `backends/arm/scripts/build_quantized_ops_aot_lib.sh` script to build this for you or build it yourself like this.

```bash

cd <executorch_root_dir>
mkdir -p cmake-out-aot-lib
cmake -DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
-DPYTHON_EXECUTABLE=python3 \
-Bcmake-out-aot-lib \
"${et_root_dir}"

cmake --build cmake-out-aot-lib --parallel -- quantized_ops_aot_lib
```

After the `quantized_ops_aot_lib` build, you can run the following script to generate the `.pte` file
Generating the `.pte` file can be done using the aot_arm_compiler:
```bash
python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize --so_library="$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.so)"
python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize"
# should produce ./mv2_arm_delegate_ethos-u55-128.pte
```

@@ -315,7 +296,7 @@ To run a `.pte` file with the Arm backend delegate call instructions, you will n

- `libexecutorch_delegate_ethos_u.a`

These libraries are generated by the `backends/arm/scripts/build_executorch.sh`, `backends/arm/scripts/build_portable_kernels.sh` and `backends/arm/scripts/build_quantized_ops_aot_lib.sh` scripts called from the `run.sh` script.
These libraries are generated by the `backends/arm/scripts/build_executorch.sh` and `backends/arm/scripts/build_portable_kernels.sh` scripts called from the `run.sh` script.

The `--portable_kernels` flag can be used to set the build flag `EXECUTORCH_SELECT_OPS_LIST` when running `backends/arm/scripts/build_portable_kernels.sh`, which determines the set of portable operators included in the build and available at runtime. The list must match the `.pte` file's requirements, otherwise you will get a `Missing Operator` error at runtime.

8 changes: 1 addition & 7 deletions examples/arm/aot_arm_compiler.py
@@ -466,7 +466,7 @@ def get_args():
"--so_library",
required=False,
default=None,
help="Provide path to so library. E.g., cmake-out/examples/portable/custom_ops/libcustom_ops_aot_lib.so",
help="Provide path to custom .so library.",
)
parser.add_argument(
"--debug", action="store_true", help="Set the logging level to debug."
@@ -509,12 +509,6 @@ def get_args():
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=FORMAT, force=True)

    if args.quantize and not args.so_library:
        logging.warning(
            "Quantization enabled without supplying path to libcustom_ops_aot_lib using -s flag."
            + "This is required for running quantized models with unquantized input."
        )

    # if we have custom ops, register them before processing the model
    if args.so_library is not None:
        logging.info(f"Loading custom ops from {args.so_library}")
26 changes: 0 additions & 26 deletions examples/arm/ethos_u_minimal_example.ipynb
@@ -115,32 +115,6 @@
"quantized_exported_program = torch.export.export_for_training(quantized_graph_module, example_inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The quantization nodes created in the previous cell are not built by default with ExecuTorch but must be included in the .pte-file, and so they need to be built separately. `backends/arm/scripts/build_quantized_ops_aot_lib.sh` is a utility script which does this. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import os\n",
"\n",
"# Setup paths\n",
"cwd_dir = os.getcwd()\n",
"et_dir = os.path.join(cwd_dir, \"..\", \"..\")\n",
"et_dir = os.path.abspath(et_dir)\n",
"script_dir = os.path.join(et_dir, \"backends\", \"arm\", \"scripts\")\n",
"\n",
"# Run build_quantized_ops_aot_lib.sh\n",
"subprocess.run(os.path.join(script_dir, \"build_quantized_ops_aot_lib.sh\"), shell=True, cwd=et_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
10 changes: 1 addition & 9 deletions examples/arm/run.sh
@@ -152,14 +152,6 @@ fi
backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag
backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels

# Build a lib quantized_ops_aot_lib
backends/arm/scripts/build_quantized_ops_aot_lib.sh --et_build_root="${et_build_root}" --build_type=$build_type

SO_EXT=$(python3 -c 'import platform; print({"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(platform.system(), None))')
# We are using the aot_lib from build_quantization_aot_lib below
SO_LIB=$(find "${et_build_root}/cmake-out-aot-lib" -name libquantized_ops_aot_lib.${SO_EXT})


if [[ -z "$model_name" ]]; then
# the test models run, and whether to delegate
test_model=( "softmax" "add" "add3" "mv2" )
@@ -211,7 +203,7 @@ for i in "${!test_model[@]}"; do
model_compiler_flags="${model_compiler_flags} --model_input=${model_input}"
fi

ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
echo "CALL ${ARM_AOT_CMD}" >&2
${ARM_AOT_CMD} 1>&2
