Arm backend: Remove build_quantized_ops_aot_lib.sh #10350

Merged · 6 commits · Apr 28, 2025
13 changes: 13 additions & 0 deletions backends/arm/quantizer/__init__.py
@@ -13,3 +13,16 @@

# Used in tests
from .arm_quantizer_utils import is_annotated # noqa

# Load quantized ops library.
try:
    import executorch.extension.pybindings.portable_lib
    import executorch.kernels.quantized  # noqa
except ImportError:
    import logging

    logging.info(
        "Failed to load portable_lib and quantized_aot_lib. To run quantized kernels AOT, either build "
        "ExecuTorch with pybindings, or load your own custom-built op library using torch.ops.load_library."
    )
    del logging
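
If this fallback is hit (for example when ExecuTorch was installed without pybindings), the quantized kernels can still be registered by hand before AOT compilation, as the log message suggests. A minimal sketch; the library path is illustrative and depends on where you built it (the pre-PR scripts placed it under `cmake-out-aot-lib/kernels/quantized/`):

```python
import torch

# Illustrative path to a self-built quantized ops shared library
# (.so on Linux, .dylib on macOS, .dll on Windows).
torch.ops.load_library(
    "cmake-out-aot-lib/kernels/quantized/libquantized_ops_aot_lib.so"
)
```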
54 changes: 0 additions & 54 deletions backends/arm/scripts/build_quantized_ops_aot_lib.sh

This file was deleted.

34 changes: 1 addition & 33 deletions backends/arm/test/conftest.py
@@ -5,10 +5,8 @@

import logging
import os
import platform
import random
import shutil
import subprocess
import sys
from typing import Any

@@ -81,8 +79,7 @@ def try_addoption(*args, **kwargs):


def pytest_sessionstart(session):
    if not session.config.option.collectonly:
        _load_libquantized_ops_aot_lib()
    pass


def pytest_sessionfinish(session, exitstatus):
@@ -172,32 +169,3 @@ def get_option(option: str) -> Any | None:
    if option in pytest._test_options:  # type: ignore[attr-defined]
        return pytest._test_options[option]  # type: ignore[attr-defined]
    return None


def _load_libquantized_ops_aot_lib():
    """
    Find and load the libquantized_ops_aot_lib shared library.
    """
    so_ext = {
        "Darwin": "dylib",
        "Linux": "so",
        "Windows": "dll",
    }.get(platform.system(), None)

    find_lib_cmd = [
        "find",
        "cmake-out-aot-lib",
        "-name",
        f"libquantized_ops_aot_lib.{so_ext}",
    ]

    res = subprocess.run(find_lib_cmd, capture_output=True)
    if res.returncode == 0:
        library_path = res.stdout.decode().strip()
        import torch

        torch.ops.load_library(library_path)
    else:
        raise RuntimeError(
            f"Did not find libquantized_ops_aot_lib.{so_ext} in cmake-out-aot-lib. Did you build it?"
        )
2 changes: 0 additions & 2 deletions backends/arm/test/setup_testing.sh
@@ -52,8 +52,6 @@ function build_semihosting_executorch_runner() {
find ${build_test_dir} -name "arm_executor_runner"
}

cd $et_root_dir && backends/arm/scripts/build_quantized_ops_aot_lib.sh

# Use most optimal system_configs for testing
build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded

3 changes: 0 additions & 3 deletions backends/arm/test/test_arm_baremetal.sh
@@ -74,9 +74,6 @@ test_pytest() { # Test ops and other things

./examples/models/llama3_2_vision/install_requirements.sh

cd "${et_root_dir}"
backends/arm/scripts/build_quantized_ops_aot_lib.sh

# Run arm baremetal pytest tests without FVP
pytest --verbose --color=yes --numprocesses=auto backends/arm/test/
echo "${TEST_SUITE_NAME}: PASS"
21 changes: 0 additions & 21 deletions backends/arm/test/test_model.py
@@ -5,7 +5,6 @@

import argparse
import os
import platform
import subprocess
import sys

@@ -113,14 +112,6 @@ def build_libs(et_build_root: str, script_path: str):
"--portable_kernels=aten::_softmax.out",
]
)
run_external_cmd(
[
"bash",
os.path.join(script_path, "build_quantized_ops_aot_lib.sh"),
f"--et_build_root={et_build_root}",
"--build_type=Release",
]
)


def build_pte(
@@ -132,17 +123,6 @@
    build_output: str,
    no_intermediate: bool,
):
    soext = {"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(
        platform.system(), None
    )
    solibs_path = os.path.join(
        et_build_root,
        "cmake-out-aot-lib",
        "kernels",
        "quantized",
        f"libquantized_ops_aot_lib.{soext}",
    )
    solibs = f"--so_library={solibs_path}"

intermediate = ""
if not no_intermediate:
Expand All @@ -162,7 +142,6 @@ def build_pte(
f"--output={build_output}",
f"--system_config={system_config}",
f"--memory_mode={memory_mode}",
solibs,
]
)

3 changes: 0 additions & 3 deletions docs/source/backends-arm-ethos-u.md
@@ -17,9 +17,6 @@ To compile for the NPUs, the Ethos-U Vela compiler is needed. A target-specific

These dependencies can easily be downloaded using the script `examples/arm/setup.sh`.

To work with quantized models, build the quantize_ops_aot library that contains kernels for quantization and dequantization. This can be done with the script
`backends/arm/scripts/build_quantized_ops_aot_lib.sh`.

## Using the Arm Ethos-U backend
The example below demonstrates the lowering process of a MobileNet V2 model from torchvision for an Ethos-U55 target. Since the model is a floating point model, first quantize it using the `EthosUQuantizer`. Then, pass an instance of the `EthosUPartitioner` to `to_edge_transform_and_lower`. Both the quantizer and the partitioner need a compilation specification created using `ArmCompileSpecBuilder`.
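
A condensed sketch of that flow follows. The PT2E entry points (`torch.export.export_for_training`, `prepare_pt2e`, `convert_pt2e`) are standard PyTorch; the Arm import paths, the `ethosu_compile_spec` builder call, and the quantization-config helper are assumptions for illustration and should be checked against the ExecuTorch sources:

```python
import torch
from torchvision.models import mobilenet_v2

# Assumed import paths -- verify against the ExecuTorch source tree.
from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
from executorch.backends.arm.quantizer.arm_quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from executorch.backends.arm.ethosu_partitioner import EthosUPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

model = mobilenet_v2(weights="DEFAULT").eval()
example_inputs = (torch.randn(1, 3, 224, 224),)

# One compile spec drives both the quantizer and the partitioner.
compile_spec = ArmCompileSpecBuilder().ethosu_compile_spec("ethos-u55-128").build()

# Quantize the floating point model with the PT2E flow.
quantizer = EthosUQuantizer(compile_spec)
quantizer.set_global(get_symmetric_quantization_config())
exported = torch.export.export_for_training(model, example_inputs)
prepared = prepare_pt2e(exported.module(), quantizer)
prepared(*example_inputs)  # calibration run
quantized = convert_pt2e(prepared)

# Lower the quantized program to the Ethos-U delegate.
program = torch.export.export_for_training(quantized, example_inputs)
edge = to_edge_transform_and_lower(
    program, partitioner=[EthosUPartitioner(compile_spec)]
)
executorch_program = edge.to_executorch()
```

Sharing a single compile spec between the quantizer and the partitioner keeps the annotation and partitioning stages consistent for the chosen target.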

25 changes: 3 additions & 22 deletions docs/source/tutorial-arm-ethos-u.md
@@ -244,28 +244,9 @@ python3 -m examples.arm.aot_arm_compiler --model_name="add" --delegate
```

### Delegated Quantized Workflow
Before generating the `.pte` file for delegated quantized networks like MobileNetV2, you need to build the `quantized_ops_aot_lib`.

You can just run the `backends/arm/scripts/build_quantized_ops_aot_lib.sh` script to build this for you or build it yourself like this.

```bash

cd <executorch_root_dir>
mkdir -p cmake-out-aot-lib
cmake -DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_XNNPACK=OFF \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
-DPYTHON_EXECUTABLE=python3 \
-Bcmake-out-aot-lib \
"${et_root_dir}"

cmake --build cmake-out-aot-lib --parallel -- quantized_ops_aot_lib
```

After the `quantized_ops_aot_lib` build, you can run the following script to generate the `.pte` file
Generating the `.pte` file can be done using the aot_arm_compiler:
```bash
python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize --so_library="$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.so)"
python3 -m examples.arm.aot_arm_compiler --model_name="mv2" --delegate --quantize"
# should produce ./mv2_arm_delegate_ethos-u55-128.pte
```

@@ -315,7 +296,7 @@ To run a `.pte` file with the Arm backend delegate call instructions, you will n

- `libexecutorch_delegate_ethos_u.a`

These libraries are generated by the `backends/arm/scripts/build_executorch.sh`, `backends/arm/scripts/build_portable_kernels.sh` and `backends/arm/scripts/build_quantized_ops_aot_lib.sh` scripts called from the `run.sh` script.
These libraries are generated by the `backends/arm/scripts/build_executorch.sh` and `backends/arm/scripts/build_portable_kernels.sh` scripts called from the `run.sh` script.

The `--portable_kernels` flag can be used to set the build flag `EXECUTORCH_SELECT_OPS_LIST` when running `backends/arm/scripts/build_portable_kernels.sh`, which determines the set of portable operators included in the build and available at runtime. The list must match the `.pte` file's requirements, otherwise you will get a `Missing Operator` error at runtime.

8 changes: 1 addition & 7 deletions examples/arm/aot_arm_compiler.py
@@ -466,7 +466,7 @@ def get_args():
"--so_library",
required=False,
default=None,
help="Provide path to so library. E.g., cmake-out/examples/portable/custom_ops/libcustom_ops_aot_lib.so",
help="Provide path to custom .so library.",
)
parser.add_argument(
"--debug", action="store_true", help="Set the logging level to debug."
@@ -509,12 +509,6 @@ def get_args():
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=FORMAT, force=True)

    if args.quantize and not args.so_library:
        logging.warning(
            "Quantization enabled without supplying path to libcustom_ops_aot_lib using -s flag."
            + "This is required for running quantized models with unquantized input."
        )

    # if we have custom ops, register them before processing the model
    if args.so_library is not None:
        logging.info(f"Loading custom ops from {args.so_library}")
26 changes: 0 additions & 26 deletions examples/arm/ethos_u_minimal_example.ipynb
@@ -115,32 +115,6 @@
"quantized_exported_program = torch.export.export_for_training(quantized_graph_module, example_inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The quantization nodes created in the previous cell are not built by default with ExecuTorch but must be included in the .pte-file, and so they need to be built separately. `backends/arm/scripts/build_quantized_ops_aot_lib.sh` is a utility script which does this. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import os\n",
"\n",
"# Setup paths\n",
"cwd_dir = os.getcwd()\n",
"et_dir = os.path.join(cwd_dir, \"..\", \"..\")\n",
"et_dir = os.path.abspath(et_dir)\n",
"script_dir = os.path.join(et_dir, \"backends\", \"arm\", \"scripts\")\n",
"\n",
"# Run build_quantized_ops_aot_lib.sh\n",
"subprocess.run(os.path.join(script_dir, \"build_quantized_ops_aot_lib.sh\"), shell=True, cwd=et_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
10 changes: 1 addition & 9 deletions examples/arm/run.sh
@@ -152,14 +152,6 @@ fi
backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag
backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}" --build_type=$build_type --portable_kernels=$portable_kernels

# Build a lib quantized_ops_aot_lib
backends/arm/scripts/build_quantized_ops_aot_lib.sh --et_build_root="${et_build_root}" --build_type=$build_type

SO_EXT=$(python3 -c 'import platform; print({"Darwin": "dylib", "Linux": "so", "Windows": "dll"}.get(platform.system(), None))')
# We are using the aot_lib from build_quantization_aot_lib below
SO_LIB=$(find "${et_build_root}/cmake-out-aot-lib" -name libquantized_ops_aot_lib.${SO_EXT})


if [[ -z "$model_name" ]]; then
# the test models run, and whether to delegate
test_model=( "softmax" "add" "add3" "mv2" )
@@ -211,7 +203,7 @@ for i in "${!test_model[@]}"; do
model_compiler_flags="${model_compiler_flags} --model_input=${model_input}"
fi

ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --intermediate=${output_folder} --output=${pte_file} --system_config=${system_config} --memory_mode=${memory_mode} $bundleio_flag"
echo "CALL ${ARM_AOT_CMD}" >&2
${ARM_AOT_CMD} 1>&2
