Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,8 @@
"name": "arm-ethosu-linux",
"displayName": "Build ExecuTorch for Arm Ethos-U Linux",
"inherits": ["common"],
"description": "musl declares __assert_fail with int for line; avoid NDEBUG forward-decl mismatch in Release builds",
"cacheVariables": {
"EXECUTORCH_BUILD_ARM_ETHOSU_LINUX": "ON",
"EXECUTORCH_BUILD_EXECUTOR_RUNNER": "ON",
"EXECUTORCH_BUILD_KERNELS_QUANTIZED": "ON",
"CMAKE_C_FLAGS_RELEASE": "-UNDEBUG",
"CMAKE_CXX_FLAGS_RELEASE": "-UNDEBUG",
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/arm_ethosu_linux.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/arm/ethos-u-setup/aarch64-linux-musl-toolchain.cmake"
}
}
Expand Down
89 changes: 83 additions & 6 deletions backends/arm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,104 @@ The Arm backend can be built using the following command:
./install_executorch.sh
```

One of the following commands should also be run once to gather the necessary dependencies for your chosen target(s):
**NOTE:** While developing, it can be convenient to use `./install_executorch.sh --editable`, which creates an editable installation of ExecuTorch.

For the Ethos-U target:
### Target-specific setup and build

Pick one of the target flows below. Each flow has a one-time setup step and, where applicable, a build command.

### Baremetal (Ethos-U) workflow

Builds ExecuTorch runtime libraries for Cortex-M with Ethos-U acceleration.

Setup:

```
./examples/arm/setup.sh --i-agree-to-the-contained-eula
```

For the VGF target:
Build:

```
./backends/arm/scripts/build_executorch.sh
```

### VGF (Vulkan ML extensions) workflow

Setup:

```
./examples/arm/setup.sh --disable-ethos-u-deps --enable-mlsdk-deps
```

For both Ethos-U & VGF targets:
The current flow lowers to TOSA and converts to VGF for use in external projects,
so the `executor_runner` is not typically used here.

### Direct Drive (experimental, Ethos-U85 on Linux) workflow

Direct Drive enables execution on Ethos-U85 via the Linux driver stack.

Driver stack (Linux) and API:

```
https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-linux-driver-stack
```

An FVP with Linux is available for Direct Drive, but it must be built and run
manually. See:

```
./examples/arm/setup.sh --i-agree-to-the-contained-eula --enable-mlsdk-deps
https://corstone1000.docs.arm.com/en/corstone1000-2025.12/
```

**NOTE:** While developing, it can be convenient to use`./install_executorch.sh --editable`, which creates an editable installation of ExecuTorch.
Setup:

```
./examples/arm/setup.sh --i-agree-to-the-contained-eula --target-toolchain linux-musl
source ./examples/arm/arm-scratch/setup_path.sh
```

Build:

```
./backends/arm/scripts/build_executorch.sh \
--toolchain=aarch64-linux-musl-gcc \
--build_type=Debug
```

Note: the setup step selects the toolchain with `--target-toolchain linux-musl`, whereas the build step refers to the same toolchain by its GCC name, `--toolchain=aarch64-linux-musl-gcc`.

If your Yocto image enables the dropbear SSH server, you can copy the
`executor_runner` binary into the running FVP via scp:

```
scp -P 2222 arm_test/cmake-out/executor_runner root@127.0.0.1:/tmp/
```

#### Direct Drive model (PTE) workflow

Create a PTE file:

```
python3 -m examples.arm.aot_arm_compiler \
--model_name examples/arm/example_modules/add.py \
--delegate \
--quantize \
--target ethos-u85-256 \
--direct_drive
```

Copy the `executor_runner` binary and the generated PTE file to the running FVP:

```
scp -P 2222 arm_test/cmake-out/executor_runner add_arm_delegate_ethos-u85-256.pte root@127.0.0.1:/tmp/
```

Run the model on the FVP:

```
ssh -p 2222 root@127.0.0.1 -t "/tmp/executor_runner -model_path /tmp/add_arm_delegate_ethos-u85-256.pte -num_executions 1"
```

## Testing

Expand Down
58 changes: 36 additions & 22 deletions backends/arm/runtime/EthosUBackend_Cortex_A.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,19 +347,13 @@ Error platform_execute(
int output_count,
Span<executorch::runtime::EValue*> args,
char* /*ethosu_scratch*/) {
std::vector<size_t> input_copy_sizes;
std::vector<const char*> linux_input_ptrs;
if (input_count > 0) {
input_copy_sizes.resize(input_count, 0);
linux_input_ptrs.resize(input_count, nullptr);
}
std::vector<size_t> input_copy_sizes(input_count, 0);
std::vector<const char*> linux_input_ptrs(input_count, nullptr);

std::vector<size_t> output_io_bytes;
std::vector<char*> linux_output_ptrs;
if (output_count > 0) {
output_io_bytes.resize(output_count, 0);
linux_output_ptrs.resize(output_count, nullptr);
}
std::vector<size_t> output_io_bytes(output_count, 0);
std::vector<char*> linux_output_ptrs(output_count, nullptr);
std::vector<std::vector<char>> output_scratch_buffers(output_count);
std::vector<bool> output_needs_adjustment(output_count, false);

for (int i = 0; i < input_count; ++i) {
auto tensor_in = args[i]->toTensor();
Expand All @@ -380,16 +374,12 @@ Error platform_execute(
const size_t tensor_nbytes = tensor_out.nbytes();
if (i < static_cast<int>(output_io_bytes.size()) &&
output_io_bytes[i] != tensor_nbytes) {
ET_LOG(
Error,
"Ethos-U Linux backend output size mismatch for index %d: "
"driver IO bytes = %zu, tensor bytes = %zu",
i,
output_io_bytes[i],
tensor_nbytes);
return Error::InvalidState;
output_scratch_buffers[i].resize(output_io_bytes[i]);
linux_output_ptrs[i] = output_scratch_buffers[i].data();
output_needs_adjustment[i] = true;
} else {
linux_output_ptrs[i] = tensor_out.mutable_data_ptr<char>();
}
linux_output_ptrs[i] = tensor_out.mutable_data_ptr<char>();
}
}

Expand All @@ -399,13 +389,37 @@ Error platform_execute(
return Error::InvalidState;
}

return invoke_linux_driver(
Error status = invoke_linux_driver(
handles,
linux_input_ptrs,
linux_output_ptrs,
input_copy_sizes,
output_io_bytes,
state->options);
if (status != Error::Ok) {
return status;
}

if (handles.outputs != nullptr) {
for (int i = 0; i < output_count; ++i) {
if (!output_needs_adjustment[i]) {
continue;
}
auto tensor_out = args[input_count + i]->toTensor();
const size_t tensor_nbytes = tensor_out.nbytes();
Error adjust_status = copy_with_layout_adjustment(
handles.outputs->io[i],
i,
output_scratch_buffers[i].data(),
tensor_out,
tensor_nbytes);
if (adjust_status != Error::Ok) {
return adjust_status;
}
}
}

return Error::Ok;
}

} // namespace arm
Expand Down
19 changes: 19 additions & 0 deletions tools/cmake/preset/arm_ethosu_linux.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2026 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Preset defaults for the Arm Ethos-U Linux build: every option below is
# switched ON through set_overridable_option.
# NOTE(review): set_overridable_option is defined outside this file; presumably
# it lets a value supplied by the user take precedence -- confirm against the
# helper's definition.
set_overridable_option(EXECUTORCH_BUILD_ARM_ETHOSU_LINUX ON)
set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER ON)
set_overridable_option(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL ON)
set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)

# Cache "-UNDEBUG" as the Release flags for C so that NDEBUG is explicitly
# undefined in Release builds; per the cache docstring this avoids an NDEBUG
# forward-declaration mismatch when building against musl.
# NOTE(review): there is no FORCE here, so a CMAKE_C_FLAGS_RELEASE entry that
# already exists in the cache is expected to be left untouched -- confirm this
# file is processed before the cache entry is otherwise created.
set(CMAKE_C_FLAGS_RELEASE
"-UNDEBUG"
CACHE STRING "Avoid NDEBUG forward-decl mismatch in musl Release builds"
)
# Same setting for C++ Release builds.
set(CMAKE_CXX_FLAGS_RELEASE
"-UNDEBUG"
CACHE STRING "Avoid NDEBUG forward-decl mismatch in musl Release builds"
)
Loading