Skip to content

Commit f39a1bb

Browse files
authored
Arm backend: Allocate the scratch buffer at runtime rather than in the pte (#10714)
This change reduces the size of the pte and lets you allocate the scratch buffer in an array, usually in SRAM, for more efficient memory usage on an MCU.
1 parent 94d1381 commit f39a1bb

File tree

10 files changed

+143
-81
lines changed

10 files changed

+143
-81
lines changed

backends/arm/arm_vela.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ def vela_compile(tosa_flatbuffer: bytes, args: List[str], verbose: bool = False)
7373
np_path = os.path.join(tmpdir, "output", "out_vela.npz")
7474
else:
7575
np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz")
76-
blocks = b""
7776

77+
blocks = b""
7878
with np.load(np_path, allow_pickle=False) as data:
7979
# Construct our modified output_blocks with data in a form easily
8080
# digested on the device side
@@ -92,7 +92,7 @@ def vela_compile(tosa_flatbuffer: bytes, args: List[str], verbose: bool = False)
9292
if not isinstance(data["scratch_shape"][0], np.int64):
9393
raise RuntimeError("Expected scratch to be int64")
9494
block_length = int(data["scratch_shape"][0])
95-
bin_blocks["scratch_data"] = b"\x00" * block_length
95+
bin_blocks["scratch_size"] = struct.pack("<I", block_length)
9696

9797
# Capture inputs and outputs
9898
bin_blocks["inputs"] = vela_bin_pack_io("input", data)

backends/arm/runtime/EthosUBackend.cpp

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ using executorch::runtime::FreeableBuffer;
7171
using executorch::runtime::MemoryAllocator;
7272
using executorch::runtime::Result;
7373

74+
#define ETHOSU_NUM_BASE_ADDRS 3
75+
7476
namespace executorch {
7577
namespace backends {
7678
namespace arm {
@@ -181,23 +183,33 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
181183
}
182184
EXECUTORCH_PROF_END(event_tracer, event_tracer_local_scope);
183185

186+
MemoryAllocator* temp_allocator = context.get_temp_allocator();
187+
// Use a temporary allocator for the intermediate tensors of the
188+
// computation. The allocator is released in runtime/executor/method.cpp at
189+
// the end of the execution of the Ethos-U custom delegate
190+
char* ethosu_scratch =
191+
static_cast<char*>(temp_allocator->allocate(handles.scratch_data_size));
192+
extern size_t ethosu_fast_scratch_size;
193+
extern unsigned char* ethosu_fast_scratch;
184194
ET_LOG(
185195
Debug,
186-
"EthosUBackend::execute: Running program data:\n cmd %p %zu\n weight %p %zu\n scratch %p %zu\n",
196+
"EthosUBackend::execute: Running program data:\n cmd %p %zu\n weight %p %zu\n scratch %p %zu\n fast scratch %p %zu\n",
187197
handles.cmd_data,
188198
handles.cmd_data_size,
189199
handles.weight_data,
190200
handles.weight_data_size,
191-
handles.scratch_data,
192-
handles.scratch_data_size);
201+
ethosu_scratch,
202+
handles.scratch_data_size,
203+
ethosu_fast_scratch,
204+
ethosu_fast_scratch_size);
193205

194206
// Write argument values (from EValue tensor) into Ethos-U scratch
195207
// TODO(MLETORCH-123): Optimise into direct write from Vela into the SRAM
196208
// or DRAM output for compatible data layouts.
197209
for (int i = 0; i < handles.inputs->count; i++) {
198210
auto tensor_count = 1, io_count = 1;
199211
auto tensor_in = args[i]->toTensor();
200-
char* scratch_addr = handles.scratch_data + handles.inputs->io[i].offset;
212+
char* scratch_addr = ethosu_scratch + handles.inputs->io[i].offset;
201213

202214
// We accept:
203215
bool supported = 0;
@@ -294,13 +306,17 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
294306
// Ethos-U low level driver expected order for Ethos U-55, we have
295307
// constant weight data, then scratch (which contains input and output)
296308
// scratch is written above in this function.
297-
uint64_t bases[2] = {
309+
310+
uint64_t bases[ETHOSU_NUM_BASE_ADDRS] = {
298311
static_cast<uint64_t>(
299312
reinterpret_cast<uintptr_t>((handles.weight_data))),
313+
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ethosu_scratch)),
300314
static_cast<uint64_t>(
301-
reinterpret_cast<uintptr_t>((handles.scratch_data)))};
302-
size_t bases_size[2] = {
303-
handles.weight_data_size, handles.scratch_data_size};
315+
reinterpret_cast<uintptr_t>(ethosu_fast_scratch))};
316+
size_t bases_size[ETHOSU_NUM_BASE_ADDRS] = {
317+
handles.weight_data_size,
318+
handles.scratch_data_size,
319+
ethosu_fast_scratch_size};
304320
int result = 0;
305321
EXECUTORCH_PROF_START(
306322
event_tracer, event_tracer_local_scope, "+EthosUBackend::execute()NPU");
@@ -310,7 +326,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
310326
handles.cmd_data_size,
311327
bases,
312328
bases_size,
313-
2, /* fixed array of pointers to binary interface*/
329+
3, /* fixed array of pointers to binary interface*/
314330
nullptr);
315331
EXECUTORCH_PROF_END(event_tracer, event_tracer_local_scope);
316332

@@ -325,8 +341,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
325341
// Write outputs from scratch into EValue pointers
326342
for (int i = 0; i < handles.outputs->count; i++) {
327343
int tensor_count = 1, io_count = 1;
328-
const char* output_addr =
329-
handles.scratch_data + handles.outputs->io[i].offset;
344+
const char* output_addr = ethosu_scratch + handles.outputs->io[i].offset;
330345
// Process input EValue into scratch
331346
// Outputs are in the index immediately after inputs
332347
auto tensor_out = args[handles.inputs->count + i]->toTensor();

backends/arm/runtime/VelaBinStream.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 Arm Limited and/or its affiliates.
2+
* Copyright 2023, 2025 Arm Limited and/or its affiliates.
33
*
44
* This source code is licensed under the BSD-style license found in the
55
* LICENSE file in the root directory of this source tree.
@@ -71,9 +71,10 @@ bool vela_bin_read(const char* data, VelaHandles* handles, int size) {
7171
} else if (!strncmp(b->name, "weight_data", strlen("weight_data"))) {
7272
handles->weight_data = b->data;
7373
handles->weight_data_size = b->size;
74-
} else if (!strncmp(b->name, "scratch_data", strlen("scratch_data"))) {
75-
handles->scratch_data = b->data;
76-
handles->scratch_data_size = b->size;
74+
} else if (!strncmp(b->name, "scratch_size", strlen("scratch_size"))) {
75+
const uint32_t* scratch_size_ptr =
76+
reinterpret_cast<const uint32_t*>(b->data);
77+
handles->scratch_data_size = *scratch_size_ptr;
7778
} else if (!strncmp(b->name, "inputs", strlen("inputs"))) {
7879
handles->inputs = (VelaIOs*)b->data;
7980
} else if (!strncmp(b->name, "outputs", strlen("outputs"))) {

backends/arm/scripts/build_executor_runner.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ then
103103
memory_mode="Shared_Sram"
104104
if [[ ${target} =~ "ethos-u85" ]]
105105
then
106-
memory_mode="Sram_Only"
106+
memory_mode="Dedicated_Sram_384KB"
107107
fi
108108
fi
109109

backends/arm/test/ops/test_conv_combos.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,28 +41,28 @@ def __init__(self):
4141
# (t, c, n, s) = (6, 96, 1, 1)
4242
# 1. 1x1 CONV2d + ReLU6 (Pointwise)
4343
self.pointwise_conv2d = torch.nn.Conv2d(
44-
in_channels=64, out_channels=384, kernel_size=1, stride=1, groups=1
45-
) ## (1, 384, 81, 81)
46-
self.batch_norm2d_16 = torch.nn.BatchNorm2d(384, affine=False)
44+
in_channels=32, out_channels=128, kernel_size=1, stride=1, groups=1
45+
) ## (1, 128, 81, 81)
46+
self.batch_norm2d_16 = torch.nn.BatchNorm2d(128, affine=False)
4747
self.relu6 = torch.nn.ReLU6()
4848

4949
# 2. 3x3 DepthwiseConv2d + ReLu6
5050
self.depthwise_conv2d = torch.nn.Conv2d(
51-
in_channels=384,
52-
out_channels=384,
51+
in_channels=128,
52+
out_channels=128,
5353
kernel_size=3,
5454
padding=1,
5555
stride=1,
56-
groups=384,
57-
) ## (1, 384, H, W)
56+
groups=128,
57+
) ## (1, 128, H, W)
5858

5959
# 3. Linear 1x1 Conv2d
6060
self.pointwise_conv2d_linear = torch.nn.Conv2d(
61-
in_channels=384, out_channels=64, kernel_size=1, stride=1, groups=1
62-
) ## (1, 64, 81, 81)
61+
in_channels=128, out_channels=32, kernel_size=1, stride=1, groups=1
62+
) ## (1, 32, 81, 81)
6363

6464
def get_inputs(self) -> Tuple[torch.Tensor]:
65-
return (torch.randn(1, 64, 81, 81),)
65+
return (torch.randn(1, 32, 81, 81),)
6666

6767
def forward(self, x):
6868
input = x

backends/arm/test/test_arm_baremetal.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,11 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
206206

207207
# Ethos-U85
208208
echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85"
209-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00"
210-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
209+
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00"
210+
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
211211
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
212-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
213-
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
212+
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
213+
python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
214214

215215
echo "${TEST_SUITE_NAME}: PASS"
216216
}

backends/arm/test/test_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def get_args():
8181
if "u55" in args.target:
8282
args.memory_mode = "Shared_Sram"
8383
elif "u85" in args.target:
84-
args.memory_mode = "Sram_Only"
84+
args.memory_mode = "Dedicated_Sram_384KB"
8585
else:
8686
raise RuntimeError(f"Invalid target name {args.target}")
8787

examples/arm/executor_runner/CMakeLists.txt

Lines changed: 45 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ project(arm_executor_runner)
88

99
option(SEMIHOSTING "Enable semihosting" OFF)
1010
option(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE to specify memory alloction pool size" OFF)
11-
option(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE "Set ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE to specify temp alloction pool size" OFF)
1211
option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF)
1312
option(ET_ATOL "Set atol to use for BundleIO testing" OFF)
1413
option(ET_RTOL "Set rtol to use for BundleIO testing" OFF)
@@ -99,20 +98,45 @@ if(NOT ${SEMIHOSTING})
9998
get_filename_component(ET_PTE_FILE_PATH ${ET_PTE_FILE_PATH} REALPATH)
10099
endif()
101100

101+
if(SYSTEM_CONFIG MATCHES "Ethos_U55")
102+
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target)
103+
elseif(SYSTEM_CONFIG MATCHES "Ethos_U85")
104+
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target)
105+
else()
106+
message(FATAL_ERROR "Unsupported SYSTEM_CONFIG ${SYSTEM_CONFIG}.")
107+
endif()
102108

109+
if(MEMORY_MODE MATCHES "Dedicated_Sram")
110+
target_compile_definitions(ethosu_target_common INTERFACE
111+
ETHOSU_MODEL=1
112+
ETHOSU_ARENA=1)
113+
elseif(MEMORY_MODE MATCHES "Shared_Sram" OR MEMORY_MODE MATCHES "Sram_Only")
114+
target_compile_definitions(ethosu_target_common INTERFACE
115+
ETHOSU_MODEL=1
116+
ETHOSU_ARENA=0)
117+
else()
118+
message(FATAL_ERROR "Unsupported MEMORY_MODE ${MEMORY_MODE}. Memory_mode can be Shared_Sram, Sram_Only or Dedicated_Sram(applicable for the Ethos-U85)")
119+
endif()
120+
121+
# By default, use 2MB of temporary scratch buffer
122+
# For Dedicated_Sram, use 128MB for the temporary scratch buffer and
123+
# 384KB for the fast scratch buffer (the cache; applicable only to Ethos-U65 and Ethos-U85)
124+
set(ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE 0x200000)
125+
if(MEMORY_MODE MATCHES "Dedicated_Sram")
126+
set(ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE 0x8000000)
127+
set(ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE 0x60000)
128+
endif()
129+
message(STATUS "ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE = ${ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE}")
130+
message(STATUS "ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE = ${ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE}")
103131

104132
# Dependencies from the Ethos-U Core This is the platform target of
105133
# Corstone-300, that includes ethosu_core_driver and bare-metal bringup
106134
# libraries. We link against ethosu_target_init which includes all of these
107135
# dependencies.
108-
if(SYSTEM_CONFIG STREQUAL "Ethos_U55_High_End_Embedded")
109-
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target)
136+
if(SYSTEM_CONFIG MATCHES "Ethos_U55_High_End_Embedded")
110137
set(TARGET_BOARD "corstone-300")
111-
if(MEMORY_MODE STREQUAL "Shared_Sram")
138+
if(MEMORY_MODE MATCHES "Shared_Sram")
112139
target_compile_definitions(ethosu_target_common INTERFACE
113-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
114-
# ETHOSU_MODEL=1 place pte file/data in DDR area
115-
ETHOSU_MODEL=1
116140
# Configure NPU architecture timing adapters
117141
# This is just example numbers and you should make this match your hardware
118142
# SRAM
@@ -144,7 +168,7 @@ if(SYSTEM_CONFIG STREQUAL "Ethos_U55_High_End_Embedded")
144168
ETHOSU_TA_HISTBIN_1=0
145169
ETHOSU_TA_HISTCNT_1=0
146170
)
147-
elseif(MEMORY_MODE STREQUAL "Sram_Only")
171+
elseif(MEMORY_MODE MATCHES "Sram_Only")
148172
target_compile_definitions(ethosu_target_common INTERFACE
149173
# This is just example numbers and you should make this match your hardware
150174
# SRAM
@@ -180,14 +204,11 @@ if(SYSTEM_CONFIG STREQUAL "Ethos_U55_High_End_Embedded")
180204
else()
181205
message(FATAL_ERROR "Unsupported memory_mode ${MEMORY_MODE} for the Ethos-U55. The Ethos-U55 supports only Shared_Sram and Sram_Only.")
182206
endif()
183-
elseif(SYSTEM_CONFIG STREQUAL "Ethos_U55_Deep_Embedded")
207+
elseif(SYSTEM_CONFIG MATCHES "Ethos_U55_Deep_Embedded")
184208
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target)
185209
set(TARGET_BOARD "corstone-300")
186-
if(MEMORY_MODE STREQUAL "Shared_Sram")
210+
if(MEMORY_MODE MATCHES "Shared_Sram")
187211
target_compile_definitions(ethosu_target_common INTERFACE
188-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
189-
# ETHOSU_MODEL=1 place pte file/data in DDR area
190-
ETHOSU_MODEL=1
191212
# Configure NPU architecture timing adapters
192213
# This is just example numbers and you should make this match your hardware
193214
# SRAM
@@ -219,9 +240,8 @@ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U55_Deep_Embedded")
219240
ETHOSU_TA_HISTBIN_1=0
220241
ETHOSU_TA_HISTCNT_1=0
221242
)
222-
elseif(MEMORY_MODE STREQUAL "Sram_Only")
243+
elseif(MEMORY_MODE MATCHES "Sram_Only")
223244
target_compile_definitions(ethosu_target_common INTERFACE
224-
ETHOSU_MODEL=1
225245
# Configure NPU architecture timing adapters
226246
# This is just example numbers and you should make this match your hardware
227247
# SRAM
@@ -256,14 +276,11 @@ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U55_Deep_Embedded")
256276
else()
257277
message(FATAL_ERROR "Unsupported memory_mode ${MEMORY_MODE} for the Ethos-U55. The Ethos-U55 supports only Shared_Sram and Sram_Only.")
258278
endif()
259-
elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Low")
279+
elseif(SYSTEM_CONFIG MATCHES "Ethos_U85_SYS_DRAM_Low")
260280
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target)
261281
set(TARGET_BOARD "corstone-320")
262-
if(MEMORY_MODE STREQUAL "Dedicated_Sram")
282+
if(MEMORY_MODE MATCHES "Dedicated_Sram")
263283
target_compile_definitions(ethosu_target_common INTERFACE
264-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
265-
# ETHOSU_MODEL=1 place pte file/data in DDR area
266-
ETHOSU_MODEL=1
267284
# Configure NPU architecture timing adapters
268285
# This is just example numbers and you should make this match your hardware
269286
# SRAM
@@ -295,11 +312,8 @@ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Low")
295312
ETHOSU_TA_HISTBIN_1=0
296313
ETHOSU_TA_HISTCNT_1=0
297314
)
298-
elseif(MEMORY_MODE STREQUAL "Sram_Only")
315+
elseif(MEMORY_MODE MATCHES "Sram_Only")
299316
target_compile_definitions(ethosu_target_common INTERFACE
300-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
301-
# ETHOSU_MODEL=1 place pte file/data in DDR area
302-
ETHOSU_MODEL=1
303317
# Configure NPU architecture timing adapters
304318
# This is just example numbers and you should make this match your hardware
305319
# SRAM
@@ -333,13 +347,9 @@ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Low")
333347
)
334348
endif()
335349
elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Mid" OR SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_High")
336-
add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target)
337350
set(TARGET_BOARD "corstone-320")
338-
if(MEMORY_MODE STREQUAL "Dedicated_Sram")
351+
if(MEMORY_MODE MATCHES "Dedicated_Sram")
339352
target_compile_definitions(ethosu_target_common INTERFACE
340-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
341-
# ETHOSU_MODEL=1 place pte file/data in DDR area
342-
ETHOSU_MODEL=1
343353
# Configure NPU architecture timing adapters
344354
# This is just example numbers and you should make this match your hardware
345355
# SRAM
@@ -371,11 +381,8 @@ elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Mid" OR SYSTEM_CONFIG STREQUAL
371381
ETHOSU_TA_HISTBIN_1=0
372382
ETHOSU_TA_HISTCNT_1=0
373383
)
374-
elseif(MEMORY_MODE STREQUAL "Sram_Only")
384+
elseif(MEMORY_MODE MATCHES "Sram_Only")
375385
target_compile_definitions(ethosu_target_common INTERFACE
376-
# ETHOSU_MODEL=0 place pte file/data in SRAM area
377-
# ETHOSU_MODEL=1 place pte file/data in DDR area
378-
ETHOSU_MODEL=1
379386
# Configure NPU architecture timing adapters
380387
# This is just example numbers and you should make this match your hardware
381388
# SRAM
@@ -434,7 +441,7 @@ endif()
434441
# the memory traffic of Region 1 should pass via the external memory(3) and the traffic for Region 2 should pass via the SRAM(0)
435442
#
436443

437-
if(MEMORY_MODE STREQUAL "Sram_Only")
444+
if(MEMORY_MODE MATCHES "Sram_Only")
438445
target_compile_definitions(ethosu_core_driver PRIVATE
439446
NPU_QCONFIG=1
440447
NPU_REGIONCFG_0=1
@@ -445,7 +452,7 @@ if(MEMORY_MODE STREQUAL "Sram_Only")
445452
NPU_REGIONCFG_5=0
446453
NPU_REGIONCFG_6=0
447454
NPU_REGIONCFG_7=0)
448-
elseif(MEMORY_MODE STREQUAL "Dedicated_Sram")
455+
elseif(MEMORY_MODE MATCHES "Dedicated_Sram")
449456
target_compile_definitions(ethosu_core_driver PRIVATE
450457
NPU_QCONFIG=3
451458
NPU_REGIONCFG_0=3
@@ -632,8 +639,9 @@ if(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE)
632639
target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE})
633640
endif()
634641

635-
if(ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE)
636-
target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_TEMP_ALLOCATOR_POOL_SIZE})
642+
target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE})
643+
if(DEFINED ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE)
644+
target_compile_definitions(arm_executor_runner PUBLIC ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE})
637645
endif()
638646

639647
if(ET_BUNDLE_IO)

0 commit comments

Comments
 (0)