From e280e01fc1d2f79cc4f1cda2257a51bd20605bcd Mon Sep 17 00:00:00 2001 From: Aleksei-grovety <113356454+Aleksei-grovety@users.noreply.github.com> Date: Mon, 19 Jun 2023 11:18:11 +0400 Subject: [PATCH] [microNPU][ETHOSU] Fix CopyComputeReordering pass arguments (#15063) No arguments were passed to CopyComputeReordering pass and the same parameters were used for all targets. This fix takes arguments for CopyComputeReordering pass from Vela. In networks tests, the amount of memory used for U65 has increased because now after CopyComputeReordering pass, the number of DMA commands executed in parallel has increased. --- .../backend/contrib/ethosu/tir/compiler.py | 7 +- .../relay/backend/contrib/ethosu/vela_api.py | 15 ++- .../test_ethosu/test_encode_constants.py | 102 +++++++++--------- .../contrib/test_ethosu/test_networks.py | 6 +- 4 files changed, 71 insertions(+), 59 deletions(-) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py index 2cf45170e4e3..d47b3d4a7de6 100644 --- a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py +++ b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py @@ -18,12 +18,13 @@ """The integration of the Arm(R) Ethos(TM)-U NPU TIR compiler.""" import tvm from tvm import relay -from tvm.relay.expr_functor import ExprMutator from tvm.driver.build_module import schedule_to_module +from tvm.relay.backend.contrib.ethosu import vela_api as vapi +from tvm.relay.expr_functor import ExprMutator +from .. import util from . import passes as ethosu_passes from .scheduler import schedule -from .. import util def lower_ethosu(sch, args, const_dict, name="main"): @@ -92,7 +93,7 @@ def lower_ethosu(sch, args, const_dict, name="main"): mod = ethosu_passes.HoistAllocates()(mod) mod = tvm.tir.transform.RemoveNoOp()(mod) mod, const_dict = ethosu_passes.MergeConstants(const_dict)(mod) - mod = ethosu_passes.CopyComputeReordering()(mod) + mod = ethosu_passes.CopyComputeReordering(vapi.get_max_copy_movements())(mod) disable_storage_rewrite = curr_cfg.get("tir.disable_storage_rewrite", False) if not disable_storage_rewrite: diff --git a/python/tvm/relay/backend/contrib/ethosu/vela_api.py b/python/tvm/relay/backend/contrib/ethosu/vela_api.py index f241652e738f..45c232a4610b 100644 --- a/python/tvm/relay/backend/contrib/ethosu/vela_api.py +++ b/python/tvm/relay/backend/contrib/ethosu/vela_api.py @@ -23,13 +23,15 @@ """ import logging import math -from typing import Tuple, Optional, List +from typing import List, Optional, Tuple + import numpy as np # type: ignore from ethosu.vela import api as vapi # type: ignore +from ethosu.vela.architecture_features import Accelerator, create_default_arch import tvm -from tvm.relay.backend.contrib.ethosu import util # type: ignore from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator as tirtocs +from tvm.relay.backend.contrib.ethosu import util # type: ignore # pylint: disable=invalid-name logger = logging.getLogger("Ethos-U") @@ -400,3 +402,12 @@ def get_accelerator_config() -> vapi.NpuAccelerator: accel_config_str = compiler_attrs.accelerator_config assert accel_config_str in npu_accel_str_map.keys(), f"{accel_config_str} is not supported" return npu_accel_str_map[accel_config_str] + + +def get_max_copy_movements() -> int: + """Get maximum copy movements for CopyComputeReordering pass. + max_outstanding_dma from architecture features indicates how many + DMA operations can be in-progress. + """ + arch = create_default_arch(Accelerator.from_npu_accelerator(get_accelerator_config())) + return arch.max_outstanding_dma diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py index 6a8ff28e442e..4341f367f0e1 100644 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ b/tests/python/contrib/test_ethosu/test_encode_constants.py @@ -66,30 +66,29 @@ def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_writ @tvm.script.ir_module class WeightStreamOnlyU65: @T.prim_func - def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - buffer_encoded_1 = T.Buffer([192], dtype="uint8") - buffer_encoded_2_1 = T.Buffer([192], dtype="uint8") - buffer_encoded_4_1 = T.Buffer([208], dtype="uint8") - buffer_encoded_6_1 = T.Buffer([192], dtype="uint8") - # body - p1_data = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([208], "uint8", data=p1_data) - p2_data = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([192], "uint8", data=p2_data) - p3 = T.Buffer([192], dtype="uint8", data=p1.data) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 192, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2_1[0], 192, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 80, p3[80], 80, 12, p3[160], 16, p3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4_1[0], 208, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, p2[80], 80, 12, p2[160], 16, p2[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6_1[0], 192, p2[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 96, p1[96], 80, 12, p1[176], 16, p1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, p2[80], 80, 12, p2[160], 16, p2[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) + def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): + T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) + p2_global_6 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p2_global_4 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p2_global_5 = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + buffer_encoded = T.Buffer((192,), "uint8") + p2_global_3 = T.Buffer((192,), "uint8", data=p2_global_6) + T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 192, p2_global_3[0]) + buffer_encoded_1 = T.Buffer((192,), "uint8") + p2_global_4_1 = T.Buffer((192,), "uint8", data=p2_global_4) + T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 192, p2_global_4_1[0]) + buffer_encoded_2 = T.Buffer((208,), "uint8") + p2_global_5_1 = T.Buffer((208,), "uint8", data=p2_global_5) + T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 208, p2_global_5_1[0]) + ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) + ethosu_write_1 = T.Buffer((2048,), "int8", data=ethosu_write.data) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_3[0], 80, p2_global_3[80], 80, 12, p2_global_3[160], 16, p2_global_3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + buffer_encoded_3 = T.Buffer((192,), "uint8") + p2_global_6_1 = T.Buffer((192,), "uint8", data=p2_global_6) + T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 192, p2_global_6_1[0]) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_4_1[0], 80, p2_global_4_1[80], 80, 12, p2_global_4_1[160], 16, p2_global_4_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_5_1[0], 96, p2_global_5_1[96], 80, 12, p2_global_5_1[176], 16, p2_global_5_1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_6_1[0], 80, p2_global_6_1[80], 80, 12, p2_global_6_1[160], 16, p2_global_6_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) __tvm_meta__ = None # fmt: on @@ -387,33 +386,34 @@ def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer @tvm.script.ir_module class MixedReadU65: @T.prim_func - def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: - # function attr dict - T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - ifm = T.Buffer([8192], dtype="int8", data=input_ifm.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - buffer1 = T.Buffer([128], dtype="uint8") - buffer2 = T.Buffer([128], dtype="uint8") - buffer3 = T.Buffer([128], dtype="uint8") - buffer4 = T.Buffer([608], dtype="uint8") - buffer5 = T.Buffer([160], dtype="uint8") - buffer6 = T.Buffer([128], dtype="uint8") - p1_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([128], "uint8", data=p1_data) - p2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([4096], "int8", data=p2_data) - p3_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True}) - p3 = T.Buffer([128], "uint8", data=p3_data) - T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer4[0], 304, buffer4[304], 304, 12, buffer5[0], 80, buffer5[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, p1[48], 48, 12, p1[96], 16, p1[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 128, p1[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 48, p3[48], 48, 12, p3[96], 16, p3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 128, p3[0], dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, p1[48], 48, 12, p1[96], 16, p1[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) - T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 48, p3[48], 48, 12, p3[96], 16, p3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) + def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): + T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) + p5_global = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p5_global_1 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + ethosu_write_1 = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p5_global_2 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + buffer_encoded = T.Buffer((128,), "uint8") + p5_global_3 = T.Buffer((128,), "uint8", data=p5_global) + T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 128, p5_global_3[0]) + buffer_encoded_1 = T.Buffer((128,), "uint8") + p5_global_4 = T.Buffer((128,), "uint8", data=p5_global_1) + T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 128, p5_global_4[0]) + ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) + ethosu_write_2 = T.Buffer((4096,), "int8", data=ethosu_write_1) + p1_encoded = T.Buffer((608,), "uint8") + p2_encoded = T.Buffer((160,), "uint8") + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, p1_encoded[0], 304, p1_encoded[304], 304, 12, p2_encoded[0], 80, p2_encoded[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + buffer_encoded_2 = T.Buffer((128,), "uint8") + p5_global_5 = T.Buffer((128,), "uint8", data=p5_global_2) + T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 128, p5_global_5[0]) + ethosu_write_3 = T.Buffer((2048,), "int8", data=ethosu_write.data) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_3[0], 48, p5_global_3[48], 48, 12, p5_global_3[96], 16, p5_global_3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + buffer_encoded_3 = T.Buffer((128,), "uint8") + p5_global_6 = T.Buffer((128,), "uint8", data=p5_global) + T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 128, p5_global_6[0]) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_4[0], 48, p5_global_4[48], 48, 12, p5_global_4[96], 16, p5_global_4[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_5[0], 48, p5_global_5[48], 48, 12, p5_global_5[96], 16, p5_global_5[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_6[0], 48, p5_global_6[48], 48, 12, p5_global_6[96], 16, p5_global_6[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) __tvm_meta__ = None # fmt: on diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py index 0df73d6dc561..a5490cbe2b1c 100644 --- a/tests/python/contrib/test_ethosu/test_networks.py +++ b/tests/python/contrib/test_ethosu/test_networks.py @@ -44,8 +44,8 @@ @pytest.mark.parametrize( "accel_type, model_url, workspace_size", [ - ("ethos-u65-256", MOBILENET_V1_URL, 1793376), - ("ethos-u65-256", MOBILENET_V2_URL, 2217152), + ("ethos-u65-256", MOBILENET_V1_URL, 2338848), + ("ethos-u65-256", MOBILENET_V2_URL, 2264320), ("ethos-u55-256", MOBILENET_V1_URL, 1793376), ("ethos-u55-256", MOBILENET_V2_URL, 2217152), ("ethos-u55-128", MOBILENET_V2_URL, 2217152), @@ -71,7 +71,7 @@ def test_networks_without_usmp(accel_type, model_url, workspace_size): @pytest.mark.parametrize( "accel_type, model_url, workspace_size", [ - ("ethos-u65-256", MOBILENET_V1_URL, 1206880), + ("ethos-u65-256", MOBILENET_V1_URL, 1311200), ("ethos-u55-256", MOBILENET_V2_URL, 1509408), ], )