From 3a497029c078de8c4a9872a4a5cff9bedc94f07d Mon Sep 17 00:00:00 2001 From: "xipeng.fan" Date: Wed, 18 Sep 2024 14:53:54 +0800 Subject: [PATCH] [soc_dump] add doc Change-Id: Icaf313113415a9bf0ad9c75abdcb609d661c815b --- .../source_en/Appx.02_tpulang_support_op.rst | 11 ++++++++--- .../source_zh/Appx.02_tpulang_support_op.rst | 10 +++++++--- python/transform/TpuLang.py | 9 +++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/developer_manual/source_en/Appx.02_tpulang_support_op.rst b/docs/developer_manual/source_en/Appx.02_tpulang_support_op.rst index a7fbbc43c..82828d589 100755 --- a/docs/developer_manual/source_en/Appx.02_tpulang_support_op.rst +++ b/docs/developer_manual/source_en/Appx.02_tpulang_support_op.rst @@ -3930,9 +3930,11 @@ Definition save_path: str = "", out_fixed: bool = False, dump_cmd_info: bool = True, - cmodel_skip_check: bool = True, # disable CMODEL data_check to increase processing speed + skip_check: bool = True, # disable data_check to increase processing speed + run_by_op: bool = False, # enable to run_by_op, may cause timeout error when some OPs contain too many atomic cmds is_soc: bool = False, # soc mode ONLY support {reference_data_fn=xxx.npz, dump_file=True} - enable_soc_log: bool = False, + using_memory_opt: bool = False, # required when is_soc=True + enable_soc_log: bool = False, # required when is_soc=True tmp_path: str = "/tmp", # required when is_soc=True tools_path: str = "/soc_infer", # required when is_soc=True hostname: str = None, # required when is_soc=True @@ -3955,9 +3957,12 @@ Parameters * dump_file: Bool type, representing whether save results as file. * save_path: String type, representing the abs path of saving results on host. * out_fixed: Bool type, representing whether to get results in fixed number. -* dump_cmd_info: Bool type, enable to save atomic cmd info at `save_path`. +* dump_cmd_info: Bool type, enable to save atomic cmd info at `save_path`. 
+* skip_check: Bool type, set to True to disable data check and reduce time cost in CMODEL/PCIE mode. +* run_by_op: Bool type, enable to run at OP granularity instead of atomic cmd granularity; this reduces time cost but may cause a timeout error when some OPs contain too many atomic cmds. * cmodel_skip_check: Bool type, enable this to skip data check to speed up inference. * is_soc: Bool type, representing whether to use in soc mode. +* using_memory_opt: Bool type, enable to use memory optimization, which reduces memory usage at the expense of increased time cost. Recommended when running large models. * enable_soc_log: Bool type, enable to print and save log at `save_path`. * tmp_path: String type, representing the abs path of tmp files on device in soc mode. * tools_path: String type, representing the dir of soc_infer tools on device in soc mode. diff --git a/docs/developer_manual/source_zh/Appx.02_tpulang_support_op.rst b/docs/developer_manual/source_zh/Appx.02_tpulang_support_op.rst index a1d62310d..a230927b8 100755 --- a/docs/developer_manual/source_zh/Appx.02_tpulang_support_op.rst +++ b/docs/developer_manual/source_zh/Appx.02_tpulang_support_op.rst @@ -4802,9 +4802,11 @@ bmodel_inference_combine save_path: str = "", out_fixed: bool = False, dump_cmd_info: bool = True, - cmodel_skip_check: bool = True, # disable CMODEL data_check to increase processing speed + skip_check: bool = True, # disable data_check to increase processing speed + run_by_op: bool = False, # enable to run_by_op, may cause timeout error when some OPs contain too many atomic cmds is_soc: bool = False, # soc mode ONLY support {reference_data_fn=xxx.npz, dump_file=True} - enable_soc_log: bool = False, + using_memory_opt: bool = False, # required when is_soc=True + enable_soc_log: bool = False, # required when is_soc=True tmp_path: str = "/tmp", # required when is_soc=True tools_path: str = "/soc_infer", # required when is_soc=True hostname: str = None, # required when is_soc=True @@ -4828,8 +4830,10 @@ bmodel_inference_combine * save_path: 
String类型,表示 `dump_file=True` 时的主机(host)端保存逐层推理的.npz文件的绝对路径。 * out_fixed: Bool类型,表示逐层Tensor数据输出是否保持为定点格式。 * dump_cmd_info: Bool类型,表示将当前bmodel中包含的所有原子指令对应的final.mlir的信息保存成txt文件,保存路径在save_path下。 -* cmodel_skip_check: Bool类型, 在cmodel模式下启用此项可禁用数据对比,提高推理速度。soc模式下默认不进行数据对比。 +* skip_check: Bool类型,启用此项可禁用数据对比,提高推理速度。soc模式下默认不进行数据对比。 +* run_by_op: Bool类型,启用后按OP粒度运行,禁用时为按原子指令粒度运行。按OP粒度运行速度较快,但当一个OP中包含过多原子指令时可能会引发timeout错误。 * is_soc: Bool类型,表示是否启用soc模式进行推理。 +* using_memory_opt: Bool类型,启用后会减小在device端的内存消耗,但会增加耗时。推荐在大模型时启用。 * enable_soc_log: Bool类型,启用此项打印并在save_path下保存log日志。 * tmp_path: String类型,表示soc模式下,板卡(device)端存放临时文件的绝对路径。 * tools_path: String类型,表示soc模式下,device端存放工具的文件夹名称。 diff --git a/python/transform/TpuLang.py b/python/transform/TpuLang.py index 34378d187..04ea8d329 100755 --- a/python/transform/TpuLang.py +++ b/python/transform/TpuLang.py @@ -297,10 +297,10 @@ def bmodel_inference_combine( out_fixed: bool = False, dump_cmd_info: bool = True, skip_check: bool = True, # disable data_check to increase processing speed - run_by_op: bool = False, + run_by_op: bool = False, # enable to run_by_op, may cause timeout error when some OPs contain too many atomic cmds is_soc: bool = False, # soc mode ONLY support {reference_data_fn=xxx.npz, dump_file=True} - using_memory_opt: bool = False, - enable_soc_log: bool = False, + using_memory_opt: bool = False, # required when is_soc=True + enable_soc_log: bool = False, # required when is_soc=True tmp_path: str = "/tmp", # required when is_soc=True tools_path: str = "/soc_infer", # required when is_soc=True hostname: str = None, # required when is_soc=True @@ -405,7 +405,7 @@ def progress_get(file_name, local_path="", remote_path="", progress=None): remote_bmodel = os.path.basename(bmodel_file) remote_input = os.path.basename(input_data_fn) remote_ref = os.path.basename(reference_data_fn) - exec_command = f"cd {tools_path} && source envsetup.sh && python3 soc_bmodel_infer.py --path {tmp_path} --bmodel {remote_bmodel} --input 
{remote_input} --ref {remote_ref} --tool_path {tools_path}" + exec_command = f"cd {tools_path} && source envsetup.sh && nohup python3 soc_bmodel_infer.py --path {tmp_path} --bmodel {remote_bmodel} --input {remote_input} --ref {remote_ref} --tool_path {tools_path}" if out_fixed: exec_command += " --out_fixed" if enable_soc_log: @@ -414,6 +414,7 @@ def progress_get(file_name, local_path="", remote_path="", progress=None): exec_command += " --using_memory_opt" if not run_by_op: exec_command += " --run_by_atomic" + exec_command += " &" print(f"soc execute command: {exec_command}") client.get_transport().set_keepalive(30)