From 440048cff09a810bc4740a52ed7c00a32d7408d2 Mon Sep 17 00:00:00 2001
From: Hansong <107070759+kirklandsign@users.noreply.github.com>
Date: Mon, 12 Aug 2024 13:30:48 -0700
Subject: [PATCH 01/11] Add an activity for benchmarking only
Differential Revision: D60399589
Pull Request resolved: https://github.com/pytorch/executorch/pull/4443
---
.../app/src/main/AndroidManifest.xml | 9 ++
.../LlmBenchmarkRunner.java | 111 ++++++++++++++++++
.../executorchllamademo/ModelRunner.java | 98 ++++++++++++++++
.../ModelRunnerCallback.java | 24 ++++
.../main/res/layout/activity_benchmarking.xml | 16 +++
5 files changed, 258 insertions(+)
create mode 100644 examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
create mode 100644 examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunner.java
create mode 100644 examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunnerCallback.java
create mode 100644 examples/demo-apps/android/LlamaDemo/app/src/main/res/layout/activity_benchmarking.xml
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/AndroidManifest.xml b/examples/demo-apps/android/LlamaDemo/app/src/main/AndroidManifest.xml
index bb231420df..02d8503a4d 100644
--- a/examples/demo-apps/android/LlamaDemo/app/src/main/AndroidManifest.xml
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/AndroidManifest.xml
@@ -47,6 +47,15 @@
+
+
+
+
+
+
+
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
new file mode 100644
index 0000000000..33b230b1df
--- /dev/null
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package com.example.executorchllamademo;
+
+import android.app.Activity;
+import android.content.Intent;
+import android.os.Bundle;
+import android.util.Log;
+import android.widget.TextView;
+import androidx.annotation.NonNull;
+import java.io.FileWriter;
+import java.io.IOException;
+
+/**
+ * Activity that benchmarks a single model run: it loads the model, generates from one prompt, and
+ * records wall-clock timestamps for the load and generate phases.
+ *
+ * <p>Configuration comes from the launching intent's extras: {@code model_path}, {@code
+ * tokenizer_path}, {@code temperature} (default 0.8) and {@code prompt} (default "The ultimate
+ * answer"). The collected stats are appended to the log view and written to {@code
+ * benchmark_results.txt} under {@code getFilesDir()}.
+ */
+public class LlmBenchmarkRunner extends Activity implements ModelRunnerCallback {
+  private static final String TAG = "LlmBenchmarkRunner";
+
+  ModelRunner mModelRunner;
+
+  String mPrompt;
+  TextView mTextView;
+  StatsDump mStatsDump;
+
+  /** Reads the benchmark configuration from the intent and kicks off the model load. */
+  @Override
+  protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+    setContentView(R.layout.activity_benchmarking);
+    mTextView = findViewById(R.id.log_view);
+
+    Intent intent = getIntent();
+
+    String modelPath = intent.getStringExtra("model_path");
+    String tokenizerPath = intent.getStringExtra("tokenizer_path");
+
+    float temperature = intent.getFloatExtra("temperature", 0.8f);
+    mPrompt = intent.getStringExtra("prompt");
+    if (mPrompt == null) {
+      mPrompt = "The ultimate answer";
+    }
+
+    mStatsDump = new StatsDump();
+    // ModelRunner's constructor queues the model load on its worker thread right away, so the
+    // start timestamp has to be taken before constructing it. Recording it afterwards races with
+    // the load itself (and with onModelLoaded, which runs on that worker thread).
+    mStatsDump.loadStart = System.currentTimeMillis();
+    mModelRunner = new ModelRunner(modelPath, tokenizerPath, temperature, this);
+  }
+
+  /**
+   * Records the end of the load phase and, on success, starts generation.
+   *
+   * @param status load result; any non-zero value is treated as a failure and ends the benchmark
+   */
+  @Override
+  public void onModelLoaded(int status) {
+    mStatsDump.loadEnd = System.currentTimeMillis();
+    if (status != 0) {
+      Log.e(TAG, "Loaded failed: " + status);
+      // Still emit a report so that a failed run leaves a results file behind.
+      onGenerationStopped();
+      return;
+    }
+    mStatsDump.generateStart = System.currentTimeMillis();
+    mModelRunner.generate(mPrompt);
+  }
+
+  /** Appends each generated token to the log view on the UI thread. */
+  @Override
+  public void onTokenGenerated(String token) {
+    runOnUiThread(() -> mTextView.append(token));
+  }
+
+  /** Stores the stats line reported by the runner so it is included in the final report. */
+  @Override
+  public void onStats(String stats) {
+    mStatsDump.tokens = stats;
+  }
+
+  /**
+   * Records the end of generation, then shows the report and writes it to the results file. Also
+   * invoked from {@link #onModelLoaded(int)} when loading fails.
+   */
+  @Override
+  public void onGenerationStopped() {
+    mStatsDump.generateEnd = System.currentTimeMillis();
+    // Render the report once so the UI and the results file are guaranteed to show the same text,
+    // and so the UI thread does not read the stats fields later on.
+    final String report = mStatsDump.toString();
+    runOnUiThread(() -> mTextView.append(report));
+
+    try (FileWriter writer = new FileWriter(getFilesDir() + "/benchmark_results.txt")) {
+      writer.write(report);
+    } catch (IOException e) {
+      // Report through logcat with the cause attached instead of printing to stderr.
+      Log.e(TAG, "Failed to write benchmark results", e);
+    }
+  }
+}
+
+class StatsDump {
+ long loadStart;
+ long loadEnd;
+ long generateStart;
+ long generateEnd;
+ String tokens;
+
+ @NonNull
+ @Override
+ public String toString() {
+ return "loadStart: "
+ + loadStart
+ + "\nloadEnd: "
+ + loadEnd
+ + "\ngenerateStart: "
+ + generateStart
+ + "\ngenerateEnd: "
+ + generateEnd
+ + "\n"
+ + tokens;
+ }
+}
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunner.java
new file mode 100644
index 0000000000..4dc32d1475
--- /dev/null
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunner.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package com.example.executorchllamademo;
+
+import android.os.Handler;
+import android.os.HandlerThread;
+import android.os.Looper;
+import android.os.Message;
+import androidx.annotation.NonNull;
+import org.pytorch.executorch.LlamaCallback;
+import org.pytorch.executorch.LlamaModule;
+
+/**
+ * A helper class to handle all model running logic within this class. It owns the {@link
+ * LlamaModule} and a worker thread, and forwards module callbacks to a {@link
+ * ModelRunnerCallback}.
+ */
+public class ModelRunner implements LlamaCallback {
+  LlamaModule mModule = null;
+
+  String mModelFilePath = "";
+  String mTokenizerFilePath = "";
+
+  ModelRunnerCallback mCallback = null;
+
+  HandlerThread mHandlerThread = null;
+  Handler mHandler = null;
+
+  /**
+   * Helper class to separate between UI logic and model runner logic. Automatically handle
+   * generate() request on worker thread.
+   *
+   * <p>Construction starts the worker thread and immediately queues a model load on it; the
+   * outcome is delivered through {@link ModelRunnerCallback#onModelLoaded(int)}.
+   *
+   * @param modelFilePath model file path handed to {@link LlamaModule}
+   * @param tokenizerFilePath tokenizer file path handed to {@link LlamaModule}
+   * @param temperature temperature handed to {@link LlamaModule}
+   * @param callback receiver for load, token, stats and completion events
+   */
+  ModelRunner(
+      String modelFilePath,
+      String tokenizerFilePath,
+      float temperature,
+      ModelRunnerCallback callback) {
+    mModelFilePath = modelFilePath;
+    mTokenizerFilePath = tokenizerFilePath;
+    mCallback = callback;
+
+    // Use the caller-supplied temperature; it was previously ignored in favor of a fixed 0.8f.
+    mModule = new LlamaModule(mModelFilePath, mTokenizerFilePath, temperature);
+    mHandlerThread = new HandlerThread("ModelRunner");
+    mHandlerThread.start();
+    mHandler = new ModelRunnerHandler(mHandlerThread.getLooper(), this);
+
+    mHandler.sendEmptyMessage(ModelRunnerHandler.MESSAGE_LOAD_MODEL);
+  }
+
+  /**
+   * Queues a generate request for {@code prompt} on the worker thread.
+   *
+   * @param prompt text to generate from
+   * @return always 0; the request is only enqueued here
+   */
+  int generate(String prompt) {
+    Message msg = Message.obtain(mHandler, ModelRunnerHandler.MESSAGE_GENERATE, prompt);
+    msg.sendToTarget();
+    return 0;
+  }
+
+  /** Forwards a stop request to the underlying module. */
+  void stop() {
+    mModule.stop();
+  }
+
+  /** Relays a result string from the module to the app-level callback as a generated token. */
+  @Override
+  public void onResult(String result) {
+    mCallback.onTokenGenerated(result);
+  }
+
+  /** Formats the reported tokens-per-second value and relays it to the app-level callback. */
+  @Override
+  public void onStats(float tps) {
+    mCallback.onStats("tokens/second: " + tps);
+  }
+}
+
+/**
+ * Handler that runs on the looper it is given and performs the load and generate calls for a
+ * {@link ModelRunner}, notifying the runner's callback after each one returns.
+ */
+class ModelRunnerHandler extends Handler {
+  // Message codes are constants: declare them final so they cannot be reassigned at runtime.
+  public static final int MESSAGE_LOAD_MODEL = 1;
+  public static final int MESSAGE_GENERATE = 2;
+
+  private final ModelRunner mModelRunner;
+
+  /**
+   * @param looper looper whose thread executes {@link #handleMessage}
+   * @param modelRunner runner whose module and callback are used when handling messages
+   */
+  public ModelRunnerHandler(Looper looper, ModelRunner modelRunner) {
+    super(looper);
+    mModelRunner = modelRunner;
+  }
+
+  /**
+   * Dispatches on {@code msg.what}: loads the model and reports its status, or generates from the
+   * prompt carried in {@code msg.obj} and reports completion. Other codes are ignored.
+   */
+  @Override
+  public void handleMessage(@NonNull android.os.Message msg) {
+    if (msg.what == MESSAGE_LOAD_MODEL) {
+      int status = mModelRunner.mModule.load();
+      mModelRunner.mCallback.onModelLoaded(status);
+    } else if (msg.what == MESSAGE_GENERATE) {
+      // The runner itself is the LlamaCallback that receives results and stats.
+      mModelRunner.mModule.generate((String) msg.obj, mModelRunner);
+      mModelRunner.mCallback.onGenerationStopped();
+    }
+  }
+}
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunnerCallback.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunnerCallback.java
new file mode 100644
index 0000000000..c8bdc53075
--- /dev/null
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/ModelRunnerCallback.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+package com.example.executorchllamademo;
+
+/**
+ * A helper interface within the app for MainActivity and Benchmarking to handle callback from
+ * ModelRunner.
+ */
+public interface ModelRunnerCallback {
+
+ /**
+ * Called after a model load attempt has returned.
+ *
+ * @param status the value returned by the module's load call; LlmBenchmarkRunner treats any
+ *     non-zero value as a failure
+ */
+ void onModelLoaded(int status);
+
+ /**
+ * Called with each result string the module produces during generation.
+ *
+ * @param token the generated text fragment
+ */
+ void onTokenGenerated(String token);
+
+ /**
+ * Called with a formatted stats line.
+ *
+ * @param token despite its name, this carries the stats text (ModelRunner passes
+ *     "tokens/second: " followed by the value), not a generated token
+ */
+ void onStats(String token);
+
+ /** Called after the module's generate call has returned. */
+ void onGenerationStopped();
+}
diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/res/layout/activity_benchmarking.xml b/examples/demo-apps/android/LlamaDemo/app/src/main/res/layout/activity_benchmarking.xml
new file mode 100644
index 0000000000..6e48b5de8b
--- /dev/null
+++ b/examples/demo-apps/android/LlamaDemo/app/src/main/res/layout/activity_benchmarking.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
From 8f4697180d10be3103beb9a25eed32db9db693a3 Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Mon, 12 Aug 2024 13:52:17 -0700
Subject: [PATCH 02/11] allow models to use customized token ids during export
Differential Revision: D61044259
Pull Request resolved: https://github.com/pytorch/executorch/pull/4649
---
examples/models/llama2/export_llama_lib.py | 25 +++++++++++----------
examples/models/llama2/llama_transformer.py | 4 ++--
2 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py
index eeafa3dee3..56ca1f5873 100644
--- a/examples/models/llama2/export_llama_lib.py
+++ b/examples/models/llama2/export_llama_lib.py
@@ -553,27 +553,29 @@ def _export_llama(modelname, args) -> LLMEdgeManager: # noqa: C901
def _load_llama_model_metadata(
weight_type: WeightType,
- dtype: DType,
use_kv_cache: bool,
use_sdpa_with_kv_cache: bool,
enable_dynamic_shape: bool,
- modelArgs: ModelArgs,
+ model_args: ModelArgs,
metadata_str: Optional[str] = None,
):
is_fairseq2 = weight_type == WeightType.FAIRSEQ2
metadata = {
"append_eos_to_prompt": is_fairseq2, # For language llama, tell the runtime to always append EOS token(s) to prompt.
- "get_bos_id": 3 if is_fairseq2 else 1,
- "get_dtype": 5 if dtype == DType.fp16 else 6,
- "get_eos_id": 3 if is_fairseq2 else 2,
- "get_head_dim": modelArgs.dim // modelArgs.n_heads,
- "get_max_batch_size": modelArgs.max_batch_size,
- "get_max_seq_len": modelArgs.max_seq_len,
+ "get_bos_id": (
+ model_args.bos_idx
+ if model_args.bos_idx is not None
+ else (3 if is_fairseq2 else 1)
+ ),
+ "get_eos_id": (
+ model_args.eos_idx
+ if model_args.eos_idx is not None
+ else (3 if is_fairseq2 else 2)
+ ),
+ "get_max_seq_len": model_args.max_seq_len,
"get_n_bos": 1,
"get_n_eos": 2 if is_fairseq2 else 1,
- "get_n_kv_heads": modelArgs.n_kv_heads,
- "get_n_layers": modelArgs.n_layers,
- "get_vocab_size": modelArgs.vocab_size,
+ "get_vocab_size": model_args.vocab_size,
"use_kv_cache": use_kv_cache,
"use_sdpa_with_kv_cache": use_sdpa_with_kv_cache,
"enable_dynamic_shape": enable_dynamic_shape,
@@ -655,7 +657,6 @@ def _load_llama_model(
verbose=verbose,
metadata=_load_llama_model_metadata(
weight_type,
- dtype,
use_kv_cache,
use_sdpa_with_kv_cache,
enable_dynamic_shape,
diff --git a/examples/models/llama2/llama_transformer.py b/examples/models/llama2/llama_transformer.py
index dacf9eb1fd..99544426fd 100644
--- a/examples/models/llama2/llama_transformer.py
+++ b/examples/models/llama2/llama_transformer.py
@@ -104,8 +104,8 @@ class ModelArgs:
rope_freq_base: float = 10000.0 # The base frequency for RoPE. Keep it for BC.
use_scaled_rope: bool = False # Use scaled RoPE, introduced in llama3.1.
# Additional Model Metadata needed at runtime
- bos_idx: int = 1
- eos_idx: int = 3
+ bos_idx: Optional[int] = None
+ eos_idx: Optional[int] = None
bos_count: int = -1 # i.e., a single EOS is used as BOS
eos_count: int = 2
From 728a29ded2360761b6dc0244f81d02496539a742 Mon Sep 17 00:00:00 2001
From: Sicheng Stephen Jia
Date: Mon, 12 Aug 2024 17:04:17 -0400
Subject: [PATCH 03/11] Pack buffer-backed tensors correctly when moving into
and out of staging
Differential Revision: D61150844
Pull Request resolved: https://github.com/pytorch/executorch/pull/4673
---
.../vulkan/runtime/api/containers/Tensor.h | 8 +++
.../graph/ops/glsl/buffer_to_buffer.glsl | 1 -
.../graph/ops/glsl/buffer_to_nchw.glsl | 35 ++++++++++++
.../graph/ops/glsl/buffer_to_nchw.yaml | 18 ++++++
.../runtime/graph/ops/glsl/indexing_utils.h | 55 ++++++++++++++++---
.../graph/ops/glsl/nchw_to_buffer.glsl | 35 ++++++++++++
.../graph/ops/glsl/nchw_to_buffer.yaml | 18 ++++++
.../vulkan/runtime/graph/ops/impl/Staging.cpp | 12 +++-
.../runtime/graph/ops/utils/StagingUtils.cpp | 4 +-
backends/vulkan/test/utils/test_utils.cpp | 13 ++---
10 files changed, 177 insertions(+), 22 deletions(-)
create mode 100644 backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
create mode 100644 backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
create mode 100644 backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
create mode 100644 backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index e69a4937e5..b1a02a6d2e 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -277,6 +277,14 @@ class vTensor final {
return sizes_.size();
}
+ // Returns a read-only reference to the strides_ member.
+ inline const std::vector& strides() const {
+ return strides_;
+ }
+
+ // Returns a read-only reference to the unsqueezed_strides_ member.
+ inline const std::vector& unsqueezed_strides() const {
+ return unsqueezed_strides_;
+ }
+
/*
* Returns a GPU buffer containing the sizes of the tensor in WHCN order.
* Note that dimensions that are not present in the tensor's sizes are set to
diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_buffer.glsl
index fe69501f9c..9d4b18f0d1 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_buffer.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_buffer.glsl
@@ -1,4 +1,3 @@
-
#version 450 core
#define PRECISION ${PRECISION}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
new file mode 100644
index 0000000000..58796879e8
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
@@ -0,0 +1,35 @@
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define T ${buffer_scalar_type(DTYPE)}
+
+#include "indexing_utils.h"
+
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_ubo(2, "ivec4", "in_sizes")}
+${layout_declare_ubo(3, "ivec4", "in_strides")}
+${layout_declare_ubo(4, "int", "numel")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+// This constant is unused in this shader but is kept so that the signature is
+// consistent with image_to_nchw.
+layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
+
+void main() {
+  // Each invocation handles one element of the NCHW staging buffer.
+  const int nchw_id = int(gl_GlobalInvocationID.x);
+  // Invocations at or beyond numel have nothing to copy.
+  if (nchw_id < numel) {
+    // NCHW position -> (w, h, c, n) tensor index -> offset in the strided tensor buffer.
+    const ivec4 tensor_idx = from_nchw_buffer_i(nchw_id, in_sizes);
+    nchw_buf[nchw_id] = t_in[to_buffer_id(tensor_idx, in_strides)];
+  }
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
new file mode 100644
index 0000000000..653bda9ccc
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+buffer_to_nchw:
+ parameter_names_with_default_values:
+ DTYPE: float
+ STORAGE: buffer
+ generate_variant_forall:
+ DTYPE:
+ - VALUE: half
+ - VALUE: float
+ - VALUE: int
+ - VALUE: int8
+ shader_variants:
+ - NAME: buffer_to_nchw
diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
index d3264e43a2..21eadff0b3 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
+++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h
@@ -41,6 +41,21 @@
*/
#define alignup4(x) ((x + 3) & -4)
+/*
+ * Input: (W, H, C, N) strides of a tensor
+ * Returns: the WHCN index of the fastest moving dimension
+ */
+int find_packed_dim(const ivec4 strides) {
+  // Scan W, H, C, N in order and report the first dim whose stride is 1.
+  for (int dim = 0; dim < 4; ++dim) {
+    if (strides[dim] == 1) {
+      return dim;
+    }
+  }
+  // No unit stride present: fall back to index 0.
+  return 0;
+}
+
//
// (w, h, c, n) Tensor Index <-> Contiguous Buffer Index Conversion
//
@@ -74,27 +89,49 @@ ivec4 from_nchw_buffer_i(int buf_i, ivec4 sizes) {
(buf_i / (sizes.x * sizes.y * sizes.z)));
}
+/*
+ * Input: (w, h, c, n) tensor index, (W, H, C, N) sizes of the tensor
+ * Returns: the index of that element in a contiguous NCHW buffer
+ */
+int to_nchw_buffer_i(const ivec4 tensor_idx, const ivec4 sizes) {
+  // Contiguous strides: W is innermost, then H, then C, then N.
+  const int h_stride = sizes.x;
+  const int c_stride = sizes.x * sizes.y;
+  const int n_stride = sizes.x * sizes.y * sizes.z;
+  return tensor_idx.w * n_stride + tensor_idx.z * c_stride +
+      tensor_idx.y * h_stride + tensor_idx.x;
+}
+
/*
* Input: Texel buffer index, (W, H, C, N) strides of a tensor, which dim is
* packed along a texel
- * Returns: The (x, y, z, n) texel position corresponding to the first element
- * of the texel at the specified buffer index
+ * Returns: The (w, h, c, n) tensor index corresponding to the buffer element
*/
-ivec4 to_tensor_idx(int buf_i, ivec4 strides, int packed_dim) {
+ivec4 to_tensor_idx(int buffer_id, const ivec4 strides, const int packed_dim) {
ivec4 idx;
for (int i = 3; i >= 0; i--) {
if (i != packed_dim) {
- idx[i] = buf_i / strides[i];
- buf_i %= strides[i];
+ idx[i] = buffer_id / strides[i];
+ buffer_id %= strides[i];
}
}
- idx[packed_dim] = buf_i;
+ idx[packed_dim] = buffer_id;
return idx;
}
-int to_texel_idx(const ivec4 texel_pos, ivec4 strides) {
- return texel_pos.x * strides.x + texel_pos.y * strides.y +
- texel_pos.z * strides.z + texel_pos.w * strides.w;
+/*
+ * Input: Texel buffer index, (W, H, C, N) strides of a tensor
+ * Returns: The (w, h, c, n) tensor index corresponding to the buffer element
+ *
+ * This is a convenience overload of the above function. If the packed dim is
+ * not known, it can be found by finding the first dimension with a stride of 1.
+ * However, this process adds some overhead, so if performance is a concern then
+ * the above function should be used instead so that the packed dim is provided.
+ */
+ivec4 to_tensor_idx(int buffer_id, const ivec4 strides) {
+  // Derive the packed dim from the strides, then defer to the 3-argument form.
+  return to_tensor_idx(buffer_id, strides, find_packed_dim(strides));
+}
+
+/*
+ * Input: (w, h, c, n) tensor index, (W, H, C, N) strides of the tensor buffer
+ * Returns: the buffer index corresponding to the specified tensor index
+ */
+int to_buffer_id(const ivec4 tensor_idx, ivec4 strides) {
+  // Component-wise index * stride, then sum the four per-dim offsets.
+  const ivec4 offsets = tensor_idx * strides;
+  return offsets.x + offsets.y + offsets.z + offsets.w;
+}
//
diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
new file mode 100644
index 0000000000..d861972f93
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
@@ -0,0 +1,35 @@
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define T ${buffer_scalar_type(DTYPE)}
+
+#include "indexing_utils.h"
+
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "nchw_in", DTYPE, STORAGE)}
+${layout_declare_ubo(2, "ivec4", "out_sizes")}
+${layout_declare_ubo(3, "ivec4", "out_strides")}
+${layout_declare_ubo(4, "int", "numel")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+// This constant is unused in this shader but is kept so that the signature is
+// consistent with nchw_to_image.
+layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
+
+void main() {
+  // Each invocation handles one element of the output tensor buffer.
+  const int buf_id = int(gl_GlobalInvocationID.x);
+  // Invocations at or beyond numel have nothing to copy.
+  if (buf_id < numel) {
+    // Strided buffer offset -> (w, h, c, n) tensor index -> position in the NCHW input.
+    const ivec4 tensor_idx = to_tensor_idx(buf_id, out_strides);
+    t_out[buf_id] = nchw_in[to_nchw_buffer_i(tensor_idx, out_sizes)];
+  }
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
new file mode 100644
index 0000000000..6292ef9333
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+nchw_to_buffer:
+ parameter_names_with_default_values:
+ DTYPE: float
+ STORAGE: buffer
+ generate_variant_forall:
+ DTYPE:
+ - VALUE: half
+ - VALUE: float
+ - VALUE: int
+ - VALUE: int8
+ shader_variants:
+ - NAME: nchw_to_buffer
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
index b35d4b0175..b02613c208 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
@@ -26,7 +26,10 @@ void add_staging_to_tensor_node(
vkapi::ParamsBindList ubos;
if (graph.is_buffer_storage(out_tensor)) {
- ubos.append(graph.numel_ubo(out_tensor));
+ ubos.append(
+ {graph.sizes_ubo(out_tensor),
+ graph.strides_ubo(out_tensor),
+ graph.numel_ubo(out_tensor)});
} else {
ubos.append(graph.sizes_ubo(out_tensor));
}
@@ -61,7 +64,10 @@ void add_tensor_to_staging_node(
vkapi::ParamsBindList ubos;
if (graph.is_buffer_storage(in_tensor)) {
- ubos.append(graph.numel_ubo(in_tensor));
+ ubos.append(
+ {graph.sizes_ubo(in_tensor),
+ graph.strides_ubo(in_tensor),
+ graph.numel_ubo(in_tensor)});
} else {
ubos.append(graph.sizes_ubo(in_tensor));
}
@@ -105,7 +111,7 @@ ValueRef prepack(
vkapi::ParamsBindList ubos;
if (graph.is_buffer_storage(v)) {
- ubos.append(graph.numel_ubo(v));
+ ubos.append({graph.sizes_ubo(v), graph.strides_ubo(v), graph.numel_ubo(v)});
} else {
ubos.append(graph.sizes_ubo(v));
}
diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
index daec2666f8..294e36b9a8 100644
--- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
@@ -107,7 +107,7 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
}
if (v_dst.storage_type() == utils::kBuffer) {
- kernel_name = "buffer_to_buffer";
+ kernel_name = "nchw_to_buffer";
add_dtype_suffix(kernel_name, v_dst);
return VK_KERNEL_FROM_STR(kernel_name);
}
@@ -131,7 +131,7 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader(
}
if (v_src.storage_type() == utils::kBuffer) {
- kernel_name = "buffer_to_buffer";
+ kernel_name = "buffer_to_nchw";
add_dtype_suffix(kernel_name, v_src);
return VK_KERNEL_FROM_STR(kernel_name);
}
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index 29cd7bf995..e6f2863470 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -23,15 +23,13 @@ void record_nchw_to_buffer_op(
vkapi::VulkanBuffer& src_buffer,
api::vTensor& v_dst) {
vkapi::PipelineBarrier pipeline_barrier{};
- vkapi::SpecVarList specialization_constants = {
- SV(v_dst.packed_dim_whcn_idx())};
context->submit_compute_job(
get_nchw_to_tensor_shader(v_dst),
pipeline_barrier,
{uint32_t(v_dst.numel()), 1, 1},
{64, 1, 1},
- specialization_constants,
+ {},
VK_NULL_HANDLE,
0,
v_dst.buffer(
@@ -39,6 +37,8 @@ void record_nchw_to_buffer_op(
vkapi::PipelineStage::COMPUTE,
vkapi::MemoryAccessType::WRITE),
src_buffer,
+ v_dst.sizes_ubo(),
+ v_dst.strides_ubo(),
v_dst.numel_ubo());
}
@@ -47,19 +47,18 @@ void record_buffer_to_nchw_op(
api::vTensor& v_src,
vkapi::VulkanBuffer& dst_buffer) {
vkapi::PipelineBarrier pipeline_barrier{};
- vkapi::SpecVarList specialization_constants = {
- SV(v_src.packed_dim_whcn_idx())};
-
context->submit_compute_job(
get_tensor_to_nchw_shader(v_src),
pipeline_barrier,
{uint32_t(v_src.numel()), 1, 1},
{64, 1, 1},
- specialization_constants,
+ {},
VK_NULL_HANDLE,
0,
dst_buffer,
v_src.buffer(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
+ v_src.sizes_ubo(),
+ v_src.strides_ubo(),
v_src.numel_ubo());
}
From 3e0eb0ff0dfd74eb373a79ddf5787441f3745a44 Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Mon, 12 Aug 2024 14:55:02 -0700
Subject: [PATCH 04/11] Do not print eos (#4654)
* allow models to use customized token ids during export (#4649)
Summary:
Llama 3.1's [bos and eos](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/blob/main/tokenizer_config.json) are different from what is hardcoded in the code. This PR updates the export flow to allow reading customized token ids instead of using hardcoded ones.
It also deletes a few metadata entries that are not used by the runner.
Pull Request resolved: https://github.com/pytorch/executorch/pull/4649
Differential Revision: D61044259
Pulled By: helunwencser
* Do not print eos
Summary: We don't want to print eos in the response because some eos tokens could be `<|end_of_text|>`.
Differential Revision: D61048254
---------
Co-authored-by: Lunwen He
---
examples/models/llama2/runner/runner.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
index a44b56d5d3..aa711b50e2 100644
--- a/examples/models/llama2/runner/runner.cpp
+++ b/examples/models/llama2/runner/runner.cpp
@@ -228,19 +228,19 @@ Error Runner::generate(
tokens_managed.resize({1, static_cast(token_data.size())});
}
- // print the token as string, decode it with the Tokenizer object
- wrapped_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
-
- if (shouldStop_) {
- break;
- }
-
// data-dependent terminating condition: we have n_eos_ number of EOS
if (pos >= num_prompt_tokens && cur_token == eos_id_) {
printf("\n");
ET_LOG(Info, "\nReached to the end of generation");
break;
}
+
+ // print the token as string, decode it with the Tokenizer object
+ wrapped_callback(ET_UNWRAP(tokenizer_->decode(prev_token, cur_token)));
+
+ if (shouldStop_) {
+ break;
+ }
}
stats_.inference_end_ms = util::time_in_ms();
printf("\n");
From b165c2827b03ba3c860c39295abc46fbc0d87824 Mon Sep 17 00:00:00 2001
From: Jacob Szwejbka
Date: Mon, 12 Aug 2024 15:47:47 -0700
Subject: [PATCH 05/11] Implement load_into for file data loader
Differential Revision: D61147536
Pull Request resolved: https://github.com/pytorch/executorch/pull/4671
---
extension/data_loader/file_data_loader.cpp | 109 +++++++++++++--------
extension/data_loader/file_data_loader.h | 6 ++
2 files changed, 74 insertions(+), 41 deletions(-)
diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp
index 7b041fef00..bf06d0c9be 100644
--- a/extension/data_loader/file_data_loader.cpp
+++ b/extension/data_loader/file_data_loader.cpp
@@ -49,7 +49,6 @@ static uint8_t* align_pointer(void* ptr, size_t alignment) {
addr = (addr | (alignment - 1)) + 1;
return reinterpret_cast(addr);
}
-
} // namespace
FileDataLoader::~FileDataLoader() {
@@ -143,19 +142,6 @@ Result FileDataLoader::load(
return FreeableBuffer(nullptr, 0, /*free_fn=*/nullptr);
}
- // Seek to the right place in the file.
- off_t seek_offset = ::lseek(fd_, offset, SEEK_SET);
- if (seek_offset != offset) {
- ET_LOG(
- Error,
- "Seeking %s to offset %zu returned %zd: %s",
- file_name_,
- offset,
- (ssize_t)seek_offset,
- strerror(errno));
- return Error::AccessFailed;
- }
-
// Allocate memory for the FreeableBuffer.
size_t alloc_size = size;
if (alignment_ > alignof(std::max_align_t)) {
@@ -187,9 +173,75 @@ Result FileDataLoader::load(
buffer,
alloc_size);
+ auto err = load_into(offset, size, segment_info, aligned_buffer);
+ if (err != Error::Ok) {
+ // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
+ std::free(buffer);
+ return err;
+ }
+
+ // We can't naively free this pointer, since it may not be what malloc() gave
+ // us. Pass the offset to the real buffer as context. This is the number of
+ // bytes that need to be subtracted from the FreeableBuffer::data() pointer to
+ // find the actual pointer to free.
+ return FreeableBuffer(
+ aligned_buffer,
+ size,
+ FreeSegment,
+ /*free_fn_context=*/
+ reinterpret_cast(
+ // Using signed types here because it will produce a signed ptrdiff_t
+ // value, though for us it will always be non-negative.
+ reinterpret_cast(aligned_buffer) -
+ reinterpret_cast(buffer)));
+}
+
+// Returns file_size_. The check below reports InvalidState ("Uninitialized")
+// when fd_ is negative.
+Result FileDataLoader::size() const {
+ ET_CHECK_OR_RETURN_ERROR(
+ // Probably had its value moved to another instance.
+ fd_ >= 0,
+ InvalidState,
+ "Uninitialized");
+ return file_size_;
+}
+
+__ET_NODISCARD Error FileDataLoader::load_into(
+ size_t offset,
+ size_t size,
+ __ET_UNUSED const SegmentInfo& segment_info,
+ void* buffer) {
+ ET_CHECK_OR_RETURN_ERROR(
+ // Probably had its value moved to another instance.
+ fd_ >= 0,
+ InvalidState,
+ "Uninitialized");
+ ET_CHECK_OR_RETURN_ERROR(
+ offset + size <= file_size_,
+ InvalidArgument,
+ "File %s: offset %zu + size %zu > file_size_ %zu",
+ file_name_,
+ offset,
+ size,
+ file_size_);
+ ET_CHECK_OR_RETURN_ERROR(
+ buffer != nullptr, InvalidArgument, "Provided buffer cannot be null");
+
+ // Seek to the right place in the file.
+ off_t seek_offset = ::lseek(fd_, offset, SEEK_SET);
+ if (seek_offset != offset) {
+ ET_LOG(
+ Error,
+ "Seeking %s to offset %zu returned %zd: %s",
+ file_name_,
+ offset,
+ (ssize_t)seek_offset,
+ strerror(errno));
+ return Error::AccessFailed;
+ }
+
// Read the data into the aligned address.
size_t needed = size;
- uint8_t* buf = reinterpret_cast(aligned_buffer);
+ uint8_t* buf = reinterpret_cast(buffer);
while (needed > 0) {
// Reads on macos will fail with EINVAL if size > INT32_MAX.
ssize_t nread = ::read(
@@ -211,37 +263,12 @@ Result FileDataLoader::load(
size,
offset,
nread == 0 ? "EOF" : strerror(errno));
- // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
- std::free(buffer);
return Error::AccessFailed;
}
needed -= nread;
buf += nread;
}
-
- // We can't naively free this pointer, since it may not be what malloc() gave
- // us. Pass the offset to the real buffer as context. This is the number of
- // bytes that need to be subtracted from the FreeableBuffer::data() pointer to
- // find the actual pointer to free.
- return FreeableBuffer(
- aligned_buffer,
- size,
- FreeSegment,
- /*free_fn_context=*/
- reinterpret_cast(
- // Using signed types here because it will produce a signed ptrdiff_t
- // value, though for us it will always be non-negative.
- reinterpret_cast(aligned_buffer) -
- reinterpret_cast(buffer)));
-}
-
-Result FileDataLoader::size() const {
- ET_CHECK_OR_RETURN_ERROR(
- // Probably had its value moved to another instance.
- fd_ >= 0,
- InvalidState,
- "Uninitialized");
- return file_size_;
+ return Error::Ok;
}
} // namespace util
diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h
index c6ab25933a..b7cfe3a1b9 100644
--- a/extension/data_loader/file_data_loader.h
+++ b/extension/data_loader/file_data_loader.h
@@ -72,6 +72,12 @@ class FileDataLoader : public DataLoader {
__ET_NODISCARD Result size() const override;
+ __ET_NODISCARD Error load_into(
+ size_t offset,
+ size_t size,
+ __ET_UNUSED const SegmentInfo& segment_info,
+ void* buffer) override;
+
private:
FileDataLoader(
int fd,
From b6de6ed49521109ac606435b940eb821e21b6af2 Mon Sep 17 00:00:00 2001
From: Guang Yang <42389959+guangy10@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:53:07 -0700
Subject: [PATCH 06/11] Fix periodic run and model name for benchmarking
Differential Revision: D61054615
Pull Request resolved: https://github.com/pytorch/executorch/pull/4642
---
.ci/scripts/test_llama.sh | 4 ++--
.github/workflows/android-perf.yml | 16 ++++++++++++++++
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index ae795b12ab..4e1cb99cc0 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -130,9 +130,9 @@ cleanup_files() {
prepare_artifacts_upload() {
if [ -n "$UPLOAD_DIR" ]; then
echo "Preparing for uploading generated artifacs"
+ zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
mkdir -p "${UPLOAD_DIR}"
- zip -j "model.zip" "${MODEL_NAME}" tokenizer.bin
- cp "model.zip" "${UPLOAD_DIR}"
+ mv model.zip "${UPLOAD_DIR}"
fi
}
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 4f8b216a54..d8b2f70c73 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -48,11 +48,27 @@ jobs:
- name: Set parameters
id: set-parameters
shell: bash
+ env:
+ # Keep these defaults separate from the workflow dispatch inputs. The script below falls
+ # back to them whenever an input is empty, so they are available during scheduled runs
+ # and can differ between on-demand and periodic benchmarking.
+ CRON_DEFAULT_MODELS: "stories110M"
+ CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
+ CRON_DEFAULT_DELEGATES: "xnnpack"
run: |
set -ex
MODELS="${{ inputs.models }}"
+ if [ -z "$MODELS" ]; then
+ MODELS="$CRON_DEFAULT_MODELS"
+ fi
DEVICES="${{ inputs.devices }}"
+ if [ -z "$DEVICES" ]; then
+ DEVICES="$CRON_DEFAULT_DEVICES"
+ fi
DELEGATES="${{ inputs.delegates }}"
+ if [ -z "$DELEGATES" ]; then
+ DELEGATES="$CRON_DEFAULT_DELEGATES"
+ fi
# Mapping devices to their corresponding device-pool-arn
declare -A DEVICE_POOL_ARNS
From 5e9bab8c5956249e75a0f187bf8075df97ca2555 Mon Sep 17 00:00:00 2001
From: Hansong <107070759+kirklandsign@users.noreply.github.com>
Date: Mon, 12 Aug 2024 17:07:23 -0700
Subject: [PATCH 07/11] Delete dead code
Differential Revision: D61166041
Pull Request resolved: https://github.com/pytorch/executorch/pull/4678
---------
Co-authored-by: helunwencser
---
examples/models/llama2/runner/generation.py | 78 +--------------------
1 file changed, 1 insertion(+), 77 deletions(-)
diff --git a/examples/models/llama2/runner/generation.py b/examples/models/llama2/runner/generation.py
index 56a15005ef..404ff4717e 100644
--- a/examples/models/llama2/runner/generation.py
+++ b/examples/models/llama2/runner/generation.py
@@ -14,11 +14,7 @@
import torch.nn.functional as F
from executorch.examples.models.llama2.llama_transformer import ModelArgs
-from executorch.examples.models.llama2.tokenizer.tiktoken import (
- Dialog,
- Message,
- Tokenizer,
-)
+from executorch.examples.models.llama2.tokenizer.tiktoken import Tokenizer
from executorch.extension.pybindings.portable_lib import _load_for_executorch
@@ -28,12 +24,6 @@ class CompletionPrediction(TypedDict, total=False):
logprobs: List[float] # not required
-class ChatPrediction(TypedDict, total=False):
- generation: Message
- tokens: List[str] # not required
- logprobs: List[float] # not required
-
-
def sample_top_p(probs, p):
"""
Perform top-p (nucleus) sampling on a probability distribution.
@@ -225,72 +215,6 @@ def text_completion(
]
return [{"generation": self.tokenizer.decode(t)} for t in generation_tokens]
- def chat_completion(
- self,
- dialogs: List[Dialog],
- temperature: float = 0.6,
- top_p: float = 0.9,
- max_gen_len: Optional[int] = None,
- logprobs: bool = False,
- ) -> List[ChatPrediction]:
- """
- Generate assistant responses for a list of conversational dialogs using the language generation model.
-
- Args:
- dialogs (List[Dialog]): List of conversational dialogs, where each dialog is a list of messages.
- temperature (float, optional): Temperature value for controlling randomness in sampling. Defaults to 0.6.
- top_p (float, optional): Top-p probability threshold for nucleus sampling. Defaults to 0.9.
- max_gen_len (Optional[int], optional): Maximum length of the generated response sequence.
- If not provided, it's set to the model's maximum sequence length minus 1.
- logprobs (bool, optional): Flag indicating whether to compute token log probabilities. Defaults to False.
-
- Returns:
- List[ChatPrediction]: List of chat predictions, each containing the assistant's generated response.
-
- Raises:
- AssertionError: If the last message in a dialog is not from the user.
- AssertionError: If the dialog roles are not in the required 'user', 'assistant', and optional 'system' order.
-
- Note:
- This method generates assistant responses for the provided conversational dialogs.
- It employs nucleus sampling to introduce controlled randomness in text generation.
- If logprobs is True, token log probabilities are computed for each generated token.
- """
- if max_gen_len is None:
- max_gen_len = self.model.params.max_seq_len - 1
-
- prompt_tokens = [
- self.formatter.encode_dialog_prompt(dialog) for dialog in dialogs
- ]
- generation_tokens, generation_logprobs = self.generate(
- prompt_tokens=prompt_tokens,
- max_gen_len=max_gen_len,
- temperature=temperature,
- top_p=top_p,
- logprobs=logprobs,
- )
- if logprobs:
- return [
- {
- "generation": {
- "role": "assistant",
- "content": self.tokenizer.decode(t),
- },
- "tokens": [self.tokenizer.decode([x]) for x in t],
- "logprobs": logprobs_i,
- }
- for t, logprobs_i in zip(generation_tokens, generation_logprobs)
- ]
- return [
- {
- "generation": {
- "role": "assistant",
- "content": self.tokenizer.decode(t),
- },
- }
- for t in generation_tokens
- ]
-
def build_args_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
From 56f843b25badf4c1f618a9b355d4885721eec512 Mon Sep 17 00:00:00 2001
From: Anthony Shoumikhin
Date: Mon, 12 Aug 2024 17:50:02 -0700
Subject: [PATCH 08/11] Move metadata util to extension/llm/runner.
Differential Revision: D61108863
Pull Request resolved: https://github.com/pytorch/executorch/pull/4664
---
examples/models/llama2/runner/runner.cpp | 2 +-
examples/models/llama2/runner/targets.bzl | 1 +
extension/{module => llm/runner}/metadata_util.h | 0
extension/llm/runner/targets.bzl | 11 +++++++++++
extension/module/targets.bzl | 1 -
5 files changed, 13 insertions(+), 2 deletions(-)
rename extension/{module => llm/runner}/metadata_util.h (100%)
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
index aa711b50e2..6bbbc05736 100644
--- a/examples/models/llama2/runner/runner.cpp
+++ b/examples/models/llama2/runner/runner.cpp
@@ -16,7 +16,7 @@
#include
#endif /* ET_USE_TIKTOKEN*/
#include
-#include
+#include
#include
#include
diff --git a/examples/models/llama2/runner/targets.bzl b/examples/models/llama2/runner/targets.bzl
index 9800430b1f..2d0f1d5fe5 100644
--- a/examples/models/llama2/runner/targets.bzl
+++ b/examples/models/llama2/runner/targets.bzl
@@ -32,6 +32,7 @@ def define_common_targets():
],
exported_deps = [
"//executorch/backends/xnnpack:xnnpack_backend",
+ "//executorch/extension/llm/runner:metadata_util" + aten_suffix,
"//executorch/extension/llm/runner:stats",
"//executorch/extension/llm/runner:text_decoder_runner" + aten_suffix,
"//executorch/extension/llm/runner:text_prefiller" + aten_suffix,
diff --git a/extension/module/metadata_util.h b/extension/llm/runner/metadata_util.h
similarity index 100%
rename from extension/module/metadata_util.h
rename to extension/llm/runner/metadata_util.h
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
index 2e37547437..30241169ae 100644
--- a/extension/llm/runner/targets.bzl
+++ b/extension/llm/runner/targets.bzl
@@ -44,3 +44,14 @@ def define_common_targets():
"//executorch/extension/runner_util:managed_tensor" + aten_suffix,
],
)
+
+ runtime.cxx_library(
+ name = "metadata_util" + aten_suffix,
+ exported_headers = ["metadata_util.h"],
+ visibility = [
+ "@EXECUTORCH_CLIENTS",
+ ],
+ exported_deps = [
+ "//executorch/extension/module:module" + aten_suffix,
+ ],
+ )
diff --git a/extension/module/targets.bzl b/extension/module/targets.bzl
index 07020b03a8..61251047dc 100644
--- a/extension/module/targets.bzl
+++ b/extension/module/targets.bzl
@@ -17,7 +17,6 @@ def define_common_targets():
],
exported_headers = [
"module.h",
- "metadata_util.h",
],
visibility = [
"@EXECUTORCH_CLIENTS",
From e71fa03091149ecaffc13fb8a67dfd2776ecc0f4 Mon Sep 17 00:00:00 2001
From: cccclai
Date: Mon, 12 Aug 2024 21:25:24 -0700
Subject: [PATCH 09/11] Add stories ci for qnn
Differential Revision: D61141050
Pull Request resolved: https://github.com/pytorch/executorch/pull/4662
---
.ci/docker/build.sh | 8 +++++
.ci/docker/ubuntu/Dockerfile | 5 +++
.ci/scripts/build-qnn-sdk.sh | 19 ++++++++++
.ci/scripts/setup-qnn-deps.sh | 29 +++++++++++++++
.ci/scripts/test_llama.sh | 24 +++++++++++++
.github/workflows/docker-builds.yml | 1 +
.github/workflows/trunk.yml | 35 +++++++++++++++++++
.../qualcomm/aot/python/PyQnnWrapperAdaptor.h | 2 +-
backends/qualcomm/scripts/build.sh | 4 +--
9 files changed, 124 insertions(+), 3 deletions(-)
create mode 100644 .ci/scripts/build-qnn-sdk.sh
create mode 100644 .ci/scripts/setup-qnn-deps.sh
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index ad2f713466..59d5daa8b8 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -37,6 +37,10 @@ case "${IMAGE_NAME}" in
ARM_SDK=yes
CLANG_VERSION=12
;;
+ executorch-ubuntu-22.04-qnn-sdk)
+ QNN_SDK=yes
+ CLANG_VERSION=12
+ ;;
executorch-ubuntu-22.04-clang12-android)
LINTRUNNER=""
CLANG_VERSION=12
@@ -59,6 +63,9 @@ cp ../../requirements-lintrunner.txt ./
# with a new image hash when the content here is updated
cp -r ../../examples/arm/ ./arm
+# Copy qnn setup script from root to here
+cp -r ../../backends/qualcomm/ ./qualcomm
+
docker build \
--no-cache \
--progress=plain \
@@ -72,6 +79,7 @@ docker build \
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
--build-arg "ARM_SDK=${ARM_SDK:-}" \
+ --build-arg "QNN_SDK=${QNN_SDK:-}" \
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
-f "${OS}"/Dockerfile \
"$@" \
diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
index 449cd14b6b..2aa9f24b67 100644
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@@ -82,5 +82,10 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
# Set up ARM SDK if needed
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "ossci@example.com"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi
+ARG QNN_SDK
+COPY --chown=ci-user:ci-user ./qualcomm /opt/qualcomm
+# Set up QNN SDK if needed
+RUN if [ -n "${QNN_SDK}" ]; then git config --global user.email "ossci@example.com"; git config --global user.name "OSS CI"; fi
+
USER ci-user
CMD ["bash"]
diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
new file mode 100644
index 0000000000..d912069b06
--- /dev/null
+++ b/.ci/scripts/build-qnn-sdk.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+build_qnn_backend() {
+ echo "Start building qnn backend."
+ export ANDROID_NDK_ROOT=/opt/ndk
+ export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
+ export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+
+ bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
+}
+
+build_qnn_backend
diff --git a/.ci/scripts/setup-qnn-deps.sh b/.ci/scripts/setup-qnn-deps.sh
new file mode 100644
index 0000000000..3b39e1aafe
--- /dev/null
+++ b/.ci/scripts/setup-qnn-deps.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+install_qnn() {
+ echo "Start installing qnn."
+ QNN_INSTALLATION_DIR=/tmp/qnn
+ mkdir -p "${QNN_INSTALLATION_DIR}"
+
+ curl -Lo /tmp/v2.23.0.24.06.24.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.23.0.24.06.24.zip"
+ echo "Finishing downloading qnn sdk."
+ unzip -qo /tmp/v2.23.0.24.06.24.zip -d /tmp
+ echo "Finishing unzip qnn sdk."
+
+
+ # Print the content for manual verification
+ ls -lah "/tmp/qairt"
+ mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
+ echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."
+
+ ls -lah "${QNN_INSTALLATION_DIR}"
+}
+
+install_qnn
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 4e1cb99cc0..30bbefb78b 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -72,6 +72,25 @@ fi
echo "COREML option ${COREML}"
+if [[ "${MODE}" =~ .*qnn.* ]]; then
+ QNN=ON
+ export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+ export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
+ export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+ export PYTHONPATH=".."
+ cp schema/program.fbs exir/_serialize/program.fbs
+ cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+ cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+ cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+else
+ QNN=OFF
+ QNN_SDK_ROOT=""
+fi
+
+echo "QNN option ${QNN}"
+echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}"
+
if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -96,6 +115,8 @@ cmake_install_executorch_libraries() {
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DEXECUTORCH_BUILD_MPS="$MPS" \
-DEXECUTORCH_BUILD_COREML="$COREML" \
+ -DEXECUTORCH_BUILD_QNN="$QNN" \
+ -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug
@@ -176,6 +197,9 @@ fi
if [[ "${COREML}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --coreml --disable_dynamic_shape"
fi
+if [[ "${QNN}" == "ON" ]]; then
+ EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index f773f3aca8..d256af7fcd 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -38,6 +38,7 @@ jobs:
- docker-image-name: executorch-ubuntu-22.04-clang12
- docker-image-name: executorch-ubuntu-22.04-linter
- docker-image-name: executorch-ubuntu-22.04-arm-sdk
+ - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
- docker-image-name: executorch-ubuntu-22.04-clang12-android
env:
DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 9b28d26048..86e44e647d 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -270,3 +270,38 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+
+
+ test-llama-runner-qnn-linux:
+ name: test-llama-runner-qnn-linux
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ strategy:
+ matrix:
+ dtype: [fp32]
+ build-tool: [cmake]
+ mode: [qnn]
+ fail-fast: false
+ with:
+ runner: linux.2xlarge
+ docker-image: executorch-ubuntu-22.04-qnn-sdk
+ submodules: 'true'
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ timeout: 900
+ script: |
+ # The generic Linux job chooses to use base env, not the one setup by the image
+ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+ conda activate "${CONDA_ENV}"
+
+ DTYPE=${{ matrix.dtype }}
+ BUILD_TOOL=${{ matrix.build-tool }}
+ MODE=${{ matrix.mode }}
+
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+ PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+ # Setup executorch
+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
+ # Install requirements for export_llama
+ PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+ # Test llama2
+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
diff --git a/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h b/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
index f13b5962b7..98219d9763 100644
--- a/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
+++ b/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
@@ -86,7 +86,7 @@ class PyQnnOpWrapper {
break;
default:
QNN_EXECUTORCH_LOG_ERROR(
- "%s has invalid data type: %d", name, data_type);
+ "%s has invalid data type: %d", name.c_str(), data_type);
break;
}
}
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index be317a2d64..aafd6252e7 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -25,9 +25,9 @@ usage() {
[ "$1" = -h ] && usage
BUILD_X86_64="true"
-CMAKE_X86_64="cmake-out"
+CMAKE_X86_64="build-x86"
BUILD_AARCH64="true"
-CMAKE_AARCH64="cmake-out-android"
+CMAKE_AARCH64="build-android"
CLEAN="true"
BUILD_TYPE="Debug"
BUILD_JOB_NUMBER="16"
From 2117c1a2f31931ff3de06ca04d669cfeffbf4f09 Mon Sep 17 00:00:00 2001
From: cccclai
Date: Mon, 12 Aug 2024 23:09:09 -0700
Subject: [PATCH 10/11] add a list for ops to be added
Differential Revision: D61002938
Pull Request resolved: https://github.com/pytorch/executorch/pull/4624
---
backends/qualcomm/partition/common_defs.py | 4 ++++
backends/qualcomm/partition/qnn_partitioner.py | 12 +++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/backends/qualcomm/partition/common_defs.py b/backends/qualcomm/partition/common_defs.py
index c60afc2dd3..353169bc18 100644
--- a/backends/qualcomm/partition/common_defs.py
+++ b/backends/qualcomm/partition/common_defs.py
@@ -16,6 +16,10 @@
exir_ops.edge.aten.copy.default,
]
+to_be_implemented_operator = [
+ exir_ops.edge.aten.where.default,
+]
+
allow_list_operator = [
_operator.getitem,
]
diff --git a/backends/qualcomm/partition/qnn_partitioner.py b/backends/qualcomm/partition/qnn_partitioner.py
index c3afc23dae..86028d0d44 100644
--- a/backends/qualcomm/partition/qnn_partitioner.py
+++ b/backends/qualcomm/partition/qnn_partitioner.py
@@ -27,7 +27,11 @@
from torch.fx.passes.infra.partitioner import Partition
from torch.fx.passes.operator_support import OperatorSupportBase
-from .common_defs import allow_list_operator, not_supported_operator
+from .common_defs import (
+ allow_list_operator,
+ not_supported_operator,
+ to_be_implemented_operator,
+)
class QnnOperatorSupport(OperatorSupportBase):
@@ -62,6 +66,12 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
if node.op != "call_function" or node.target in not_supported_operator:
return False
+ if node.target in to_be_implemented_operator:
+ print(
+ f"[QNN Partitioner Op Support]: {node.target.__name__} | Skipped, this op can be supported, please report an issue in https://github.com/pytorch/executorch/issues"
+ )
+ return False
+
if node.target in allow_list_operator:
return True
From 2654f596297935840144e96fe1f1f691517dc6ec Mon Sep 17 00:00:00 2001
From: cccclai
Date: Tue, 13 Aug 2024 00:33:48 -0700
Subject: [PATCH 11/11] Back out "Add stories ci for qnn"
Differential Revision: D61192484
Pull Request resolved: https://github.com/pytorch/executorch/pull/4685
---
.ci/docker/build.sh | 8 -----
.ci/docker/ubuntu/Dockerfile | 5 ---
.ci/scripts/build-qnn-sdk.sh | 19 ----------
.ci/scripts/setup-qnn-deps.sh | 29 ---------------
.ci/scripts/test_llama.sh | 24 -------------
.github/workflows/docker-builds.yml | 1 -
.github/workflows/trunk.yml | 35 -------------------
.../qualcomm/aot/python/PyQnnWrapperAdaptor.h | 2 +-
backends/qualcomm/scripts/build.sh | 4 +--
9 files changed, 3 insertions(+), 124 deletions(-)
delete mode 100644 .ci/scripts/build-qnn-sdk.sh
delete mode 100644 .ci/scripts/setup-qnn-deps.sh
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index 59d5daa8b8..ad2f713466 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -37,10 +37,6 @@ case "${IMAGE_NAME}" in
ARM_SDK=yes
CLANG_VERSION=12
;;
- executorch-ubuntu-22.04-qnn-sdk)
- QNN_SDK=yes
- CLANG_VERSION=12
- ;;
executorch-ubuntu-22.04-clang12-android)
LINTRUNNER=""
CLANG_VERSION=12
@@ -63,9 +59,6 @@ cp ../../requirements-lintrunner.txt ./
# with a new image hash when the content here is updated
cp -r ../../examples/arm/ ./arm
-# Copy qnn setup script from root to here
-cp -r ../../backends/qualcomm/ ./qualcomm
-
docker build \
--no-cache \
--progress=plain \
@@ -79,7 +72,6 @@ docker build \
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
--build-arg "ARM_SDK=${ARM_SDK:-}" \
- --build-arg "QNN_SDK=${QNN_SDK:-}" \
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
-f "${OS}"/Dockerfile \
"$@" \
diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
index 2aa9f24b67..449cd14b6b 100644
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@@ -82,10 +82,5 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
# Set up ARM SDK if needed
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "ossci@example.com"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi
-ARG QNN_SDK
-COPY --chown=ci-user:ci-user ./qualcomm /opt/qualcomm
-# Set up QNN SDK if needed
-RUN if [ -n "${QNN_SDK}" ]; then git config --global user.email "ossci@example.com"; git config --global user.name "OSS CI"; fi
-
USER ci-user
CMD ["bash"]
diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh
deleted file mode 100644
index d912069b06..0000000000
--- a/.ci/scripts/build-qnn-sdk.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-set -ex
-
-build_qnn_backend() {
- echo "Start building qnn backend."
- export ANDROID_NDK_ROOT=/opt/ndk
- export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
- export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
-
- bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
-}
-
-build_qnn_backend
diff --git a/.ci/scripts/setup-qnn-deps.sh b/.ci/scripts/setup-qnn-deps.sh
deleted file mode 100644
index 3b39e1aafe..0000000000
--- a/.ci/scripts/setup-qnn-deps.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-set -ex
-
-install_qnn() {
- echo "Start installing qnn."
- QNN_INSTALLATION_DIR=/tmp/qnn
- mkdir -p "${QNN_INSTALLATION_DIR}"
-
- curl -Lo /tmp/v2.23.0.24.06.24.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.23.0.24.06.24.zip"
- echo "Finishing downloading qnn sdk."
- unzip -qo /tmp/v2.23.0.24.06.24.zip -d /tmp
- echo "Finishing unzip qnn sdk."
-
-
- # Print the content for manual verification
- ls -lah "/tmp/qairt"
- mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
- echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."
-
- ls -lah "${QNN_INSTALLATION_DIR}"
-}
-
-install_qnn
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 30bbefb78b..4e1cb99cc0 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -72,25 +72,6 @@ fi
echo "COREML option ${COREML}"
-if [[ "${MODE}" =~ .*qnn.* ]]; then
- QNN=ON
- export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
- export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
- export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
- export PYTHONPATH=".."
- cp schema/program.fbs exir/_serialize/program.fbs
- cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
- cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
- cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
-
-else
- QNN=OFF
- QNN_SDK_ROOT=""
-fi
-
-echo "QNN option ${QNN}"
-echo "QNN_SDK_ROOT: ${QNN_SDK_ROOT}"
-
if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -115,8 +96,6 @@ cmake_install_executorch_libraries() {
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DEXECUTORCH_BUILD_MPS="$MPS" \
-DEXECUTORCH_BUILD_COREML="$COREML" \
- -DEXECUTORCH_BUILD_QNN="$QNN" \
- -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug
@@ -197,9 +176,6 @@ fi
if [[ "${COREML}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --coreml --disable_dynamic_shape"
fi
-if [[ "${QNN}" == "ON" ]]; then
- EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
-fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index d256af7fcd..f773f3aca8 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -38,7 +38,6 @@ jobs:
- docker-image-name: executorch-ubuntu-22.04-clang12
- docker-image-name: executorch-ubuntu-22.04-linter
- docker-image-name: executorch-ubuntu-22.04-arm-sdk
- - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
- docker-image-name: executorch-ubuntu-22.04-clang12-android
env:
DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 86e44e647d..9b28d26048 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -270,38 +270,3 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
-
-
- test-llama-runner-qnn-linux:
- name: test-llama-runner-qnn-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
- strategy:
- matrix:
- dtype: [fp32]
- build-tool: [cmake]
- mode: [qnn]
- fail-fast: false
- with:
- runner: linux.2xlarge
- docker-image: executorch-ubuntu-22.04-qnn-sdk
- submodules: 'true'
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- timeout: 900
- script: |
- # The generic Linux job chooses to use base env, not the one setup by the image
- CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
- conda activate "${CONDA_ENV}"
-
- DTYPE=${{ matrix.dtype }}
- BUILD_TOOL=${{ matrix.build-tool }}
- MODE=${{ matrix.mode }}
-
- PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
- PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-
- # Setup executorch
- PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
- # Install requirements for export_llama
- PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
- # Test llama2
- PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
diff --git a/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h b/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
index 98219d9763..f13b5962b7 100644
--- a/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
+++ b/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h
@@ -86,7 +86,7 @@ class PyQnnOpWrapper {
break;
default:
QNN_EXECUTORCH_LOG_ERROR(
- "%s has invalid data type: %d", name.c_str(), data_type);
+ "%s has invalid data type: %d", name, data_type);
break;
}
}
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index aafd6252e7..be317a2d64 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -25,9 +25,9 @@ usage() {
[ "$1" = -h ] && usage
BUILD_X86_64="true"
-CMAKE_X86_64="build-x86"
+CMAKE_X86_64="cmake-out"
BUILD_AARCH64="true"
-CMAKE_AARCH64="build-android"
+CMAKE_AARCH64="cmake-out-android"
CLEAN="true"
BUILD_TYPE="Debug"
BUILD_JOB_NUMBER="16"