
Commit 0ece07d

[ET-VK][ez] Test command buffer re-encoding on resize (#10999)
## Context

Add a test where `encode_execute()` is called again after resizing model inputs and propagating the new sizes.

Currently, dynamic shapes are handled by simply updating tensor metadata when sizes change; compute shaders then perform the same computations using the updated size/stride information. However, for some operators, different input sizes require different compute shaders to achieve maximum performance. One example is matrix multiplication, where matrix-matrix multiplication typically uses a different algorithm than vector-matrix (or matrix-vector) multiplication. For such models, it is best to trigger a re-encoding of the command buffer upon input resize, so that compute shaders can be re-selected based on the current input sizes.

The actual changes enabling shader re-selection will be introduced in the next diff; this diff only checks that command buffer re-encoding works as advertised.

## Changes

Add a test to `vulkan_compute_api_test` that verifies the ComputeGraph API can handle `encode_execute()` being called multiple times.

Differential Revision: [D75013781](https://our.internmc.facebook.com/intern/diff/D75013781/)
1 parent 0c8cfa9 · commit 0ece07d
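For reference, the flow the new test exercises looks roughly like this. It is a minimal sketch based on the test added in this diff; the include path and the `build_mm_graph` / `execute_graph_and_check_output` helpers are the test utilities introduced below, so treat it as illustrative rather than exact code.

```cpp
#include <executorch/backends/vulkan/test/utils/test_utils.h>

using namespace vkcompute;

void resize_and_reencode_sketch() {
  // Build a (M x K) x (K x N) matmul graph with the helper introduced in this diff.
  ComputeGraph graph = build_mm_graph(
      /*B=*/1, /*M=*/31, /*K=*/127, /*N=*/23,
      vkapi::kFloat, utils::kTexture3D, utils::kWidthPacked,
      /*prepack_mat2=*/false, /*mat2_val=*/2.0f);

  graph.prepare();
  graph.encode_prepack();
  graph.prepack();
  graph.encode_execute();  // first command buffer encoding

  // With mat1 filled with 1.0 and mat2 filled with 2.0, every output element is K * (1.0 * 2.0).
  execute_graph_and_check_output(graph, {1.0f, 2.0f}, {127.0f * 1.0f * 2.0f});

  // Shrink the inputs (toward a GEMV-shaped problem) and propagate the new sizes.
  std::vector<int64_t> new_mat1_size = {1, 63};
  std::vector<int64_t> new_mat2_size = {63, 23};
  graph.resize_input(0, new_mat1_size);
  graph.resize_input(1, new_mat2_size);
  graph.propagate_resize();

  graph.encode_execute();  // re-encode the command buffer for the new sizes

  execute_graph_and_check_output(graph, {1.0f, 2.0f}, {63.0f * 1.0f * 2.0f});
}
```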

4 files changed: +150 −42 lines changed

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 5 additions & 5 deletions
@@ -644,6 +644,11 @@ void ComputeGraph::prepare() {
   if (config_.enable_querypool) {
     context_->initialize_querypool();
   }
+
+  for (SharedObject& shared_object : shared_objects_) {
+    shared_object.allocate(this);
+    shared_object.bind_users(this);
+  }
 }

 void ComputeGraph::encode_prepack() {
@@ -668,11 +673,6 @@ void ComputeGraph::encode_execute() {

   context_->cmd_reset_querypool();

-  for (SharedObject& shared_object : shared_objects_) {
-    shared_object.allocate(this);
-    shared_object.bind_users(this);
-  }
-
   for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
     node->encode(this);
   }
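Moving shared-object allocation out of `encode_execute()` and into `prepare()` means graph memory is allocated and bound exactly once, even when the execute command buffer is encoded multiple times. Below is a hypothetical helper sketching the ordering this enables; the function name is made up for illustration and the header path is assumed.

```cpp
#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

using namespace vkcompute;

// Hypothetical helper: encode the execute command buffer twice for a graph
// that has already been built. With this change, shared_object.allocate() and
// bind_users() run once in prepare(), so the second encode_execute() only
// re-records dispatches instead of re-allocating memory.
void encode_execute_twice(ComputeGraph& graph) {
  graph.prepare();         // shared objects allocated and bound here now
  graph.encode_prepack();
  graph.prepack();
  graph.encode_execute();  // first encoding
  // (the new test resizes inputs and calls propagate_resize() at this point)
  graph.encode_execute();  // re-encoding without repeated allocation
}
```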

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 53 additions & 0 deletions
@@ -537,6 +537,59 @@ void execute_graph_and_check_output(
   }
 }

+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2,
+    const float mat2_val) {
+  using namespace vkcompute;
+  GraphConfig config;
+  ComputeGraph graph(config);
+
+  std::vector<int64_t> mat1_size = {M, K};
+  std::vector<int64_t> mat2_size = {K, N};
+  std::vector<int64_t> out_size = {M, N};
+  if (B > 1) {
+    mat1_size.resize(3);
+    mat1_size = {B, M, K};
+    mat2_size.resize(3);
+    mat2_size = {B, K, N};
+    out_size.resize(3);
+    out_size = {B, M, N};
+  }
+
+  IOValueRef mat1 =
+      graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout);
+  IOValueRef mat2{};
+
+  CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype);
+  if (mat2_val != 0.0f) {
+    std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val);
+  }
+
+  if (prepack_mat2) {
+    mat2.value = mat2_w;
+  } else {
+    mat2.value =
+        graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout);
+    mat2.staging = graph.set_input_tensor(mat2.value);
+  }
+
+  IOValueRef out;
+  out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout);
+
+  VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
+
+  out.staging = graph.set_output_tensor(out.value);
+
+  return graph;
+}
+
 bool check_close(float a, float b, float atol, float rtol) {
   float max = std::max(std::abs(a), std::abs(b));
   float diff = std::abs(a - b);

backends/vulkan/test/utils/test_utils.h

Lines changed: 36 additions & 0 deletions
@@ -8,6 +8,8 @@

 #pragma once

+#include <random>
+
 #include <gtest/gtest.h>

 #include <executorch/backends/vulkan/runtime/api/api.h>
@@ -16,6 +18,8 @@
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
 #include <executorch/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h>

+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
 #define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
   vkcompute::api::vTensor(                           \
       vkcompute::api::context(),                     \
@@ -135,6 +139,22 @@ void record_matmul_texture3d(
 // Input & Output Utilities
 //

+inline std::vector<float> create_random_float_vector(
+    const size_t numel,
+    const float min = 0.0f,
+    const float max = 1.0f) {
+  std::vector<float> result(numel);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> dis(min, max);
+
+  for (size_t i = 0; i < numel; ++i) {
+    result[i] = dis(gen);
+  }
+
+  return result;
+}
+
 inline void fill_staging(
     vkcompute::api::StagingBuffer& staging,
     float val,
@@ -232,6 +252,22 @@ void execute_graph_and_check_output(
     std::vector<float> input_vals,
     std::vector<float> expected_outputs);

+#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype)                     \
+  std::vector<float> data_##name =                                        \
+      create_random_float_buffer(utils::multiply_integers(sizes));        \
+  ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data());
+
+vkcompute::ComputeGraph build_mm_graph(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkcompute::vkapi::ScalarType dtype,
+    vkcompute::utils::StorageType in_out_stype,
+    vkcompute::utils::GPUMemoryLayout memory_layout,
+    const bool prepack_mat2 = false,
+    const float mat2_val = 0.0f);
+
 //
 // Debugging Utilities
 //
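For readability, a call such as `CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype)` inside a function that has a `graph` in scope expands roughly as follows; this is my own expansion for illustration, not code from the diff.

```cpp
// Rough expansion of CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype):
std::vector<float> data_mat2_w =
    create_random_float_buffer(utils::multiply_integers(mat2_size));
ValueRef mat2_w = graph.add_tensorref(mat2_size, dtype, data_mat2_w.data());
```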

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 56 additions & 37 deletions
@@ -2753,43 +2753,8 @@ void test_mm(
     utils::StorageType storage_type,
     utils::GPUMemoryLayout memory_layout,
     bool prepack = true) {
-  GraphConfig config;
-  config.set_storage_type_override(storage_type);
-  ComputeGraph graph(config);
-
-  std::vector<int64_t> mat1_size = {M, K};
-  std::vector<int64_t> mat2_size = {K, N};
-  std::vector<int64_t> out_size = {M, N};
-  if (B > 1) {
-    mat1_size.resize(3);
-    mat1_size = {B, M, K};
-    mat2_size.resize(3);
-    mat2_size = {B, K, N};
-    out_size.resize(3);
-    out_size = {B, M, N};
-  }
-
-  IOValueRef mat2{};
-
-  CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f);
-
-  // Build graph
-
-  IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout);
-
-  if (prepack) {
-    mat2.value = mat2_w;
-  } else {
-    mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout);
-    mat2.staging = graph.set_input_tensor(mat2.value);
-  }
-
-  IOValueRef out;
-  out.value = graph.add_tensor(out_size, dtype, memory_layout);
-
-  VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value});
-
-  out.staging = graph.set_output_tensor(out.value);
+  ComputeGraph graph = build_mm_graph(
+      B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f);

   graph.prepare();
   graph.encode_prepack();
@@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) {
 #undef RUN_TESTS
 }

+void test_mm_with_resize_reencode(
+    int B,
+    int M,
+    int K,
+    int N,
+    vkapi::ScalarType dtype,
+    utils::StorageType storage_type,
+    utils::GPUMemoryLayout memory_layout) {
+  ASSERT_TRUE(M > 1);
+
+  ComputeGraph graph = build_mm_graph(
+      B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f);
+
+  graph.prepare();
+  graph.encode_prepack();
+  graph.prepack();
+  graph.encode_execute();
+
+  for (int i = 1; i < 4; i++) {
+    float val_mat1 = i;
+    float val_mat2 = i + 1;
+    float val_out = K * (val_mat1 * val_mat2);
+    execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
+  }
+
+  // Switch to GEMV mode
+  int new_K = K / 2;
+  std::vector<int64_t> new_mat1_size = {1, new_K};
+  std::vector<int64_t> new_mat2_size = {new_K, N};
+  graph.resize_input(0, new_mat1_size);
+  graph.resize_input(1, new_mat2_size);
+  graph.propagate_resize();
+
+  graph.encode_execute();
+
+  for (int i = 1; i < 4; i++) {
+    float val_mat1 = i;
+    float val_mat2 = i + 1;
+    float val_out = new_K * (val_mat1 * val_mat2);
+    execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out});
+  }
+}
+
+TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) {
+  test_mm_with_resize_reencode(
+      /*B = */ 1,
+      /*M = */ 31,
+      /*K = */ 127,
+      /*N = */ 23,
+      vkapi::kFloat,
+      utils::kTexture3D,
+      utils::kWidthPacked);
+}
+
 void test_max_pool2d(
     const std::vector<int64_t>& in_size,
     const int64_t base_val,
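As a quick check on the expected values used in `test_mm_with_resize_reencode`: when every element of `mat1` equals `val_mat1` and every element of `mat2` equals `val_mat2`, each element of the product is

$$\text{out}_{ij} \;=\; \sum_{k=1}^{K} \text{val\_mat1} \cdot \text{val\_mat2} \;=\; K \cdot \text{val\_mat1} \cdot \text{val\_mat2},$$

which is why the test expects `K * (val_mat1 * val_mat2)` before the resize and `new_K * (val_mat1 * val_mat2)` after the inner dimension shrinks to `new_K = K / 2`.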
