Skip to content

Commit 26375cc

Browse files
SS-JIA authored and facebook-github-bot committed
Introduce virtual_transpose() to vTensor for no copy transposition (#5353)
Summary: Pull Request resolved: #5353 ## Context With `axis_map` integrated into matrix multiplication, we can now test no-copy transposes for texture backed tensors. Transposing a tensor can be done without modifying the storage by swapping elements in the tensor's `axis_map`, and also updating the layout of the tensor if the packed dimension was one of the dims that were transposed. ghstack-source-id: 242802318 exported-using-ghexport Reviewed By: jorgep31415 Differential Revision: D62652009 fbshipit-source-id: ae1c81a61a92e69aaa689418d7e62a81d8e09a12
1 parent c252553 commit 26375cc

File tree

5 files changed

+179
-34
lines changed

5 files changed

+179
-34
lines changed

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,48 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
567567
update_metadata();
568568
}
569569

570+
/*
571+
* Transposing the dim order is a bit unintuitive. dim0 and dim1 have swapped
572+
* their "identities", so we need to swap the values of dim0 and dim1 wherever
573+
* they appear in the dim order vector. Compare this to just swapping the
574+
* elements at dim0 and dim1 in the `sizes` vectors.
575+
*/
576+
void transpose_dim_order_inplace(
577+
std::vector<int64_t>& dim_order,
578+
const int64_t dim0,
579+
const int64_t dim1) {
580+
for (int i = 0; i < dim_order.size(); ++i) {
581+
if (dim_order[i] == dim0) {
582+
dim_order[i] = dim1;
583+
} else if (dim_order[i] == dim1) {
584+
dim_order[i] = dim0;
585+
}
586+
}
587+
}
588+
589+
void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
590+
std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);
591+
if (storage_type() == utils::kBuffer) {
592+
transpose_dim_order_inplace(dim_order_, dim0, dim1);
593+
} else {
594+
const int dim0_whcn = sizes_.size() - 1 - dim0;
595+
const int dim1_whcn = sizes_.size() - 1 - dim1;
596+
// Cannot transpose batch dimension for texture storage
597+
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);
598+
599+
std::iter_swap(
600+
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);
601+
602+
if (packed_dim_whcn_idx() == dim0_whcn) {
603+
memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
604+
}
605+
if (packed_dim_whcn_idx() == dim1_whcn) {
606+
memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
607+
}
608+
}
609+
update_metadata();
610+
}
611+
570612
void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
571613
sizes_ = new_sizes;
572614
update_metadata();

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,11 @@ class vTensor final {
530530
*/
531531
void virtual_resize(const std::vector<int64_t>& new_sizes);
532532

533+
/*
534+
* Transpose the tensor in-place by updating its metadata.
535+
*/
536+
void virtual_transpose(const int64_t dim0, const int64_t dim1);
537+
533538
/*
534539
* Discard the underlying VkImage or VkBuffer and re-allocate based on new
535540
* tensor sizes

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,44 @@ void record_reference_matmul(
314314
mat2.strides_ubo());
315315
}
316316

317+
/*
 * Record a naive matmul compute job (out = mat1 @ mat2) for texture-backed
 * tensors, dispatching one invocation per output texel.
 */
void record_matmul_texture3d(
    api::Context* context,
    api::vTensor& out,
    api::vTensor& mat1,
    api::vTensor& mat2) {
  std::string kernel_name = "matmul_naive";
  kernel_name.reserve(kShaderNameReserve);
  add_storage_type_suffix(kernel_name, out.storage_type());
  add_dtype_suffix(kernel_name, out.dtype());

  utils::uvec3 global_wg_size = out.logical_extents();

  vkapi::PipelineBarrier pipeline_barrier{};
  // Submit via the context passed by the caller; previously this function
  // ignored its `context` parameter and used the global api::context().
  context->submit_compute_job(
      VK_KERNEL_FROM_STR(kernel_name),
      pipeline_barrier,
      global_wg_size,
      {8, 8, 1},
      {out.packed_dim_whcn_idx(),
       mat1.packed_dim_whcn_idx(),
       mat2.packed_dim_whcn_idx()},
      VK_NULL_HANDLE,
      0,
      out.image(
          pipeline_barrier,
          vkapi::PipelineStage::COMPUTE,
          vkapi::MemoryAccessType::WRITE),
      mat1.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
      mat2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
      out.sizes_ubo(),
      out.logical_limits_ubo(),
      out.axis_map_ubo(),
      mat1.sizes_ubo(),
      mat1.axis_map_ubo(),
      mat2.sizes_ubo(),
      mat2.axis_map_ubo());
}
354+
317355
//
318356
// Input & Output Utilities
319357
//

backends/vulkan/test/utils/test_utils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ void record_reference_matmul(
121121
api::vTensor& mat1,
122122
api::vTensor& mat2);
123123

124+
void record_matmul_texture3d(
125+
api::Context* context,
126+
api::vTensor& out,
127+
api::vTensor& mat1,
128+
api::vTensor& mat2);
129+
124130
//
125131
// Input & Output Utilities
126132
//

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 88 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,48 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
258258
}
259259
}
260260

261+
TEST_F(VulkanComputeAPITest, virtual_transpose_test) {
  std::vector<int64_t> sizes = {7, 9, 11, 13};

  // Each case records the transposed dim pair plus the metadata expected
  // afterwards: sizes, dim order (buffer storage), axis map and packed dim
  // index (texture storage).
  struct TransposeCase {
    std::pair<int, int> dims;
    std::vector<int64_t> sizes;
    std::vector<int64_t> dim_order;
    std::vector<int64_t> axis_map;
    int packed_dim;
  };

  const std::vector<TransposeCase> cases = {
      {{2, 3}, {7, 9, 13, 11}, {0, 1, 3, 2}, {1, 0, 2, 2}, 1},
      {{2, 1}, {7, 11, 9, 13}, {0, 2, 1, 3}, {0, 2, 1, 2}, 0},
      {{1, 3}, {7, 13, 11, 9}, {0, 3, 2, 1}, {2, 1, 0, 2}, 2},
  };

  for (const TransposeCase& c : cases) {
    // Buffer storage: transpose is reflected in sizes and dim order.
    {
      vTensor a_buffer = vTensor(
          context(), sizes, vkapi::kFloat, utils::kBuffer, utils::kWidthPacked);

      a_buffer.virtual_transpose(c.dims.first, c.dims.second);
      EXPECT_TRUE(a_buffer.sizes() == c.sizes);
      EXPECT_TRUE(a_buffer.dim_order() == c.dim_order);
    }

    // Texture storage: transpose is reflected in sizes, axis map, and the
    // packed dim index.
    {
      vTensor a_texture = vTensor(
          context(),
          sizes,
          vkapi::kFloat,
          utils::kTexture3D,
          utils::kWidthPacked);
      a_texture.virtual_transpose(c.dims.first, c.dims.second);
      EXPECT_TRUE(a_texture.sizes() == c.sizes);
      EXPECT_TRUE(a_texture.axis_map() == c.axis_map);
      EXPECT_TRUE(a_texture.packed_dim_whcn_idx() == c.packed_dim);
    }
  }
}
302+
261303
TEST_F(VulkanComputeAPITest, vec_test) {
262304
utils::vec3 v3({1, 2, 3});
263305
ASSERT_TRUE(v3[0] == 1);
@@ -637,46 +679,58 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) {
637679
constexpr int N = 17;
638680
std::vector<int64_t> mat1_sizes = {M, K};
639681
std::vector<int64_t> mat2_sizes = {N, K};
640-
std::vector<int64_t> mat2_t_sizes = {K, N};
641682
std::vector<int64_t> out_sizes = {M, N};
642683

643-
std::vector<int64_t> transposed_dim_order = {1, 0};
644-
645-
vTensor mat1 = CREATE_FLOAT_BUFFER(mat1_sizes, /*allocate_memory=*/true);
646-
vTensor mat2 = CREATE_FLOAT_BUFFER(mat2_sizes, /*allocate_memory=*/true);
647-
vTensor out = CREATE_FLOAT_BUFFER(out_sizes, /*allocate_memory=*/true);
648-
649-
// Generate data
650-
std::vector<float> mat1_data =
651-
create_random_float_buffer(mat1.staging_buffer_numel());
652-
std::vector<float> mat2_data =
653-
create_random_float_buffer(mat2.staging_buffer_numel());
654-
655-
// Create direct view and modify sizes and strides later
656-
vTensor mat2_t = vTensor(mat2);
657-
658-
std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
659-
std::vector<float> ref_out =
660-
compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);
661-
662-
// Fill original tensor with some data
663-
fill_vtensor(mat1, mat1_data);
664-
fill_vtensor(mat2, mat2_data);
665-
666-
record_reference_matmul(api::context(), out, mat1, mat2_t);
684+
for (const auto storage_type : {utils::kTexture3D, utils::kBuffer}) {
685+
vTensor mat1 = vTensor(
686+
context(),
687+
mat1_sizes,
688+
vkapi::kFloat,
689+
storage_type,
690+
utils::kWidthPacked);
691+
vTensor mat2 = vTensor(
692+
context(),
693+
mat2_sizes,
694+
vkapi::kFloat,
695+
storage_type,
696+
utils::kWidthPacked);
697+
vTensor out = vTensor(
698+
context(), out_sizes, vkapi::kFloat, storage_type, utils::kWidthPacked);
699+
700+
// Generate data
701+
std::vector<float> mat1_data =
702+
create_random_float_buffer(mat1.staging_buffer_numel());
703+
std::vector<float> mat2_data =
704+
create_random_float_buffer(mat2.staging_buffer_numel());
705+
706+
// Create direct view and modify sizes and strides later
707+
vTensor mat2_t = vTensor(mat2);
708+
// Update sizes and strides of mat2_t to be that of a transposed tensor
709+
mat2_t.virtual_transpose(0, 1);
710+
711+
EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);
712+
713+
std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
714+
std::vector<float> ref_out =
715+
compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);
667716

668-
// Update sizes and strides of mat2_t to be that of a transposed tensor
669-
mat2_t.virtual_reconfigure(mat2_t_sizes, transposed_dim_order);
670-
EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);
717+
// Fill original tensor with some data
718+
fill_vtensor(mat1, mat1_data);
719+
fill_vtensor(mat2, mat2_data);
671720

672-
std::vector<float> data_out(out.staging_buffer_numel());
673-
// Extract the copy tensor; should contain the data of the original tensor
674-
extract_vtensor(out, data_out);
721+
if (storage_type == utils::kTexture3D) {
722+
record_matmul_texture3d(context(), out, mat1, mat2_t);
723+
} else {
724+
record_reference_matmul(context(), out, mat1, mat2_t);
725+
}
675726

676-
EXPECT_TRUE(data_out.size() == ref_out.size());
727+
std::vector<float> data_out(out.staging_buffer_numel());
728+
// Extract the copy tensor; should contain the data of the original tensor
729+
extract_vtensor(out, data_out);
677730

678-
for (size_t i = 0; i < data_out.size(); ++i) {
679-
EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
731+
for (size_t i = 0; i < ref_out.size(); ++i) {
732+
EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
733+
}
680734
}
681735
}
682736

0 commit comments

Comments
 (0)