Introduce virtual_transpose() to vTensor for no copy transposition (#5353)

Summary:
Pull Request resolved: #5353

## Context

With `axis_map` integrated into matrix multiplication, we can now test no-copy transposes for texture-backed tensors.

A tensor can be transposed without modifying its storage by swapping the corresponding entries in the tensor's `axis_map`, and by updating the tensor's memory layout if the packed dimension is one of the dims being transposed.
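
As a rough usage sketch (illustrative only; the constructor call mirrors the test code added in this diff, and the concrete sizes are made up):

```cpp
// Construct a width-packed, texture-backed tensor with NCHW sizes {2, 3, 4, 5}.
vTensor t(context(), {2, 3, 4, 5}, vkapi::kFloat, utils::kTexture3D, utils::kWidthPacked);

// Swap the H and W dims (NCHW indices 2 and 3) without touching the VkImage.
t.virtual_transpose(2, 3);

// Only metadata changed: sizes() is now {2, 3, 5, 4}, the W and H entries of
// the axis_map are swapped, and because the packed (W) dim was transposed the
// tensor now reports a height-packed memory layout.
```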
ghstack-source-id: 242802318
exported-using-ghexport

Reviewed By: jorgep31415

Differential Revision: D62652009

fbshipit-source-id: ae1c81a61a92e69aaa689418d7e62a81d8e09a12
SS-JIA authored and facebook-github-bot committed Sep 16, 2024
1 parent c252553 commit 26375cc
Showing 5 changed files with 179 additions and 34 deletions.
42 changes: 42 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -567,6 +567,48 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
update_metadata();
}

/*
* Transposing the dim order is a bit unintuitive. dim0 and dim1 have swapped
* their "identities", so we need to swap the values of dim0 and dim1 wherever
* they appear in the dim order vector. Compare this to just swapping the
* elements at dim0 and dim1 in the `sizes` vector.
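*
* For example, with dim_order = {0, 2, 3, 1} and a transpose of dim0 = 1 and
* dim1 = 2, swapping the values 1 and 2 wherever they appear yields
* {0, 1, 3, 2}, whereas swapping the elements at indices 1 and 2 would give
* {0, 3, 2, 1}.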
*/
void transpose_dim_order_inplace(
std::vector<int64_t>& dim_order,
const int64_t dim0,
const int64_t dim1) {
for (int i = 0; i < dim_order.size(); ++i) {
if (dim_order[i] == dim0) {
dim_order[i] = dim1;
} else if (dim_order[i] == dim1) {
dim_order[i] = dim0;
}
}
}

void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);
if (storage_type() == utils::kBuffer) {
transpose_dim_order_inplace(dim_order_, dim0, dim1);
} else {
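// Convert the NCHW-order dim indices to WHCN indices.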
const int dim0_whcn = sizes_.size() - 1 - dim0;
const int dim1_whcn = sizes_.size() - 1 - dim1;
// Cannot transpose batch dimension for texture storage
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);

std::iter_swap(
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);

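// If the packed dim is one of the two transposed dims, it follows the swap,
// so the memory layout tag must be updated to match.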
if (packed_dim_whcn_idx() == dim0_whcn) {
memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
}
if (packed_dim_whcn_idx() == dim1_whcn) {
memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
}
}
update_metadata();
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
sizes_ = new_sizes;
update_metadata();
5 changes: 5 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -530,6 +530,11 @@ class vTensor final {
*/
void virtual_resize(const std::vector<int64_t>& new_sizes);

/*
* Transpose the tensor in place by swapping dim0 and dim1. Only the tensor's
* metadata is updated; the underlying VkImage or VkBuffer is not modified.
*/
void virtual_transpose(const int64_t dim0, const int64_t dim1);

/*
* Discard the underlying VkImage or VkBuffer and re-allocate based on new
* tensor sizes
38 changes: 38 additions & 0 deletions backends/vulkan/test/utils/test_utils.cpp
@@ -314,6 +314,44 @@ void record_reference_matmul(
mat2.strides_ubo());
}

void record_matmul_texture3d(
api::Context* context,
api::vTensor& out,
api::vTensor& mat1,
api::vTensor& mat2) {
std::string kernel_name = "matmul_naive";
kernel_name.reserve(kShaderNameReserve);
add_storage_type_suffix(kernel_name, out.storage_type());
add_dtype_suffix(kernel_name, out.dtype());

utils::uvec3 global_wg_size = out.logical_extents();

vkapi::PipelineBarrier pipeline_barrier{};
api::context()->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name),
pipeline_barrier,
global_wg_size,
{8, 8, 1},
{out.packed_dim_whcn_idx(),
mat1.packed_dim_whcn_idx(),
mat2.packed_dim_whcn_idx()},
VK_NULL_HANDLE,
0,
out.image(
pipeline_barrier,
vkapi::PipelineStage::COMPUTE,
vkapi::MemoryAccessType::WRITE),
mat1.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
mat2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
out.sizes_ubo(),
out.logical_limits_ubo(),
out.axis_map_ubo(),
mat1.sizes_ubo(),
mat1.axis_map_ubo(),
mat2.sizes_ubo(),
mat2.axis_map_ubo());
}

//
// Input & Output Utilities
//
6 changes: 6 additions & 0 deletions backends/vulkan/test/utils/test_utils.h
@@ -121,6 +121,12 @@ void record_reference_matmul(
api::vTensor& mat1,
api::vTensor& mat2);

void record_matmul_texture3d(
api::Context* context,
api::vTensor& out,
api::vTensor& mat1,
api::vTensor& mat2);

//
// Input & Output Utilities
//
122 changes: 88 additions & 34 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -258,6 +258,48 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
}
}

TEST_F(VulkanComputeAPITest, virtual_transpose_test) {
std::vector<int64_t> sizes = {7, 9, 11, 13};
// (dim0, dim1), new_sizes, new_dim_order, new_axis_map, new_packed_dim_idx
std::vector<std::vector<std::vector<int64_t>>> test_cases = {
{{2, 3}, {7, 9, 13, 11}, {0, 1, 3, 2}, {1, 0, 2, 2}, {1}},
{{2, 1}, {7, 11, 9, 13}, {0, 2, 1, 3}, {0, 2, 1, 2}, {0}},
{{1, 3}, {7, 13, 11, 9}, {0, 3, 2, 1}, {2, 1, 0, 2}, {2}},
};
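// Worked example (first case): transposing NCHW dims 2 and 3 swaps the last
// two sizes, swaps the values 2 and 3 in the default dim order {0, 1, 2, 3}
// to give {0, 1, 3, 2}, swaps the W and H entries of the default axis map
// {0, 1, 2, 2} to give {1, 0, 2, 2}, and moves the packed dim from W (0) to
// H (1).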

for (const auto& test_case : test_cases) {
const int dim0 = test_case.at(0).at(0);
const int dim1 = test_case.at(0).at(1);

const auto& expected_sizes = test_case.at(1);
const auto& expected_dim_order = test_case.at(2);
const auto& expected_axis_map = test_case.at(3);
const int expected_packed_dim = test_case.at(4).at(0);

{
vTensor a_buffer = vTensor(
context(), sizes, vkapi::kFloat, utils::kBuffer, utils::kWidthPacked);

a_buffer.virtual_transpose(dim0, dim1);
EXPECT_TRUE(a_buffer.sizes() == expected_sizes);
EXPECT_TRUE(a_buffer.dim_order() == expected_dim_order);
}

{
vTensor a_texture = vTensor(
context(),
sizes,
vkapi::kFloat,
utils::kTexture3D,
utils::kWidthPacked);
a_texture.virtual_transpose(dim0, dim1);
EXPECT_TRUE(a_texture.sizes() == expected_sizes);
EXPECT_TRUE(a_texture.axis_map() == expected_axis_map);
EXPECT_TRUE(a_texture.packed_dim_whcn_idx() == expected_packed_dim);
}
}
}

TEST_F(VulkanComputeAPITest, vec_test) {
utils::vec3 v3({1, 2, 3});
ASSERT_TRUE(v3[0] == 1);
@@ -637,46 +679,58 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) {
constexpr int N = 17;
std::vector<int64_t> mat1_sizes = {M, K};
std::vector<int64_t> mat2_sizes = {N, K};
std::vector<int64_t> mat2_t_sizes = {K, N};
std::vector<int64_t> out_sizes = {M, N};

std::vector<int64_t> transposed_dim_order = {1, 0};

vTensor mat1 = CREATE_FLOAT_BUFFER(mat1_sizes, /*allocate_memory=*/true);
vTensor mat2 = CREATE_FLOAT_BUFFER(mat2_sizes, /*allocate_memory=*/true);
vTensor out = CREATE_FLOAT_BUFFER(out_sizes, /*allocate_memory=*/true);

// Generate data
std::vector<float> mat1_data =
create_random_float_buffer(mat1.staging_buffer_numel());
std::vector<float> mat2_data =
create_random_float_buffer(mat2.staging_buffer_numel());

// Create direct view and modify sizes and strides later
vTensor mat2_t = vTensor(mat2);

std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
std::vector<float> ref_out =
compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);

// Fill original tensor with some data
fill_vtensor(mat1, mat1_data);
fill_vtensor(mat2, mat2_data);

record_reference_matmul(api::context(), out, mat1, mat2_t);
for (const auto storage_type : {utils::kTexture3D, utils::kBuffer}) {
vTensor mat1 = vTensor(
context(),
mat1_sizes,
vkapi::kFloat,
storage_type,
utils::kWidthPacked);
vTensor mat2 = vTensor(
context(),
mat2_sizes,
vkapi::kFloat,
storage_type,
utils::kWidthPacked);
vTensor out = vTensor(
context(), out_sizes, vkapi::kFloat, storage_type, utils::kWidthPacked);

// Generate data
std::vector<float> mat1_data =
create_random_float_buffer(mat1.staging_buffer_numel());
std::vector<float> mat2_data =
create_random_float_buffer(mat2.staging_buffer_numel());

// Create direct view and modify sizes and strides later
vTensor mat2_t = vTensor(mat2);
// Update sizes and strides of mat2_t to be that of a transposed tensor
mat2_t.virtual_transpose(0, 1);
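
// mat2 is width-packed, so transposing the two dims of this 2-D view moves
// the packed dim from W to H; the assertion below checks that the view now
// reports a height-packed layout.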

EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);

std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
std::vector<float> ref_out =
compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);

// Update sizes and strides of mat2_t to be that of a transposed tensor
mat2_t.virtual_reconfigure(mat2_t_sizes, transposed_dim_order);
EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);
// Fill original tensor with some data
fill_vtensor(mat1, mat1_data);
fill_vtensor(mat2, mat2_data);

std::vector<float> data_out(out.staging_buffer_numel());
// Extract the copy tensor; should contain the data of the original tensor
extract_vtensor(out, data_out);
if (storage_type == utils::kTexture3D) {
record_matmul_texture3d(context(), out, mat1, mat2_t);
} else {
record_reference_matmul(context(), out, mat1, mat2_t);
}

EXPECT_TRUE(data_out.size() == ref_out.size());
std::vector<float> data_out(out.staging_buffer_numel());
// Extract the output tensor; it should contain the matmul result
extract_vtensor(out, data_out);

for (size_t i = 0; i < data_out.size(); ++i) {
EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
for (size_t i = 0; i < ref_out.size(); ++i) {
EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
}
}
}

