
[ET-VK] Introduce virtual_transpose() to vTensor for no copy transposition #5353

Closed · wants to merge 2 commits
42 changes: 42 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -567,6 +567,48 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
update_metadata();
}

/*
* Transposing the dim order is a bit unintuitive. dim0 and dim1 have swapped
* their "identities", so we need to swap the values of dim0 and dim1 wherever
 * they appear in the dim order vector. Compare this to the `sizes` vector,
 * where it is the elements at indices dim0 and dim1 that are swapped.
*/
void transpose_dim_order_inplace(
std::vector<int64_t>& dim_order,
const int64_t dim0,
const int64_t dim1) {
  for (size_t i = 0; i < dim_order.size(); ++i) {
if (dim_order[i] == dim0) {
dim_order[i] = dim1;
} else if (dim_order[i] == dim1) {
dim_order[i] = dim0;
}
}
}
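// For illustration, mirroring the (dim0=2, dim1=1) test case added in this
// diff: starting from a contiguous rank-4 tensor,
//   sizes     = {7, 9, 11, 13} -> swap elements at 1 and 2 -> {7, 11, 9, 13}
//   dim_order = {0, 1, 2, 3}   -> swap the values 1 and 2  -> {0, 2, 1, 3}
// Dims 1 and 2 exchange identities, so references to them in the dim order
// are renamed rather than repositioned.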

void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
std::iter_swap(sizes_.begin() + dim0, sizes_.begin() + dim1);
if (storage_type() == utils::kBuffer) {
transpose_dim_order_inplace(dim_order_, dim0, dim1);
} else {
const int dim0_whcn = sizes_.size() - 1 - dim0;
const int dim1_whcn = sizes_.size() - 1 - dim1;
// Cannot transpose batch dimension for texture storage
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);

std::iter_swap(
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);

    // If the packed dim was one of the transposed dims, it follows the swap;
    // the else prevents the first update from being undone by the second
    // check.
    if (packed_dim_whcn_idx() == dim0_whcn) {
      memory_layout_ = utils::GPUMemoryLayout(dim1_whcn);
    } else if (packed_dim_whcn_idx() == dim1_whcn) {
      memory_layout_ = utils::GPUMemoryLayout(dim0_whcn);
    }
}
update_metadata();
}
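// For illustration only (hypothetical helper, not part of this diff): the
// WHCN index counts dimensions from the innermost, so for {N, C, H, W}
// sizes, W -> 0, H -> 1, C -> 2, N -> 3, matching the arithmetic above:
//
//   int64_t to_whcn_idx(const size_t ndim, const int64_t dim) {
//     return static_cast<int64_t>(ndim) - 1 - dim;
//   }
//
// The VK_CHECK_COND rejects WHCN index 3 because a 3D texture's axes are
// exhausted by W, H, and C; the batch dim has no texture axis of its own.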

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
sizes_ = new_sizes;
update_metadata();
5 changes: 5 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -530,6 +530,11 @@ class vTensor final {
*/
void virtual_resize(const std::vector<int64_t>& new_sizes);

/*
 * Transpose the tensor in place by updating its metadata; no tensor data
 * is copied or moved.
*/
void virtual_transpose(const int64_t dim0, const int64_t dim1);
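/*
 * Example usage (illustrative; constructor arguments follow the tests in
 * this diff):
 *
 *   vTensor t(
 *       context(), {7, 9, 11, 13}, vkapi::kFloat, utils::kBuffer,
 *       utils::kWidthPacked);
 *   t.virtual_transpose(2, 3); // metadata-only; sizes become {7, 9, 13, 11}
 */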

/*
* Discard the underlying VkImage or VkBuffer and re-allocate based on new
* tensor sizes
38 changes: 38 additions & 0 deletions backends/vulkan/test/utils/test_utils.cpp
@@ -314,6 +314,44 @@ void record_reference_matmul(
mat2.strides_ubo());
}

void record_matmul_texture3d(
api::Context* context,
api::vTensor& out,
api::vTensor& mat1,
api::vTensor& mat2) {
std::string kernel_name = "matmul_naive";
kernel_name.reserve(kShaderNameReserve);
add_storage_type_suffix(kernel_name, out.storage_type());
add_dtype_suffix(kernel_name, out.dtype());

utils::uvec3 global_wg_size = out.logical_extents();

vkapi::PipelineBarrier pipeline_barrier{};
  context->submit_compute_job(
VK_KERNEL_FROM_STR(kernel_name),
pipeline_barrier,
global_wg_size,
{8, 8, 1},
{out.packed_dim_whcn_idx(),
mat1.packed_dim_whcn_idx(),
mat2.packed_dim_whcn_idx()},
VK_NULL_HANDLE,
0,
out.image(
pipeline_barrier,
vkapi::PipelineStage::COMPUTE,
vkapi::MemoryAccessType::WRITE),
mat1.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
mat2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
out.sizes_ubo(),
out.logical_limits_ubo(),
out.axis_map_ubo(),
mat1.sizes_ubo(),
mat1.axis_map_ubo(),
mat2.sizes_ubo(),
mat2.axis_map_ubo());
}
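// Reading the dispatch above: the global workgroup size covers the output's
// logical extents, {8, 8, 1} is the local workgroup size, and the three
// packed_dim_whcn_idx() values parameterize the matmul_naive shader (as what
// appear to be specialization constants) so each tensor is indexed according
// to its packed dim.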

//
// Input & Output Utilities
//
6 changes: 6 additions & 0 deletions backends/vulkan/test/utils/test_utils.h
@@ -121,6 +121,12 @@ void record_reference_matmul(
api::vTensor& mat1,
api::vTensor& mat2);

void record_matmul_texture3d(
api::Context* context,
api::vTensor& out,
api::vTensor& mat1,
api::vTensor& mat2);

//
// Input & Output Utilities
//
122 changes: 88 additions & 34 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -258,6 +258,48 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
}
}

TEST_F(VulkanComputeAPITest, virtual_transpose_test) {
std::vector<int64_t> sizes = {7, 9, 11, 13};
// (dim0, dim1), new_sizes, new_dim_order, new_axis_map, new_packed_dim_idx
std::vector<std::vector<std::vector<int64_t>>> test_cases = {
{{2, 3}, {7, 9, 13, 11}, {0, 1, 3, 2}, {1, 0, 2, 2}, {1}},
{{2, 1}, {7, 11, 9, 13}, {0, 2, 1, 3}, {0, 2, 1, 2}, {0}},
{{1, 3}, {7, 13, 11, 9}, {0, 3, 2, 1}, {2, 1, 0, 2}, {2}},
};

for (const auto& test_case : test_cases) {
const int dim0 = test_case.at(0).at(0);
const int dim1 = test_case.at(0).at(1);

const auto& expected_sizes = test_case.at(1);
const auto& expected_dim_order = test_case.at(2);
const auto& expected_axis_map = test_case.at(3);
const int expected_packed_dim = test_case.at(4).at(0);

{
vTensor a_buffer = vTensor(
context(), sizes, vkapi::kFloat, utils::kBuffer, utils::kWidthPacked);

a_buffer.virtual_transpose(dim0, dim1);
EXPECT_TRUE(a_buffer.sizes() == expected_sizes);
EXPECT_TRUE(a_buffer.dim_order() == expected_dim_order);
}

{
vTensor a_texture = vTensor(
context(),
sizes,
vkapi::kFloat,
utils::kTexture3D,
utils::kWidthPacked);
a_texture.virtual_transpose(dim0, dim1);
EXPECT_TRUE(a_texture.sizes() == expected_sizes);
EXPECT_TRUE(a_texture.axis_map() == expected_axis_map);
EXPECT_TRUE(a_texture.packed_dim_whcn_idx() == expected_packed_dim);
}
}
}
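// Worked derivation for the first case above, assuming the default axis map
// of a width-packed texture tensor is {0, 1, 2, 2} (the last entry is taken
// here to track the axis that batches are concatenated along):
//   transpose(2, 3) on rank 4 -> dim0_whcn = 4 - 1 - 2 = 1,
//                                dim1_whcn = 4 - 1 - 3 = 0;
//   swapping axis_map[1] and axis_map[0] gives {1, 0, 2, 2}, and since the
//   packed dim (0, width) equals dim1_whcn it becomes 1 (height-packed),
//   matching new_axis_map and new_packed_dim_idx in the table above.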

TEST_F(VulkanComputeAPITest, vec_test) {
utils::vec3 v3({1, 2, 3});
ASSERT_TRUE(v3[0] == 1);
@@ -637,46 +679,58 @@ TEST_F(VulkanComputeAPITest, tensor_no_copy_transpose_test) {
constexpr int N = 17;
std::vector<int64_t> mat1_sizes = {M, K};
std::vector<int64_t> mat2_sizes = {N, K};
  std::vector<int64_t> out_sizes = {M, N};

  for (const auto storage_type : {utils::kTexture3D, utils::kBuffer}) {
    vTensor mat1 = vTensor(
        context(),
        mat1_sizes,
        vkapi::kFloat,
        storage_type,
        utils::kWidthPacked);
    vTensor mat2 = vTensor(
        context(),
        mat2_sizes,
        vkapi::kFloat,
        storage_type,
        utils::kWidthPacked);
    vTensor out = vTensor(
        context(), out_sizes, vkapi::kFloat, storage_type, utils::kWidthPacked);

    // Generate random input data
    std::vector<float> mat1_data =
        create_random_float_buffer(mat1.staging_buffer_numel());
    std::vector<float> mat2_data =
        create_random_float_buffer(mat2.staging_buffer_numel());

    // Create a direct view of mat2, then update its metadata to describe a
    // transposed tensor
    vTensor mat2_t = vTensor(mat2);
    mat2_t.virtual_transpose(0, 1);

    EXPECT_TRUE(mat2_t.gpu_memory_layout() == utils::kHeightPacked);

    std::vector<float> mat2_t_data = transpose_matrix(mat2_data, N, K);
    std::vector<float> ref_out =
        compute_reference_matmul(mat1_data, mat2_t_data, M, K, N);

    // Fill the original tensors with the generated data
    fill_vtensor(mat1, mat1_data);
    fill_vtensor(mat2, mat2_data);

    if (storage_type == utils::kTexture3D) {
      record_matmul_texture3d(context(), out, mat1, mat2_t);
    } else {
      record_reference_matmul(context(), out, mat1, mat2_t);
    }

    // Extract the output; it should match the reference matmul computed with
    // the explicitly transposed data
    std::vector<float> data_out(out.staging_buffer_numel());
    extract_vtensor(out, data_out);

    for (size_t i = 0; i < ref_out.size(); ++i) {
      EXPECT_TRUE(check_close(data_out[i], ref_out[i]));
    }
  }
}
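// The pattern exercised above, in brief (assuming, as this test does, that
// the vTensor copy constructor creates a view aliasing the source tensor's
// storage):
//
//   vTensor mat2_t = vTensor(mat2); // shares mat2's buffer or image
//   mat2_t.virtual_transpose(0, 1); // reinterpret as (K, N); no data copied
//
// Shaders reading mat2_t then see transposed data through the updated sizes,
// dim order / axis map, and packed dim, while mat2's GPU memory is reused
// untouched.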
