Skip to content

[ET-VK] Deprecate gpu_sizes_ubo() and toggle packing layout via specialization shader #3181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 16 additions & 51 deletions backends/vulkan/runtime/api/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,8 @@ vTensor::vTensor(
// Calculate sizes and strides
sizes_(sizes.begin(), sizes.end()),
gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
// Utility Uniform Buffers that can be passed to shaders as arguments
cpu_sizes_uniform_(),
gpu_sizes_uniform_(),
extents_uniform_(),
// Utility Uniform Buffer that can be passed to shaders as arguments
sizes_uniform_(context, api::utils::make_whcn_ivec4(sizes_)),
// Construct Tensor storage
storage_(
context,
Expand Down Expand Up @@ -189,35 +187,6 @@ api::VulkanBuffer& vTensor::buffer(
return storage_.buffer_;
}

const api::BufferBindInfo vTensor::cpu_sizes_ubo() {
if (!cpu_sizes_uniform_.buffer()) {
cpu_sizes_uniform_ = api::UniformParamsBuffer(
storage_.context_, api::utils::make_whcn_ivec4(sizes_));
}
return api::BufferBindInfo(cpu_sizes_uniform_.buffer());
}

const api::BufferBindInfo vTensor::gpu_sizes_ubo() {
if (!gpu_sizes_uniform_.buffer()) {
gpu_sizes_uniform_ = api::UniformParamsBuffer(
storage_.context_, api::utils::make_whcn_ivec4(gpu_sizes_));
}
return api::BufferBindInfo(gpu_sizes_uniform_.buffer());
}

const api::BufferBindInfo vTensor::extents_ubo() {
if (!extents_uniform_.buffer()) {
extents_uniform_ = api::UniformParamsBuffer(
storage_.context_,
api::utils::uvec4(
{storage_.extents_.data[0],
storage_.extents_.data[1],
storage_.extents_.data[2],
1u}));
}
return api::BufferBindInfo(extents_uniform_.buffer());
}

VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
switch (storage_type()) {
case api::kBuffer:
Expand Down Expand Up @@ -255,24 +224,7 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
sizes_ = new_sizes;
gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
api::utils::uvec3 virtual_extents =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);

if (cpu_sizes_uniform_.buffer()) {
cpu_sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
}

if (gpu_sizes_uniform_.buffer()) {
gpu_sizes_uniform_.update(api::utils::make_whcn_ivec4(gpu_sizes_));
}

if (extents_uniform_.buffer()) {
extents_uniform_.update(api::utils::uvec4(
{virtual_extents.data[0],
virtual_extents.data[1],
virtual_extents.data[2],
1u}));
}
sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
}

void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
Expand All @@ -284,6 +236,19 @@ void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
}

void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
if (storage_type() != api::kBuffer) {
api::utils::uvec3 virtual_extents =
create_image_extents(gpu_sizes_, storage_type(), memory_layout_);

bool valid_resize = virtual_extents.data[0] <= extents().data[0];
valid_resize = valid_resize && virtual_extents.data[1] <= extents().data[1];
valid_resize = valid_resize && virtual_extents.data[2] <= extents().data[2];

VK_CHECK_COND(
valid_resize,
"Cannot use virtual resize if new sizes requires a larger texture.");
}

update_size_metadata(new_sizes);
}

Expand Down
35 changes: 6 additions & 29 deletions backends/vulkan/runtime/api/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,7 @@ class vTensor final {

// A Vulkan uniform buffer containing the tensor sizes in WHCN that can be
// passed into a shader.
api::UniformParamsBuffer cpu_sizes_uniform_;

// A Vulkan uniform buffer containing the GPU tensor sizes in WHCN that can
// be passed into a shader. GPU sizes refers to the sizes of the tensor after
// padding has been applied to one dimension to align it to the next multiple
// of 4.
api::UniformParamsBuffer gpu_sizes_uniform_;

// A Vulkan uniform buffer containing the image extents of the underlying
// image texture that can be passed into a shader.
api::UniformParamsBuffer extents_uniform_;
api::UniformParamsBuffer sizes_uniform_;

vTensorStorage storage_;

Expand Down Expand Up @@ -203,25 +193,12 @@ class vTensor final {
}

/*
* Get a uniform buffer object containing the tensor sizes to use in a compute
* shader. Note that the UBO will be created the first time this function is
* called.
*/
const api::BufferBindInfo cpu_sizes_ubo();

/*
* Get a uniform buffer object containing the tensor GPU sizes to use in a
* compute shader. Note that the UBO will be created the first time this
* function is called.
* Get the binding information for the uniform buffer object containing the
* tensor sizes to use in a compute shader.
*/
const api::BufferBindInfo gpu_sizes_ubo();

/*
* Get a uniform buffer object containing the image extents to use in a
* compute shader. Note that the UBO will be created the first time this
* function is called.
*/
const api::BufferBindInfo extents_ubo();
inline const api::BufferBindInfo sizes_ubo() {
return api::BufferBindInfo(sizes_uniform_.buffer());
}

inline size_t numel() const {
return api::utils::multiply_integers(sizes());
Expand Down
8 changes: 4 additions & 4 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ ExecuteNode::ExecuteNode(
const api::utils::uvec3& local_workgroup_size,
const std::vector<ArgGroup>& args,
const api::ParamsBindList& params,
const api::SpecVarList& spec_vars,
const ResizeFunction& resize_fn,
const std::vector<ValueRef>& resize_args,
const api::SpecVarList& spec_vars)
const std::vector<ValueRef>& resize_args)
: shader_(shader),
global_workgroup_size_(global_workgroup_size),
local_workgroup_size_(local_workgroup_size),
args_(args),
params_(params),
spec_vars_(spec_vars),
resize_fn_(resize_fn),
resize_args_(resize_args),
spec_vars_(spec_vars) {
resize_args_(resize_args) {
graph.update_descriptor_counts(shader, /*execute = */ true);
}

Expand Down
6 changes: 3 additions & 3 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ class ExecuteNode final {
const api::utils::uvec3& local_workgroup_size,
const std::vector<ArgGroup>& args,
const api::ParamsBindList& params,
const api::SpecVarList& spec_vars = {},
const ResizeFunction& resize_fn = nullptr,
const std::vector<ValueRef>& resize_args = {},
const api::SpecVarList& spec_vars = {});
const std::vector<ValueRef>& resize_args = {});

~ExecuteNode() = default;

Expand All @@ -75,9 +75,9 @@ class ExecuteNode final {
const api::utils::uvec3 local_workgroup_size_;
const std::vector<ArgGroup> args_;
const api::ParamsBindList params_;
const api::SpecVarList spec_vars_;
const ResizeFunction resize_fn_;
const std::vector<ValueRef> resize_args_;
const api::SpecVarList spec_vars_;
};

} // namespace vkcompute
8 changes: 5 additions & 3 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,16 @@ PrepackNode::PrepackNode(
const api::utils::uvec3& local_workgroup_size,
const ValueRef tref,
const ValueRef packed,
const api::ParamsBindList& params)
const api::ParamsBindList& params,
const api::SpecVarList& spec_vars)
: shader_(shader),
noop_shader_(get_noop_shader(graph, packed)),
global_workgroup_size_(global_workgroup_size),
local_workgroup_size_(local_workgroup_size),
tref_(tref),
packed_(packed),
params_(params) {
params_(params),
spec_vars_(spec_vars) {
graph.update_descriptor_counts(shader, /*execute = */ false);
graph.update_descriptor_counts(noop_shader_, /*execute = */ false);
}
Expand Down Expand Up @@ -75,7 +77,7 @@ void PrepackNode::encode(ComputeGraph* graph) {
{
api::PipelineBarrier pipeline_barrier{};
api::DescriptorSet descriptor_set =
context->get_descriptor_set(shader_, local_workgroup_size_);
context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);

uint32_t idx = 0;
bind_tensor_to_descriptor_set(
Expand Down
4 changes: 3 additions & 1 deletion backends/vulkan/runtime/graph/ops/PrepackNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class PrepackNode final {
const api::utils::uvec3& local_workgroup_size,
const ValueRef tref,
const ValueRef packed,
const api::ParamsBindList& params);
const api::ParamsBindList& params,
const api::SpecVarList& spec_vars = {});

~PrepackNode() = default;

Expand All @@ -47,6 +48,7 @@ class PrepackNode final {
const ValueRef tref_;
const ValueRef packed_;
const api::ParamsBindList params_;
const api::SpecVarList spec_vars_;

private:
api::StorageBuffer create_staging_buffer(ComputeGraph* graph);
Expand Down
48 changes: 21 additions & 27 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@

#define VEC4_T ${texel_type(DTYPE)}

#define to_tensor_idx to_tensor_idx_${PACKING}
#define to_texture_pos to_texture_pos_${PACKING}

#define op(X, Y, A) ${OPERATOR}

#include "broadcasting_utils.h"
Expand All @@ -27,59 +24,56 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;

layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
ivec4 data;
}
out_sizes;
ivec4 out_sizes;
};

layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
ivec4 data;
}
in_sizes;
ivec4 in_sizes;
};

layout(set = 0, binding = 5) uniform PRECISION restrict OtherSizes {
ivec4 data;
}
other_sizes;
ivec4 other_sizes;
};

layout(set = 0, binding = 6) uniform PRECISION restrict BroadcastParams {
ivec2 data;
}
broadcast_params;
ivec2 broadcast_params;
};

layout(set = 0, binding = 7) uniform PRECISION restrict Alpha {
float data;
}
alpha;
float alpha;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

void main() {
const ivec3 pos = ivec3(gl_GlobalInvocationID);
const ivec4 idx = to_tensor_idx(pos, out_sizes.data);
const ivec4 idx = to_tensor_idx(pos, out_sizes, packed_dim);

if (any(greaterThanEqual(idx, out_sizes.data))) {
if (any(greaterThanEqual(idx, out_sizes))) {
return;
}

ivec4 in_idx = broadcast_indices(idx, in_sizes.data);
ivec4 in_idx = broadcast_indices(idx, in_sizes);
VEC4_T in_texel = VEC4_T(texelFetch(
image_in,
to_texture_pos(in_idx, in_sizes.data),
to_texture_pos(in_idx, in_sizes, packed_dim),
0));

ivec4 other_idx = broadcast_indices(idx, other_sizes.data);
ivec4 other_idx = broadcast_indices(idx, other_sizes);
VEC4_T other_texel = VEC4_T(texelFetch(
image_other,
to_texture_pos(other_idx, other_sizes.data),
to_texture_pos(other_idx, other_sizes, packed_dim),
0));

// Check boolean broadcast flags; we use ivec2 instead of bvec2 for alignment.
if (broadcast_params.data.x > 0) {
if (broadcast_params.x > 0) {
in_texel = in_texel.xxxx;
}
if (broadcast_params.data.y > 0) {
if (broadcast_params.y > 0) {
other_texel = other_texel.xxxx;
}

imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha.data)));
imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
}
4 changes: 0 additions & 4 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ binary_op:
DTYPE: float
PACKING: C_packed
generate_variant_forall:
PACKING:
- VALUE: C_packed
- VALUE: W_packed
- VALUE: H_packed
DTYPE:
- VALUE: half
- VALUE: float
Expand Down
Loading