pytorch · SS-JIA · Apr 19, 2024 · Apr 20, 2024
diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp
@@ -139,10 +139,8 @@ vTensor::vTensor(
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
       gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      // Utility Uniform Buffers that can be passed to shaders as arguments
-      cpu_sizes_uniform_(),
-      gpu_sizes_uniform_(),
-      extents_uniform_(),
+      // Utility Uniform Buffer that can be passed to shaders as arguments
+      sizes_uniform_(context, api::utils::make_whcn_ivec4(sizes_)),
       // Construct Tensor storage
       storage_(
           context,
@@ -189,35 +187,6 @@ api::VulkanBuffer& vTensor::buffer(
   return storage_.buffer_;
 }
 
-const api::BufferBindInfo vTensor::cpu_sizes_ubo() {
-  if (!cpu_sizes_uniform_.buffer()) {
-    cpu_sizes_uniform_ = api::UniformParamsBuffer(
-        storage_.context_, api::utils::make_whcn_ivec4(sizes_));
-  }
-  return api::BufferBindInfo(cpu_sizes_uniform_.buffer());
-}
-
-const api::BufferBindInfo vTensor::gpu_sizes_ubo() {
-  if (!gpu_sizes_uniform_.buffer()) {
-    gpu_sizes_uniform_ = api::UniformParamsBuffer(
-        storage_.context_, api::utils::make_whcn_ivec4(gpu_sizes_));
-  }
-  return api::BufferBindInfo(gpu_sizes_uniform_.buffer());
-}
-
-const api::BufferBindInfo vTensor::extents_ubo() {
-  if (!extents_uniform_.buffer()) {
-    extents_uniform_ = api::UniformParamsBuffer(
-        storage_.context_,
-        api::utils::uvec4(
-            {storage_.extents_.data[0],
-             storage_.extents_.data[1],
-             storage_.extents_.data[2],
-             1u}));
-  }
-  return api::BufferBindInfo(extents_uniform_.buffer());
-}
-
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
   switch (storage_type()) {
     case api::kBuffer:
@@ -255,24 +224,7 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
   gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
-  api::utils::uvec3 virtual_extents =
-      create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
-
-  if (cpu_sizes_uniform_.buffer()) {
-    cpu_sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
-  }
-
-  if (gpu_sizes_uniform_.buffer()) {
-    gpu_sizes_uniform_.update(api::utils::make_whcn_ivec4(gpu_sizes_));
-  }
-
-  if (extents_uniform_.buffer()) {
-    extents_uniform_.update(api::utils::uvec4(
-        {virtual_extents.data[0],
-         virtual_extents.data[1],
-         virtual_extents.data[2],
-         1u}));
-  }
+  sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
@@ -284,6 +236,19 @@ void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
+  if (storage_type() != api::kBuffer) {
+    api::utils::uvec3 virtual_extents =
+        create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
+
+    bool valid_resize = virtual_extents.data[0] <= extents().data[0];
+    valid_resize = valid_resize && virtual_extents.data[1] <= extents().data[1];
+    valid_resize = valid_resize && virtual_extents.data[2] <= extents().data[2];
+
+    VK_CHECK_COND(
+        valid_resize,
+        "Cannot use virtual resize if new sizes requires a larger texture.");
+  }
+
   update_size_metadata(new_sizes);
 }
 

diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h
@@ -118,17 +118,7 @@ class vTensor final {
 
   // A Vulkan uniform buffer containing the tensor sizes in WHCN that can be
   // passed into a shader.
-  api::UniformParamsBuffer cpu_sizes_uniform_;
-
-  // A Vulkan uniform buffer containing the GPU tensor sizes in WHCN that can
-  // be passed into a shader. GPU sizes refers to the sizes of the tensor after
-  // padding has been applied to one dimension to align it to the next multiple
-  // of 4.
-  api::UniformParamsBuffer gpu_sizes_uniform_;
-
-  // A Vulkan uniform buffer containing the image extents of the underlying
-  // image texture that can be passed into a shader.
-  api::UniformParamsBuffer extents_uniform_;
+  api::UniformParamsBuffer sizes_uniform_;
 
   vTensorStorage storage_;
 
@@ -203,25 +193,12 @@ class vTensor final {
   }
 
   /*
-   * Get a uniform buffer object containing the tensor sizes to use in a compute
-   * shader. Note that the UBO will be created the first time this function is
-   * called.
-   */
-  const api::BufferBindInfo cpu_sizes_ubo();
-
-  /*
-   * Get a uniform buffer object containing the tensor GPU sizes to use in a
-   * compute shader. Note that the UBO will be created the first time this
-   * function is called.
+   * Get the binding information for the uniform buffer object containing the
+   * tensor sizes to use in a compute shader.
    */
-  const api::BufferBindInfo gpu_sizes_ubo();
-
-  /*
-   * Get a uniform buffer object containing the image extents to use in a
-   * compute shader. Note that the UBO will be created the first time this
-   * function is called.
-   */
-  const api::BufferBindInfo extents_ubo();
+  inline const api::BufferBindInfo sizes_ubo() {
+    return api::BufferBindInfo(sizes_uniform_.buffer());
+  }
 
   inline size_t numel() const {
     return api::utils::multiply_integers(sizes());

diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
@@ -21,17 +21,17 @@ ExecuteNode::ExecuteNode(
     const api::utils::uvec3& local_workgroup_size,
     const std::vector<ArgGroup>& args,
     const api::ParamsBindList& params,
+    const api::SpecVarList& spec_vars,
     const ResizeFunction& resize_fn,
-    const std::vector<ValueRef>& resize_args,
-    const api::SpecVarList& spec_vars)
+    const std::vector<ValueRef>& resize_args)
     : shader_(shader),
       global_workgroup_size_(global_workgroup_size),
       local_workgroup_size_(local_workgroup_size),
       args_(args),
       params_(params),
+      spec_vars_(spec_vars),
       resize_fn_(resize_fn),
-      resize_args_(resize_args),
-      spec_vars_(spec_vars) {
+      resize_args_(resize_args) {
   graph.update_descriptor_counts(shader, /*execute = */ true);
 }
 

diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
@@ -55,9 +55,9 @@ class ExecuteNode final {
       const api::utils::uvec3& local_workgroup_size,
       const std::vector<ArgGroup>& args,
       const api::ParamsBindList& params,
+      const api::SpecVarList& spec_vars = {},
       const ResizeFunction& resize_fn = nullptr,
-      const std::vector<ValueRef>& resize_args = {},
-      const api::SpecVarList& spec_vars = {});
+      const std::vector<ValueRef>& resize_args = {});
 
   ~ExecuteNode() = default;
 
@@ -75,9 +75,9 @@ class ExecuteNode final {
   const api::utils::uvec3 local_workgroup_size_;
   const std::vector<ArgGroup> args_;
   const api::ParamsBindList params_;
+  const api::SpecVarList spec_vars_;
   const ResizeFunction resize_fn_;
   const std::vector<ValueRef> resize_args_;
-  const api::SpecVarList spec_vars_;
 };
 
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
@@ -31,14 +31,16 @@ PrepackNode::PrepackNode(
     const api::utils::uvec3& local_workgroup_size,
     const ValueRef tref,
     const ValueRef packed,
-    const api::ParamsBindList& params)
+    const api::ParamsBindList& params,
+    const api::SpecVarList& spec_vars)
     : shader_(shader),
       noop_shader_(get_noop_shader(graph, packed)),
       global_workgroup_size_(global_workgroup_size),
       local_workgroup_size_(local_workgroup_size),
       tref_(tref),
       packed_(packed),
-      params_(params) {
+      params_(params),
+      spec_vars_(spec_vars) {
   graph.update_descriptor_counts(shader, /*execute = */ false);
   graph.update_descriptor_counts(noop_shader_, /*execute = */ false);
 }
@@ -75,7 +77,7 @@ void PrepackNode::encode(ComputeGraph* graph) {
   {
     api::PipelineBarrier pipeline_barrier{};
     api::DescriptorSet descriptor_set =
-        context->get_descriptor_set(shader_, local_workgroup_size_);
+        context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);
 
     uint32_t idx = 0;
     bind_tensor_to_descriptor_set(

diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h
@@ -33,7 +33,8 @@ class PrepackNode final {
       const api::utils::uvec3& local_workgroup_size,
       const ValueRef tref,
       const ValueRef packed,
-      const api::ParamsBindList& params);
+      const api::ParamsBindList& params,
+      const api::SpecVarList& spec_vars = {});
 
   ~PrepackNode() = default;
 
@@ -47,6 +48,7 @@ class PrepackNode final {
   const ValueRef tref_;
   const ValueRef packed_;
   const api::ParamsBindList params_;
+  const api::SpecVarList spec_vars_;
 
  private:
   api::StorageBuffer create_staging_buffer(ComputeGraph* graph);

diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -12,9 +12,6 @@
 
 #define VEC4_T ${texel_type(DTYPE)}
 
-#define to_tensor_idx to_tensor_idx_${PACKING}
-#define to_texture_pos to_texture_pos_${PACKING}
-
 #define op(X, Y, A) ${OPERATOR}
 
 #include "broadcasting_utils.h"
@@ -27,59 +24,56 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
 layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;
 
 layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
-  ivec4 data;
-}
-out_sizes;
+  ivec4 out_sizes;
+};
 
 layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
-  ivec4 data;
-}
-in_sizes;
+  ivec4 in_sizes;
+};
 
 layout(set = 0, binding = 5) uniform PRECISION restrict OtherSizes {
-  ivec4 data;
-}
-other_sizes;
+  ivec4 other_sizes;
+};
 
 layout(set = 0, binding = 6) uniform PRECISION restrict BroadcastParams {
-  ivec2 data;
-}
-broadcast_params;
+  ivec2 broadcast_params;
+};
 
 layout(set = 0, binding = 7) uniform PRECISION restrict Alpha {
-  float data;
-}
-alpha;
+  float alpha;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
+layout(constant_id = 3) const int packed_dim = C_DIM;
+
 void main() {
   const ivec3 pos = ivec3(gl_GlobalInvocationID);
-  const ivec4 idx = to_tensor_idx(pos, out_sizes.data);
+  const ivec4 idx = to_tensor_idx(pos, out_sizes, packed_dim);
 
-  if (any(greaterThanEqual(idx, out_sizes.data))) {
+  if (any(greaterThanEqual(idx, out_sizes))) {
     return;
   }
 
-  ivec4 in_idx = broadcast_indices(idx, in_sizes.data);
+  ivec4 in_idx = broadcast_indices(idx, in_sizes);
   VEC4_T in_texel = VEC4_T(texelFetch(
     image_in,
-    to_texture_pos(in_idx, in_sizes.data),
+    to_texture_pos(in_idx, in_sizes, packed_dim),
     0));
 
-  ivec4 other_idx = broadcast_indices(idx, other_sizes.data);
+  ivec4 other_idx = broadcast_indices(idx, other_sizes);
   VEC4_T other_texel = VEC4_T(texelFetch(
     image_other,
-    to_texture_pos(other_idx, other_sizes.data),
+    to_texture_pos(other_idx, other_sizes, packed_dim),
     0));
 
   // Check boolean broadcast flags; we use ivec2 instead of bvec2 for alignment.
-  if (broadcast_params.data.x > 0) {
+  if (broadcast_params.x > 0) {
     in_texel = in_texel.xxxx;
   }
-  if (broadcast_params.data.y > 0) {
+  if (broadcast_params.y > 0) {
     other_texel = other_texel.xxxx;
   }
 
-  imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha.data)));
+  imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
 }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml b/backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
@@ -11,10 +11,6 @@ binary_op:
     DTYPE: float
     PACKING: C_packed
   generate_variant_forall:
-    PACKING:
-      - VALUE: C_packed
-      - VALUE: W_packed
-      - VALUE: H_packed
     DTYPE:
       - VALUE: half
       - VALUE: float