From ce74024985d07ab85351d9ade8c6962bfc056b07 Mon Sep 17 00:00:00 2001 From: Nathanael See Date: Tue, 24 Sep 2024 12:25:48 -0700 Subject: [PATCH] update copy_channel_offset to axis mapping (#5587) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/5587 Updating copy_channel_offset to use axis mapping of in/out tensors. This shader/op implementation still assumes that the input and output tensors are both channel-packed. Updating it for arbitrary packing is more complicated. Reviewed By: derekxu, jorgep31415 Differential Revision: D63284763 fbshipit-source-id: 87b986ccad51ff7ab5980d9324ce417840c8c8bb --- .../graph/ops/glsl/copy_channel_offset.glsl | 43 +++++++++---------- .../vulkan/runtime/graph/ops/impl/Copy.cpp | 31 ++++++------- 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl index 361a182d6b..f02049dc2a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl @@ -20,20 +20,19 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)} ${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)} -layout(set = 0, binding = 3) uniform PRECISION restrict CopyArgs { - ivec4 out_sizes; - ivec4 in_sizes; +${layout_declare_ubo(3, "ivec4", "out_sizes")} +${layout_declare_ubo(4, "ivec4", "out_axis_map")} +${layout_declare_ubo(5, "ivec4", "in_sizes")} +${layout_declare_ubo(6, "ivec4", "in_axis_map")} +layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs { + // Operates on (x, y, z) logical extents. + ivec3 range; // Analogus to range variable in copy. It defines the # of channel being // copied. int channel_range; - int src_channel_offset; - int dst_channel_offset; - int unused; - // Operates on (x, y, z) extents. - ivec3 range; - int unused1; ivec3 dst_offset; - int unused2; + int dst_channel_offset; + int src_channel_offset; }; layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; @@ -43,36 +42,36 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { // Note: Unlike other shaders, the range is often not equal to the destination // texture extent. - const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (any(greaterThanEqual(pos, range))) { + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + if (any(greaterThanEqual(lpos, range))) { return; } - const ivec3 out_pos = pos + dst_offset; + const ivec3 out_lpos = lpos + dst_offset; - const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim); + const ivec4 out_tidx = lpos_to_tidx(out_lpos, out_sizes, out_axis_map.w, packed_dim); // First read the existing values to make sure the boundary values stay. - VEC4_T v = VEC4_T(texelFetch(existing_out, out_pos, 0)); + VEC4_T v = load_texel_lpos(existing_out, out_lpos, out_axis_map); + ivec4 in_tidx = out_tidx; for (int i=0; i<4; i++) { - ivec4 in_whcn = out_whcn; - in_whcn.z = out_whcn.z - dst_channel_offset + i; + in_tidx[packed_dim] = out_tidx[packed_dim] - dst_channel_offset + i; // Handle the partial update for begining of channel in an existing tensor. // If the source channel index is below zero or exceeds the range, we skip // updating the element to avoid overwriting existing data. - if ((in_whcn.z < 0) || (in_whcn.z >= channel_range)) { + if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= channel_range)) { continue; } // Readjust for the source offset. - in_whcn.z = in_whcn.z + src_channel_offset; + in_tidx[packed_dim] += src_channel_offset; - ivec4 in_elem_pos = to_texture_elem_pos(in_whcn, in_sizes, packed_dim); - v[i] = VEC4_T(texelFetch(t_in, in_elem_pos.xyz, 0))[in_elem_pos.w]; + ivec4 in_posi = tidx_to_posi(in_tidx, in_sizes, in_axis_map, packed_dim); + v[i] = load_texel(t_in, in_posi.xyz)[in_posi.w]; } - imageStore(t_out, out_pos, v); + write_texel_lpos(t_out, out_lpos, v, out_axis_map); } diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 1fe65611d9..c836a53d04 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -139,28 +139,17 @@ void add_copy_channel_offset_node( uvec3 local_size = adaptive_work_group_size(global_size); const struct Block final { - utils::ivec4 out_sizes; - utils::ivec4 in_sizes; - int32_t channel_range; - int32_t src_channel_offset; - int32_t dst_channel_offset; - int32_t unused; ivec3 range; - int32_t unused1; + int32_t channel_range; ivec3 dst_offset; - int32_t unused2; - + int32_t dst_channel_offset; + int32_t src_channel_offset; } channel_offset_params{ - utils::make_whcn_ivec4(out_sizes), - utils::make_whcn_ivec4(in_sizes), - channel_range, - src_channel_offset, - dst_channel_offset, - 0, utils::make_ivec3(global_size), - 0, + channel_range, dst_offset, - 0, + dst_channel_offset, + src_channel_offset, }; auto shader = VK_KERNEL_FROM_STR(kernel_name); @@ -177,7 +166,13 @@ void add_copy_channel_offset_node( {in, vkapi::MemoryAccessType::READ}, }, // Parameter buffers - {graph.create_params_buffer(channel_offset_params)}, + { + t_out->sizes_ubo(), + t_out->axis_map_ubo(), + t_in->sizes_ubo(), + t_in->axis_map_ubo(), + graph.create_params_buffer(channel_offset_params), + }, // Specialization Constants {})); }