Skip to content

Commit

Permalink
update copy_channel_offset to axis mapping (#5587)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #5587

Updating copy_channel_offset to use axis mapping of in/out tensors.

This shader/op implementation still assumes that the input and output tensors are both channel-packed. Updating it for arbitrary packing is more complicated.

Reviewed By: derekxu, jorgep31415

Differential Revision: D63284763

fbshipit-source-id: 87b986ccad51ff7ab5980d9324ce417840c8c8bb
  • Loading branch information
nathanaelsee authored and facebook-github-bot committed Sep 24, 2024
1 parent b206b97 commit ce74024
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 40 deletions.
43 changes: 21 additions & 22 deletions backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,19 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)}
${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)}

layout(set = 0, binding = 3) uniform PRECISION restrict CopyArgs {
ivec4 out_sizes;
ivec4 in_sizes;
${layout_declare_ubo(3, "ivec4", "out_sizes")}
${layout_declare_ubo(4, "ivec4", "out_axis_map")}
${layout_declare_ubo(5, "ivec4", "in_sizes")}
${layout_declare_ubo(6, "ivec4", "in_axis_map")}
layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
// Operates on (x, y, z) logical extents.
ivec3 range;
// Analogous to the range variable in copy. It defines the number of channels
// being copied.
int channel_range;
int src_channel_offset;
int dst_channel_offset;
int unused;
// Operates on (x, y, z) extents.
ivec3 range;
int unused1;
ivec3 dst_offset;
int unused2;
int dst_channel_offset;
int src_channel_offset;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
Expand All @@ -43,36 +42,36 @@ layout(constant_id = 3) const int packed_dim = C_DIM;
void main() {
// Note: Unlike other shaders, the range is often not equal to the destination
// texture extent.
const ivec3 pos = ivec3(gl_GlobalInvocationID);
if (any(greaterThanEqual(pos, range))) {
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
if (any(greaterThanEqual(lpos, range))) {
return;
}

const ivec3 out_pos = pos + dst_offset;
const ivec3 out_lpos = lpos + dst_offset;

const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim);
const ivec4 out_tidx = lpos_to_tidx(out_lpos, out_sizes, out_axis_map.w, packed_dim);

// First read the existing values to make sure the boundary values stay.
VEC4_T v = VEC4_T(texelFetch(existing_out, out_pos, 0));
VEC4_T v = load_texel_lpos(existing_out, out_lpos, out_axis_map);

ivec4 in_tidx = out_tidx;
for (int i=0; i<4; i++) {
ivec4 in_whcn = out_whcn;

in_whcn.z = out_whcn.z - dst_channel_offset + i;
in_tidx[packed_dim] = out_tidx[packed_dim] - dst_channel_offset + i;

// Handle the partial update for the beginning of a channel in an existing tensor.
// If the source channel index is below zero or exceeds the range, we skip
// updating the element to avoid overwriting existing data.
if ((in_whcn.z < 0) || (in_whcn.z >= channel_range)) {
if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= channel_range)) {
continue;
}

// Readjust for the source offset.
in_whcn.z = in_whcn.z + src_channel_offset;
in_tidx[packed_dim] += src_channel_offset;

ivec4 in_elem_pos = to_texture_elem_pos(in_whcn, in_sizes, packed_dim);
v[i] = VEC4_T(texelFetch(t_in, in_elem_pos.xyz, 0))[in_elem_pos.w];
ivec4 in_posi = tidx_to_posi(in_tidx, in_sizes, in_axis_map, packed_dim);
v[i] = load_texel(t_in, in_posi.xyz)[in_posi.w];
}

imageStore(t_out, out_pos, v);
write_texel_lpos(t_out, out_lpos, v, out_axis_map);
}
31 changes: 13 additions & 18 deletions backends/vulkan/runtime/graph/ops/impl/Copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,28 +139,17 @@ void add_copy_channel_offset_node(
uvec3 local_size = adaptive_work_group_size(global_size);

const struct Block final {
utils::ivec4 out_sizes;
utils::ivec4 in_sizes;
int32_t channel_range;
int32_t src_channel_offset;
int32_t dst_channel_offset;
int32_t unused;
ivec3 range;
int32_t unused1;
int32_t channel_range;
ivec3 dst_offset;
int32_t unused2;

int32_t dst_channel_offset;
int32_t src_channel_offset;
} channel_offset_params{
utils::make_whcn_ivec4(out_sizes),
utils::make_whcn_ivec4(in_sizes),
channel_range,
src_channel_offset,
dst_channel_offset,
0,
utils::make_ivec3(global_size),
0,
channel_range,
dst_offset,
0,
dst_channel_offset,
src_channel_offset,
};

auto shader = VK_KERNEL_FROM_STR(kernel_name);
Expand All @@ -177,7 +166,13 @@ void add_copy_channel_offset_node(
{in, vkapi::MemoryAccessType::READ},
},
// Parameter buffers
{graph.create_params_buffer(channel_offset_params)},
{
t_out->sizes_ubo(),
t_out->axis_map_ubo(),
t_in->sizes_ubo(),
t_in->axis_map_ubo(),
graph.create_params_buffer(channel_offset_params),
},
// Specialization Constants
{}));
}
Expand Down

0 comments on commit ce74024

Please sign in to comment.