diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index b1950f970e..9d1f6c3bd9 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -40,7 +40,12 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * size is only 1x1, making it easier to re-use loaded texels from t_kernel. */ void main() { - const u16vec3 gpos = u16vec3(gl_GlobalInvocationID); + const uint16_t out_limits_y_scaled = uint16_t((out_limits.y + TILE_SIZE - 1) / TILE_SIZE); + + const u16vec3 gpos = u16vec3( + gl_GlobalInvocationID.x / (out_limits_y_scaled * out_limits.z), + (gl_GlobalInvocationID.x / out_limits.z) % out_limits_y_scaled, + gl_GlobalInvocationID.x % out_limits.z); // Output position for TILE_SIZE = 2 // +--------+--------+ diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 1cdd7315f1..4f123cb833 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -370,11 +370,17 @@ void add_conv2d_node( weight_data, clamp_out); + utils::uvec3 wg_size = create_conv2d_global_wg_size(graph, method, out); + + if (method == Conv2dMethod::Pointwise) { + wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1}; + } + graph.execute_nodes().emplace_back(new DispatchNode( graph, shader, - create_conv2d_global_wg_size(graph, method, out), - graph.create_local_wg_size(out), + wg_size, + graph.create_local_wg_size(wg_size), // Inputs and Outputs {{out, vkapi::MemoryAccessType::WRITE}, {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},