Skip to content

Commit c8eff94

Browse files
committed
[ET-VK] Applying bias after sum calculation in conv2d pw shader to improve performance.
Pull Request resolved: #11150. This diff improves the performance of the conv2d pw shader by zero-initializing the accumulators and applying the bias after the sum calculation, just before the output is stored, instead of seeding every accumulator with the bias up front. ghstack-source-id: 286652108 @exported-using-ghexport Differential Revision: [D75450662](https://our.internmc.facebook.com/intern/diff/D75450662/)
1 parent 9d7ead2 commit c8eff94

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,9 @@ void main() {
7979
// Tuple of consecutive 4 elements represents a single output texel.
8080
float sum[TILE_SIZE_X * TILE_SIZE_Y * 4];
8181

82-
const vec4 bias = texelFetch(t_bias, ivec2(out_pos_z, 0), 0);
83-
8482
// Initialize the output array to zero; the bias is added once after accumulation, just before the store.
85-
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i += 4) {
86-
sum[i] = bias.x;
87-
sum[i + 1] = bias.y;
88-
sum[i + 2] = bias.z;
89-
sum[i + 3] = bias.w;
83+
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i++) {
84+
sum[i] = 0;
9085
}
9186

9287
int z4 = 0;
@@ -157,10 +152,13 @@ void main() {
157152
}
158153
}
159154

155+
const vec4 bias = texelFetch(t_bias, ivec2(out_pos_z, 0), 0);
156+
160157
for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y; ++i) {
161158
const ivec3 pos_l = ivec3(pos[i * 2], pos[i * 2 + 1], out_pos_z);
162159
if (all(lessThan(pos_l.xy, out_limits.xy))) {
163-
imageStore(t_out, pos_l, op(vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]), out_min, out_max));
160+
const vec4 out_sum = vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]);
161+
imageStore(t_out, pos_l, op(out_sum + bias, out_min, out_max));
164162
}
165163
}
166164
}

0 commit comments

Comments
 (0)