@@ -46,17 +46,14 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
46
46
* size is only 1x1, making it easier to re-use loaded texels from t_kernel.
47
47
*/
48
48
void main() {
49
- const ivec2 out_limits_scaled = ( out_limits.xy + ivec2 (TILE_SIZE_X - 1 , TILE_SIZE_Y - 1 )) / ivec2 (TILE_SIZE_X, TILE_SIZE_Y) ;
49
+ const int out_limits_scaled[ 2 ] = { out_limits.x + (TILE_SIZE_X - 1 ) * TILE_SIZE_X, out_limits.y + ( TILE_SIZE_Y - 1 ) * TILE_SIZE_Y} ;
50
50
51
- const uint div_by_x = gl_GlobalInvocationID.x / out_limits_scaled.x;
52
- const ivec3 gpos = ivec3 (
53
- gl_GlobalInvocationID.x % out_limits_scaled.x,
54
- div_by_x,
55
- gl_GlobalInvocationID.y);
51
+ const int div_by_x = int (gl_GlobalInvocationID.x / out_limits_scaled[0 ]);
52
+ const int out_pos[3 ] = {int (gl_GlobalInvocationID.x % out_limits_scaled[0 ]), div_by_x, int (gl_GlobalInvocationID.y)};
56
53
57
54
// If the top left position is out of bounds, then this invocation will have
58
55
// no work to do.
59
- if (gpos.y >= out_limits_scaled.y || gpos.z >= out_limits.z) {
56
+ if (out_pos[ 1 ] >= out_limits_scaled[ 1 ] || out_pos[ 2 ] >= out_limits.z) {
60
57
return ;
61
58
}
62
59
@@ -69,8 +66,8 @@ void main() {
69
66
int pos[TILE_SIZE_X * TILE_SIZE_Y * 2 ];
70
67
for (int y = 0 , i = 0 ; y < TILE_SIZE_Y; ++ y) {
71
68
for (int x = 0 ; x < TILE_SIZE_X; ++ x) {
72
- pos[i * 2 ] = gpos.x * TILE_SIZE_X + x;
73
- pos[i * 2 + 1 ] = gpos.y * TILE_SIZE_Y + y;
69
+ pos[i * 2 ] = out_pos[ 0 ] * TILE_SIZE_X + x;
70
+ pos[i * 2 + 1 ] = out_pos[ 1 ] * TILE_SIZE_Y + y;
74
71
i++ ;
75
72
}
76
73
}
@@ -88,7 +85,7 @@ void main() {
88
85
// Each tuple of 4 consecutive elements represents a single output texel.
89
86
float sum[TILE_SIZE_X * TILE_SIZE_Y * 4 ];
90
87
91
- const vec4 bias = texelFetch(t_bias, ivec2 (gpos.z , 0 ), 0 );
88
+ const vec4 bias = texelFetch(t_bias, ivec2 (out_pos[ 2 ] , 0 ), 0 );
92
89
93
90
// Initialize the output array with the bias value
94
91
for (int i = 0 ; i < TILE_SIZE_X * TILE_SIZE_Y * 4 ; i += 4 ) {
@@ -108,7 +105,7 @@ void main() {
108
105
109
106
// Load kernel values from texels to array
110
107
[[unroll]] for (int i = 0 ; i < 4 ; ++ i) {
111
- const vec4 k_tex = texelFetch(t_kernel, ivec2 (z + i, gpos.z ), 0 );
108
+ const vec4 k_tex = texelFetch(t_kernel, ivec2 (z + i, out_pos[ 2 ] ), 0 );
112
109
kernel_values[i * 4 + 0 ] = k_tex.x;
113
110
kernel_values[i * 4 + 1 ] = k_tex.y;
114
111
kernel_values[i * 4 + 2 ] = k_tex.z;
@@ -167,7 +164,7 @@ void main() {
167
164
}
168
165
169
166
for (int i = 0 ; i < TILE_SIZE_X * TILE_SIZE_Y; ++ i) {
170
- const ivec3 pos_l = ivec3 (pos[i * 2 ], pos[i * 2 + 1 ], gpos.z );
167
+ const ivec3 pos_l = ivec3 (pos[i * 2 ], pos[i * 2 + 1 ], out_pos[ 2 ] );
171
168
if (all (lessThan (pos_l, out_limits.xyz))) {
172
169
imageStore(t_out, pos_l, op(vec4 (sum[i * 4 ], sum[i * 4 + 1 ], sum[i * 4 + 2 ], sum[i * 4 + 3 ]), out_min, out_max));
173
170
}
0 commit comments