@@ -44,23 +44,57 @@ void main() {
44
44
return ;
45
45
}
46
46
47
- // Starting offset to write at within a texel
48
- const int out_lane_offset = dst_offset[packed_dim] & 0x3;
49
- const bool has_lane_offset = out_lane_offset != 0 ;
50
-
51
47
// Position in input tensor
52
- const ivec3 in_pos = pos + src_offset.xyz;
48
+ ivec3 in_pos = pos + src_offset.xyz;
49
+ in_pos[packed_dim] = pos[packed_dim] + (src_offset[packed_dim] >> 2 );
53
50
54
51
// Read input value mapping to this output texel
55
- const VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
52
+ VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
53
+
54
+ // Starting offset to read from a texel
55
+ const int src_lane_offset = src_offset[packed_dim] & 0x3;
56
+ const bool has_src_lane_offset = src_lane_offset != 0 ;
57
+
58
+ // If the input lane offset is non-zero, i.e. the packed texel is composed from multiple sources
59
+ if (has_src_lane_offset) {
60
+ // Boundary values will come from next input texel in the packed dim.
61
+ ivec3 next_in_pos = in_pos;
62
+ next_in_pos[packed_dim] = in_pos[packed_dim] + 1 ;
63
+ VEC4_T next_value = load_texel_lpos(t_in, next_in_pos, in_axis_map);
64
+
65
+ // Keep input values from the end of the current input texel based on src_lane_offset
66
+ // offset 1 means the first lane of current input texel is not a part of the output texel
67
+ // offset 2 means first 2 lanes are not and so on
68
+ if (src_lane_offset == 1 ) {
69
+ in_value.xyz = in_value.yzw;
70
+ } else if (src_lane_offset == 2 ) {
71
+ in_value.xy = in_value.zw;
72
+ } else {
73
+ in_value.x = in_value.w;
74
+ }
75
+ // Copy next texel's values towards the end of input texel, based on lane offset
76
+ // offset 1 means the first lane from next texel is part of the input texel
77
+ // offset 2 means the first 2 lanes from the next texel are part of the input texel, and so on
78
+ if (src_lane_offset == 1 ) {
79
+ in_value.w = next_value.x;
80
+ } else if (src_lane_offset == 2 ) {
81
+ in_value.zw = next_value.xy;
82
+ } else {
83
+ in_value.yzw = next_value.xyz;
84
+ }
85
+ }
86
+
87
+ // Starting offset to write at within a texel
88
+ const int out_lane_offset = dst_offset[packed_dim] & 0x3;
89
+ const bool has_dst_lane_offset = out_lane_offset != 0 ;
56
90
57
91
ivec3 out_pos = pos + dst_offset.xyz;
58
92
out_pos[packed_dim] = pos[packed_dim] + (dst_offset[packed_dim] >> 2 );
59
93
60
94
VEC4_T out_value;
61
95
62
96
// If lane offset is non zero i.e packed texel is composed from multiple sources
63
- if (has_lane_offset ) {
97
+ if (has_dst_lane_offset ) {
64
98
// When position in packed dim is > 0
65
99
if (pos[packed_dim] > 0 ) {
66
100
// Boundary values will come from previous input texel in the packed dim.
0 commit comments