Skip to content

Commit 8a92d80

Browse files
authored
[ET-VK] Adding source_offset processing to copy_packed_dim_offset function.
Differential Revision: D71349217 Pull Request resolved: #9344
1 parent 3a4b9ab commit 8a92d80

File tree

2 files changed

+64
-12
lines changed

2 files changed

+64
-12
lines changed

backends/vulkan/runtime/graph/ops/glsl/copy_packed_dim_offset.glsl

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,57 @@ void main() {
4444
return;
4545
}
4646

47-
// Starting offset to write at within a texel
48-
const int out_lane_offset = dst_offset[packed_dim] & 0x3;
49-
const bool has_lane_offset = out_lane_offset != 0;
50-
5147
// Position in input tensor
52-
const ivec3 in_pos = pos + src_offset.xyz;
48+
ivec3 in_pos = pos + src_offset.xyz;
49+
in_pos[packed_dim] = pos[packed_dim] + (src_offset[packed_dim] >> 2);
5350

5451
// Read input value mapping to this output texel
55-
const VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
52+
VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map);
53+
54+
// Starting offset to read from a texel
55+
const int src_lane_offset = src_offset[packed_dim] & 0x3;
56+
const bool has_src_lane_offset = src_lane_offset != 0;
57+
58+
// If input lane offset is non-zero, i.e. the packed texel is composed from multiple sources
59+
if (has_src_lane_offset) {
60+
// Boundary values will come from next input texel in the packed dim.
61+
ivec3 next_in_pos = in_pos;
62+
next_in_pos[packed_dim] = in_pos[packed_dim] + 1;
63+
VEC4_T next_value = load_texel_lpos(t_in, next_in_pos, in_axis_map);
64+
65+
// Keep input values from the end of current input texel based on src_lane_offset
66+
// offset 1 means the first lane of current input texel is not a part of the output texel
67+
// offset 2 means first 2 lanes are not and so on
68+
if (src_lane_offset == 1) {
69+
in_value.xyz = in_value.yzw;
70+
} else if (src_lane_offset == 2) {
71+
in_value.xy = in_value.zw;
72+
} else {
73+
in_value.x = in_value.w;
74+
}
75+
// Copy next texel's values towards the end of input texel, based on lane offset
76+
// offset 1 means the first lane from next texel is part of the input texel
77+
// offset 2 means first 2 lanes from next texel are part of the input texel and so on
78+
if (src_lane_offset == 1) {
79+
in_value.w = next_value.x;
80+
} else if (src_lane_offset == 2) {
81+
in_value.zw = next_value.xy;
82+
} else {
83+
in_value.yzw = next_value.xyz;
84+
}
85+
}
86+
87+
// Starting offset to write at within a texel
88+
const int out_lane_offset = dst_offset[packed_dim] & 0x3;
89+
const bool has_dst_lane_offset = out_lane_offset != 0;
5690

5791
ivec3 out_pos = pos + dst_offset.xyz;
5892
out_pos[packed_dim] = pos[packed_dim] + (dst_offset[packed_dim] >> 2);
5993

6094
VEC4_T out_value;
6195

6296
// If lane offset is non zero i.e packed texel is composed from multiple sources
63-
if (has_lane_offset) {
97+
if (has_dst_lane_offset) {
6498
// When position in packed dim is > 0
6599
if (pos[packed_dim] > 0) {
66100
// Boundary values will come from previous input texel in the packed dim.

backends/vulkan/runtime/graph/ops/impl/Copy.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,19 +92,37 @@ void add_copy_packed_dim_offset_node(
9292
ivec4 final_range = {
9393
range[0], range[1], range[2], dim_at(t_in->sizes(), kBatch4D)};
9494
ivec3 global_wg_size = t_out->logical_limits();
95+
// The starting offset in a texel where this tensor will start copying from
96+
const auto src_lane_offset = src_offset[packed_dim] & 0x3;
9597
// The starting offset in a texel where this tensor will start copying to
9698
const auto dst_lane_offset = dst_offset[packed_dim] & 0x3;
99+
100+
// The total packed texels this tensor will be copied from
101+
// The first texel of tensor data in packed dimension will be copied from
102+
// remaining lanes from current source. Hence (4 - src_lane_offset) is added
103+
// to tensor size in packed dimension
104+
const auto src_packed_size = utils::div_up_4(
105+
(4 - src_lane_offset) +
106+
dim_at(t_out->sizes(), normalize_to_dim_index(*t_out, packed_dim)));
107+
97108
// The total packed texels this tensor will be copied to
98-
// The first texel of tensor data in packed dimension will be copied to remain
99-
// lanes from previous write Hence (4 - dst_lane_offset) is added to tensor
100-
// size in packed dimension
109+
// The first texel of tensor data in packed dimension will be copied to
110+
// remaining lanes from previous write Hence (4 - dst_lane_offset) is added to
111+
// tensor size in packed dimension
101112
const auto dst_packed_size = utils::div_up_4(
102113
(4 - dst_lane_offset) +
103114
dim_at(t_in->sizes(), normalize_to_dim_index(*t_in, packed_dim)));
104115

105-
// If the starting offset is not 0, and the total packed texels is greater
116+
// If the starting src offset is not 0, and the total packed texels is greater
106117
// than the source texel range
107-
if (dst_lane_offset != 0 && dst_packed_size > final_range[packed_dim]) {
118+
const bool has_additional_src_work =
119+
src_lane_offset != 0 && src_packed_size > final_range[packed_dim];
120+
// If the starting dst offset is not 0, and the total packed texels is greater
121+
// than the source texel range
122+
const bool has_additional_dst_work =
123+
dst_lane_offset != 0 && dst_packed_size > final_range[packed_dim];
124+
125+
if (has_additional_src_work || has_additional_dst_work) {
108126
global_wg_size[packed_dim]++; // Increase the global work group size in
109127
// packed dimension
110128
final_range[packed_dim]++; // Increase the range in packed dimension

0 commit comments

Comments
 (0)