Skip to content

[ET-VK] Adding all tensor packing support to split op. #9345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backends/vulkan/op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,6 @@ def register_view_op(features: OpFeatures):
exir_ops.edge.aten.index_select.default,
exir_ops.edge.aten.select_copy.int,
# Tensor combination
exir_ops.edge.aten.split_with_sizes_copy.default,
exir_ops.edge.aten.split.Tensor,
exir_ops.edge.aten.repeat.default,
# Tensor creation
exir_ops.edge.aten.arange.start_step,
Expand Down Expand Up @@ -563,6 +561,8 @@ def register_ported_op(features: OpFeatures):
exir_ops.edge.aten.permute_copy.default,
# Tensor combination
exir_ops.edge.aten.cat.default,
exir_ops.edge.aten.split_with_sizes_copy.default,
exir_ops.edge.aten.split.Tensor,
]
)
def register_ported_op_all_packed_dims(features: OpFeatures):
Expand Down
90 changes: 43 additions & 47 deletions backends/vulkan/runtime/graph/ops/impl/Split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ void add_split_with_sizes_default_node(
ValueRef out_list_ref) {
vTensorPtr t_in = graph.get_tensor(in);

VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim));

ValueListPtr out_list = graph.get_value_list(out_list_ref);

DimIndex dim_index = normalize_to_dim_index(*t_in, dim);
Expand All @@ -38,62 +36,60 @@ void add_split_with_sizes_default_node(
ValueRef out_ref = (*out_list)[split_idx];

vTensorPtr t_out = graph.get_tensor(out_ref);
VK_CHECK_COND(check_packed_dim_is(*t_out, WHCN::kChannelsDim));
VK_CHECK_COND(dim_at(*t_out, dim_index) == split_size);
}

if (dim_index == kWidth4D) {
utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
const auto packed_dim = t_in->packed_dim();
const auto packed_dim_index = static_cast<DimIndex>(kWidth4D - packed_dim);

for (ValueRef out_ref : *out_list) {
// Doesn't need to use split_size since we have already verified that the
// output tensor's size matches with the split_size.
vTensorPtr t_out = graph.get_tensor(out_ref);
utils::ivec3 range = t_out->logical_limits();
add_copy_offset_node(
graph, in, range, src_offset, dst_offset, out_ref, false, true);
// Index of dimension to be concatenated in (w, h, c * b) coordinate system
const auto dim_xyz_index = std::min(2, -dim_index - 1);

src_offset[0] += range[0];
}
} else if (dim_index == kHeight4D) {
utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
utils::ivec3 range = t_out->logical_limits();
add_copy_offset_node(
graph, in, range, src_offset, dst_offset, out_ref, false, true);
const bool is_splitting_channel = (dim_index == kChannel4D);

src_offset[1] += range[1];
}
} else if (dim_index == kBatch4D) {
utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
// if splitting channels
if (is_splitting_channel) {
// set source offset w as channel size of the input tensor
src_offset[3] = dim_at(t_in->sizes(), kChannel4D);
}

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
utils::ivec3 range = t_out->logical_limits();
for (ValueRef out_ref : *out_list) {
// Doesn't need to use split_size since we have already verified that the
// output tensor's size matches with the split_size.
vTensorPtr t_out = graph.get_tensor(out_ref);
const auto out_channel_size = dim_at(t_out->sizes(), kChannel4D);
utils::ivec3 range = t_out->logical_limits();

if (dim_index == packed_dim_index) {
// if splitting channels, use add_copy_channel_offset_node function as
// add_copy_packed_dim_offset_node does not support channel packing
if (is_splitting_channel) {
add_copy_channel_offset_node(
graph, in, out_channel_size, src_offset[2], dst_offset[2], out_ref);
src_offset[dim_xyz_index] += out_channel_size;
} else {
// dst_offset[3] is not used now but will be used in the future when
// add_copy_packed_dim_offset_node will support channel packing
//
// set destination offset w as channel size of the output tensor if
// splitting channel
dst_offset[3] = is_splitting_channel ? out_channel_size : 0;
add_copy_packed_dim_offset_node(
graph, in, range, src_offset, dst_offset, out_ref);
src_offset[dim_xyz_index] += dim_at(t_out->sizes(), packed_dim_index);
}
} else {
// set destination offset w as channel size of the output tensor if
// splitting channels
dst_offset[3] = is_splitting_channel ? out_channel_size : 0;
add_copy_offset_node(
graph, in, range, src_offset, dst_offset, out_ref, false, true);

src_offset[2] += range[2];
}
} else if (dim_index == kChannel4D) {
int32_t src_offset = 0;
int32_t dst_offset = 0;

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
int32_t range = dim_at<kChannel4D>(t_out->sizes());
add_copy_channel_offset_node(
graph, in, range, src_offset, dst_offset, out_ref);
src_offset += range;
src_offset[dim_xyz_index] +=
is_splitting_channel ? out_channel_size : range[dim_xyz_index];
}

} else {
VK_THROW("not implemented");
}
}

Expand Down
13 changes: 13 additions & 0 deletions backends/vulkan/test/op_tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,30 +922,41 @@ def get_split_with_sizes_inputs():
Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"])
test_cases = [
# Split on Width
Test(self=(S1, 7, 10, 11), sizes=[1, 3, 2, 5], dim=3),
Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3),
Test(self=(7, 10, 11), sizes=[1, 3, 2, 5], dim=2),
Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
Test(self=(7, 10, 11), sizes=[3, 8], dim=2),
Test(self=(7, 10, 10), sizes=[1, 9], dim=2),
Test(self=(10, 10), sizes=[1, 9], dim=1),
Test(self=(10,), sizes=[1, 9], dim=0),
# Split on Height
Test(self=(S1, 7, 11, 10), sizes=[1, 3, 2, 5], dim=2),
Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
Test(self=(7, 11, 10), sizes=[1, 3, 2, 5], dim=1),
Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1),
Test(self=(7, 11, 11), sizes=[3, 8], dim=1),
Test(self=(7, 10, 10), sizes=[10], dim=1),
Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1),
Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0),
# Split on Batch
Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0),
Test(self=(10, 7, 10, 10), sizes=[10], dim=0),
# Split on Channel
Test(self=(7, 13, 4, 8), sizes=[3, 5, 2, 3], dim=1),
Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1),
Test(self=(7, 13, 4, 8), sizes=[3, 2, 2, 5, 1], dim=1),
Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1),
Test(self=(13, 4, 8), sizes=[3, 5, 2, 1, 2], dim=0),
Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0),
Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0),
Test(self=(13, 4, 8), sizes=[13], dim=0),
]
test_suite = VkTestSuite([tuple(tc) for tc in test_cases])

test_suite.layouts = [
"utils::kWidthPacked",
"utils::kHeightPacked",
"utils::kChannelsPacked",
]
test_suite.data_gen = "make_seq_tensor"
Expand Down Expand Up @@ -997,6 +1008,8 @@ def get_split_tensor_inputs():
)

test_suite.layouts = [
"utils::kWidthPacked",
"utils::kHeightPacked",
"utils::kChannelsPacked",
]
test_suite.data_gen = "make_seq_tensor"
Expand Down
Loading