Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into Arm-backend-Improve-memory-config-an…
Browse files Browse the repository at this point in the history
…d-documentation-in-the-runtime
  • Loading branch information
zingo authored Sep 24, 2024
2 parents 005b43f + 3e79ea4 commit 0f5f9d6
Show file tree
Hide file tree
Showing 27 changed files with 841 additions and 124 deletions.
9 changes: 7 additions & 2 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,7 @@ void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
const int dim1_whcn = sizes_.size() - 1 - dim1;
if (packed_dim_ == dim0_whcn) {
packed_dim_ = dim1_whcn;
}
if (packed_dim_ == dim1_whcn) {
} else if (packed_dim_ == dim1_whcn) {
packed_dim_ = dim0_whcn;
}

Expand All @@ -719,6 +718,12 @@ void vTensor::virtual_transpose(const int64_t dim0, const int64_t dim1) {
VK_CHECK_COND(dim0_whcn < 3 && dim1_whcn < 3);
std::iter_swap(
axis_map_.begin() + dim0_whcn, axis_map_.begin() + dim1_whcn);
// Update the "identity" of the concatted dimension
if (axis_map_.at(3) == dim0_whcn) {
axis_map_.at(3) = dim1_whcn;
} else if (axis_map_.at(3) == dim1_whcn) {
axis_map_.at(3) = dim0_whcn;
}
}
update_metadata();
}
Expand Down
26 changes: 26 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,32 @@ std::vector<int64_t> ComputeGraph::sizes_of(const ValueRef idx) const {
VK_THROW("Could not get sizes of value with type ", val.type());
}

int64_t ComputeGraph::dim_of(const ValueRef idx) const {
const Value& val = values_.at(idx);
if (val.isTensor()) {
return val.toConstTensor().dim();
} else if (val.isTensorRef()) {
return val.toConstTensorRef().sizes.size();
}
VK_THROW("Could not get dim of value with type ", val.type());
}

std::vector<int64_t> ComputeGraph::dim_order_of(const ValueRef idx) const {
const Value& val = values_.at(idx);
if (val.isTensor()) {
return val.toConstTensor().dim_order();
}
VK_THROW("Could not get dim order of value with type ", val.type());
}

std::vector<int64_t> ComputeGraph::strides_of(const ValueRef idx) const {
const Value& val = values_.at(idx);
if (val.isTensor()) {
return val.toConstTensor().strides();
}
VK_THROW("Could not get strides of value with type ", val.type());
}

vkapi::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
const Value& val = values_.at(idx);
if (val.isTensor()) {
Expand Down
6 changes: 6 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,12 @@ class ComputeGraph final {
VK_THROW("Could not get sizes of value with type ", val.type());
}

int64_t dim_of(const ValueRef idx) const;

std::vector<int64_t> dim_order_of(const ValueRef idx) const;

std::vector<int64_t> strides_of(const ValueRef idx) const;

vkapi::ScalarType dtype_of(const ValueRef idx) const;

inline const utils::ivec3& logical_limits_of(const ValueRef idx) const {
Expand Down
6 changes: 3 additions & 3 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ class ComputeGraph;
* access permission.
*/
struct ArgGroup {
ArgGroup(const ValueRef ref, const vkapi::MemoryAccessType access)
ArgGroup(const ValueRef ref, const vkapi::MemoryAccessFlags access)
: refs{ref}, access(access) {}

ArgGroup(
const std::vector<ValueRef>& refs,
const vkapi::MemoryAccessType access)
const vkapi::MemoryAccessFlags access)
: refs(refs), access(access) {}

const std::vector<ValueRef> refs;
const vkapi::MemoryAccessType access;
const vkapi::MemoryAccessFlags access;
};

/*
Expand Down
43 changes: 21 additions & 22 deletions backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,19 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "existing_out", DTYPE, STORAGE)}
${layout_declare_tensor(2, "r", "t_in", DTYPE, STORAGE)}

layout(set = 0, binding = 3) uniform PRECISION restrict CopyArgs {
ivec4 out_sizes;
ivec4 in_sizes;
${layout_declare_ubo(3, "ivec4", "out_sizes")}
${layout_declare_ubo(4, "ivec4", "out_axis_map")}
${layout_declare_ubo(5, "ivec4", "in_sizes")}
${layout_declare_ubo(6, "ivec4", "in_axis_map")}
layout(set = 0, binding = 7) uniform PRECISION restrict CopyArgs {
// Operates on (x, y, z) logical extents.
ivec3 range;
// Analogous to the range variable in copy. It defines the number of channels
// being copied.
int channel_range;
int src_channel_offset;
int dst_channel_offset;
int unused;
// Operates on (x, y, z) extents.
ivec3 range;
int unused1;
ivec3 dst_offset;
int unused2;
int dst_channel_offset;
int src_channel_offset;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
Expand All @@ -43,36 +42,36 @@ layout(constant_id = 3) const int packed_dim = C_DIM;
void main() {
// Note: Unlike other shaders, the range is often not equal to the destination
// texture extent.
const ivec3 pos = ivec3(gl_GlobalInvocationID);
if (any(greaterThanEqual(pos, range))) {
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
if (any(greaterThanEqual(lpos, range))) {
return;
}

const ivec3 out_pos = pos + dst_offset;
const ivec3 out_lpos = lpos + dst_offset;

const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim);
const ivec4 out_tidx = lpos_to_tidx(out_lpos, out_sizes, out_axis_map.w, packed_dim);

// First read the existing values to make sure the boundary values stay.
VEC4_T v = VEC4_T(texelFetch(existing_out, out_pos, 0));
VEC4_T v = load_texel_lpos(existing_out, out_lpos, out_axis_map);

ivec4 in_tidx = out_tidx;
for (int i=0; i<4; i++) {
ivec4 in_whcn = out_whcn;

in_whcn.z = out_whcn.z - dst_channel_offset + i;
in_tidx[packed_dim] = out_tidx[packed_dim] - dst_channel_offset + i;

// Handle the partial update for the beginning of a channel in an existing tensor.
// If the source channel index is below zero or exceeds the range, we skip
// updating the element to avoid overwriting existing data.
if ((in_whcn.z < 0) || (in_whcn.z >= channel_range)) {
if ((in_tidx[packed_dim] < 0) || (in_tidx[packed_dim] >= channel_range)) {
continue;
}

// Readjust for the source offset.
in_whcn.z = in_whcn.z + src_channel_offset;
in_tidx[packed_dim] += src_channel_offset;

ivec4 in_elem_pos = to_texture_elem_pos(in_whcn, in_sizes, packed_dim);
v[i] = VEC4_T(texelFetch(t_in, in_elem_pos.xyz, 0))[in_elem_pos.w];
ivec4 in_posi = tidx_to_posi(in_tidx, in_sizes, in_axis_map, packed_dim);
v[i] = load_texel(t_in, in_posi.xyz)[in_posi.w];
}

imageStore(t_out, out_pos, v);
write_texel_lpos(t_out, out_lpos, v, out_axis_map);
}
31 changes: 13 additions & 18 deletions backends/vulkan/runtime/graph/ops/impl/Copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,28 +139,17 @@ void add_copy_channel_offset_node(
uvec3 local_size = adaptive_work_group_size(global_size);

const struct Block final {
utils::ivec4 out_sizes;
utils::ivec4 in_sizes;
int32_t channel_range;
int32_t src_channel_offset;
int32_t dst_channel_offset;
int32_t unused;
ivec3 range;
int32_t unused1;
int32_t channel_range;
ivec3 dst_offset;
int32_t unused2;

int32_t dst_channel_offset;
int32_t src_channel_offset;
} channel_offset_params{
utils::make_whcn_ivec4(out_sizes),
utils::make_whcn_ivec4(in_sizes),
channel_range,
src_channel_offset,
dst_channel_offset,
0,
utils::make_ivec3(global_size),
0,
channel_range,
dst_offset,
0,
dst_channel_offset,
src_channel_offset,
};

auto shader = VK_KERNEL_FROM_STR(kernel_name);
Expand All @@ -177,7 +166,13 @@ void add_copy_channel_offset_node(
{in, vkapi::MemoryAccessType::READ},
},
// Parameter buffers
{graph.create_params_buffer(channel_offset_params)},
{
t_out->sizes_ubo(),
t_out->axis_map_ubo(),
t_in->sizes_ubo(),
t_in->axis_map_ubo(),
graph.create_params_buffer(channel_offset_params),
},
// Specialization Constants
{}));
}
Expand Down
Loading

0 comments on commit 0f5f9d6

Please sign in to comment.