-
Notifications
You must be signed in to change notification settings - Fork 357
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Pull Request resolved: #3129 aten.view_copy, supporting all packing. Using @SSJia's idea to do a direct lookup. Differential Revision: [D56281400](https://our.internmc.facebook.com/intern/diff/D56281400/) ghstack-source-id: 223111187
- Loading branch information
Showing
6 changed files
with
209 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
// Shader template preamble. The `${...}` expressions are template placeholders;
// presumably they are substituted by the shader generation step using the
// DTYPE / NDIM / PACKING parameters from the accompanying YAML - confirm.
#version 450 core | ||
|
||
#define PRECISION ${PRECISION} | ||
|
||
// Texel vector type used to hold one 4-element texel of the tensor dtype.
#define VEC4_T ${texel_type(DTYPE)} | ||
|
||
layout(std430) buffer; | ||
|
||
#include "indexing_utils.h" | ||
|
||
// binding 0: output image (write-only). binding 1: input, read through a
// sampler3D with texelFetch in main().
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; | ||
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; | ||
|
||
// NOTE(review): token-identical to the VEC4_T definition above; appears to be
// a redundant redefinition.
#define VEC4_T ${texel_type(DTYPE)} | ||
|
||
// Alias the packing-specific indexing helpers so main() is written once and
// resolves to the `_${PACKING}` variant of each helper.
#define to_tensor_idx to_tensor_idx_${PACKING} | ||
#define to_texture_pos_elem to_texture_pos_elem_${PACKING} | ||
#define get_packed_stride get_packed_stride_${PACKING} | ||
|
||
// bindings 2-5: size vectors for the output and input tensors. The host side
// (add_view_node) passes them as out gpu sizes, out cpu sizes, in gpu sizes,
// in cpu sizes, in that order.
layout(set = 0, binding = 2) uniform PRECISION restrict OutGpuSizes { | ||
uvec4 out_gpu_sizes; | ||
}; | ||
|
||
layout(set = 0, binding = 3) uniform PRECISION restrict OutCpuSizes { | ||
uvec4 out_cpu_sizes; | ||
}; | ||
|
||
layout(set = 0, binding = 4) uniform PRECISION restrict InGpuSizes { | ||
uvec4 in_gpu_sizes; | ||
}; | ||
|
||
layout(set = 0, binding = 5) uniform PRECISION restrict InCpuSizes { | ||
uvec4 in_cpu_sizes; | ||
}; | ||
|
||
// Work-group dimensions are taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; | ||
|
||
|
||
/*
 * Computes one output texel of a view (reshape) copy.
 *
 * Each invocation owns the output texel at gl_GlobalInvocationID. The four
 * elements of that texel are gathered one by one from the input texture,
 * because after a reshape they generally live in different input texels.
 */
void main() {
  const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
  const ivec4 out_tensor_idx = to_tensor_idx(out_pos, out_gpu_sizes);

  // Bail out as soon as ANY coordinate is out of range. Using all() here would
  // only reject invocations that are out of range in every dimension at once,
  // letting partially out-of-range invocations fetch and store out of bounds.
  if (any(greaterThanEqual(out_tensor_idx, out_gpu_sizes))) {
    return;
  }

  // Assume there is a virtual contiguous buffer in nchw format. From the
  // output pos, we first calculate the index in the virtual buffer, and then
  // calculate the input position from that index.

  const uint base_index = to_buffer_i(out_tensor_idx, out_cpu_sizes);
  // Buffer indices of the 4 elements packed into this output texel; they are
  // spaced by the stride of the packed dimension.
  const uvec4 buf_indices =
      base_index + ivec4(0, 1, 2, 3) * get_packed_stride(out_cpu_sizes);

  VEC4_T value;
  // Need to look up the 4 values in the output texel separately.
  for (int i = 0; i < 4; i++) {
    // Tensor coordinate of this element when interpreted with the input sizes.
    ivec4 user_coor = from_buffer_i(buf_indices[i], in_cpu_sizes);

    // xyz: input texel position, w: element index within that texel.
    ivec4 in_pos_elem = to_texture_pos_elem(user_coor, in_gpu_sizes);

    VEC4_T intex = VEC4_T(texelFetch(image_in, in_pos_elem.xyz, 0));

    value[i] = intex[in_pos_elem.w];
  }

  imageStore(image_out, out_pos, value);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Variant table for the `view` shader template.
# NOTE(review): nesting indentation is missing in this copy of the file.
view: | ||
# Default values for the template parameters.
parameter_names_with_default_values: | ||
DTYPE: float | ||
NDIM: 3 | ||
# Parameter values to generate variants for; presumably one variant is emitted
# per DTYPE x PACKING combination - confirm against the shader codegen.
generate_variant_forall: | ||
DTYPE: | ||
- VALUE: half | ||
- VALUE: float | ||
PACKING: | ||
- VALUE: C_packed | ||
- VALUE: W_packed | ||
- VALUE: H_packed | ||
# Base name of the generated shader variants.
shader_variants: | ||
- NAME: view |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h> | ||
|
||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h> | ||
|
||
namespace vkcompute { | ||
|
||
// Appends an ExecuteNode to `graph` that dispatches the "view" shader.
//
// The shader variant is picked by suffixing "view" with the dtype and memory
// layout of the output tensor. The dispatch covers the output tensor's
// extents. `out` is bound for writing and `in` for reading, followed by the
// gpu/cpu size UBOs of the output and then of the input.
void add_view_node(ComputeGraph& graph, ValueRef in, ValueRef out) {
  vTensorPtr src_tensor = graph.get_tensor(in);
  vTensorPtr dst_tensor = graph.get_tensor(out);

  // Build the shader name: "view" + dtype suffix + memory layout suffix.
  std::string shader_name("view");
  shader_name.reserve(kShaderNameReserve);
  add_dtype_suffix(shader_name, *dst_tensor);
  add_memory_layout_suffix(shader_name, *dst_tensor);

  // One invocation per output texel position.
  api::utils::uvec3 dispatch_size = dst_tensor->extents();
  api::utils::uvec3 workgroup_size = adaptive_work_group_size(dispatch_size);

  graph.execute_nodes().emplace_back(new ExecuteNode(
      graph,
      VK_KERNEL_FROM_STR(shader_name),
      dispatch_size,
      workgroup_size,
      // Resource bindings: output first, then input.
      {{out, api::MemoryAccessType::WRITE},
       {in, api::MemoryAccessType::READ}},
      // Uniform buffers, in the order the shader declares them.
      {dst_tensor->gpu_sizes_ubo(),
       dst_tensor->cpu_sizes_ubo(),
       src_tensor->gpu_sizes_ubo(),
       src_tensor->cpu_sizes_ubo()}));
}
|
||
// Operator entry point: args[0] is the input tensor and args[2] the output.
// args[1] (the requested sizes) is deliberately ignored because the output
// tensor's sizes have already been determined during compilation.
void view(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef input_ref = args[0];
  const ValueRef output_ref = args[2];
  add_view_node(graph, input_ref, output_ref);
}
|
||
// Registers `view` under the operator name aten.view_copy.default.
REGISTER_OPERATORS { | ||
VK_REGISTER_OP(aten.view_copy.default, view); | ||
} | ||
|
||
} // namespace vkcompute |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters