-
Notifications
You must be signed in to change notification settings - Fork 355
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support int8 texture tensors without requiring int8 buffers (#4485)
Summary: Pull Request resolved: #4485 ## Context By default, storage buffers in Vulkan must contain 32 bit data types; using 8 bit and 16 bit data types in buffers can be enabled optionally by supporting the [VK_KHR_8bit_storage](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_8bit_storage.html) extension or the [VK_KHR_16bit_storage](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_16bit_storage.html) extension respectively. Previously, 8-bit and 16-bit tensors were enabled by using those extensions; however, this meant that 8-bit and 16-bit tensors could not be used if the Vulkan driver does not support the corresponding extension. This diff adds support for 8-bit texture-backed tensors without the need for the VK_KHR_8bit_storage extension. This is done by introducing shaders that manually pack and repack 4 8-bit integers into a single int32 value. Once the tensor data has been transferred to an image texture (which will use the `VK_FORMAT_R8G8B8A8_SINT` image format) the extension will no longer be required. Reviewed By: jorgep31415 Differential Revision: D60536832 fbshipit-source-id: 8d3d8b069582ab8c18d41701c864778621d2f6e3
- Loading branch information
1 parent
4483bb6
commit 448c7d3
Showing
16 changed files
with
320 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
54 changes: 54 additions & 0 deletions
54
backends/vulkan/runtime/graph/ops/glsl/int8_tensor_to_nchw_noint8.glsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#version 450 core | ||
|
||
// PRECISION is filled in when the shader template is expanded. It is not
// referenced directly in this file; presumably the layout_declare_* macros
// below consume it - TODO confirm.
#define PRECISION ${PRECISION} | ||
|
||
#include "indexing_utils.h" | ||
|
||
// Default memory layout for the buffer blocks declared in this shader.
layout(std430) buffer; | ||
|
||
// Required for the [[unroll]] loop attribute used in main().
#extension GL_EXT_control_flow_attributes : require | ||
|
||
// Resource declarations, expanded by the shader template engine. Reading the
// macro arguments (binding, access, name, type[, storage]):
//   0: t_in         - "int8" tensor declared with "texture3d" storage, "r"
//   1: nchw_out     - "int" buffer, "w"; main() stores one packed 32-bit word
//                     (four 8-bit values) per element
//   2: tensor_sizes - ivec4 uniform
//   3: out_ntexels  - int uniform; main() uses it as the upper bound on the
//                     nchw_out index it writes
${layout_declare_tensor(0, "r", "t_in", "int8", "texture3d")} | ||
${layout_declare_buffer(1, "w", "nchw_out", "int")} | ||
${layout_declare_ubo(2, "ivec4", "tensor_sizes")} | ||
${layout_declare_ubo(3, "int", "out_ntexels")} | ||
|
||
// Workgroup size is supplied through specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; | ||
|
||
// Specialization constant 3 selects the packed dimension; defaults to C_DIM.
layout(constant_id = 3) const int packed_dim = C_DIM; | ||
|
||
/*
 * Each invocation writes one 32-bit word of nchw_out. The word is assembled
 * from four consecutive NCHW-ordered elements (indices 4*w .. 4*w+3, where w
 * is the invocation's global x id), each fetched from the texture t_in.
 */
void main() {
  const int word_idx = int(gl_GlobalInvocationID.x);
  if (word_idx >= out_ntexels) {
    return;
  }

  // NCHW buffer index of the first element packed into this word.
  const int first_elem_idx = 4 * word_idx;

  ivec4 elems;
  [[unroll]] for (int lane = 0; lane < 4; ++lane) {
    // Flat NCHW index -> tensor index -> texture position. The .w component
    // of the position selects which component of the texel to read.
    const ivec4 tidx = from_nchw_buffer_i(first_elem_idx + lane, tensor_sizes);
    const ivec4 elem_pos = to_texture_elem_pos(tidx, tensor_sizes, packed_dim);
    elems[lane] = load_texel(t_in, elem_pos.xyz)[elem_pos.w];
  }

  // Pack the low 8 bits of each element into one int. Little-endian byte
  // order is assumed, so element 0 lands in the least significant byte and
  // element 3 in the most significant byte.
  nchw_out[word_idx] = ((elems[3] & 0xFF) << 24) | ((elems[2] & 0xFF) << 16) |
      ((elems[1] & 0xFF) << 8) | (elems[0] & 0xFF);
}
74 changes: 74 additions & 0 deletions
74
backends/vulkan/runtime/graph/ops/glsl/nchw_to_int8_tensor_noint8.glsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#version 450 core | ||
|
||
// PRECISION is filled in when the shader template is expanded. It is not
// referenced directly in this file; presumably the layout_declare_* macros
// below consume it - TODO confirm.
#define PRECISION ${PRECISION} | ||
|
||
#include "indexing_utils.h" | ||
|
||
// Default memory layout for the buffer blocks declared in this shader.
layout(std430) buffer; | ||
|
||
// Required for the [[unroll]] loop attribute used in read_texel().
#extension GL_EXT_control_flow_attributes : require | ||
|
||
// Resource declarations, expanded by the shader template engine. Reading the
// macro arguments (binding, access, name, type[, storage]):
//   0: t_out        - "int8" tensor declared with "texture3d" storage, "w"
//   1: nchw_in      - "int" buffer, "r"; read_texel() treats each element as
//                     four packed 8-bit values
//   2: tensor_sizes - ivec4 uniform
${layout_declare_tensor(0, "w", "t_out", "int8", "texture3d")} | ||
${layout_declare_buffer(1, "r", "nchw_in", "int")} | ||
${layout_declare_ubo(2, "ivec4", "tensor_sizes")} | ||
|
||
// Workgroup size is supplied through specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; | ||
|
||
// Specialization constant 3 selects the packed dimension; defaults to C_DIM.
layout(constant_id = 3) const int packed_dim = C_DIM; | ||
|
||
/*
 * Sign-extends an 8-bit value stored in the low byte of x.
 *
 * When x lies in 0x80..0xFF (so x >> 7 is exactly 1) the upper 24 bits are
 * set, giving the corresponding negative 32-bit value. Every other input,
 * including one that is already negative, is returned unchanged.
 */
int extend_sign(int x) {
  const int bits_above_6 = x >> 7;
  if (bits_above_6 != 1) {
    return x;
  }
  return x | 0xFFFFFF00;
}
|
||
/*
 * Assembles the texel that starts at tensor_idx by unpacking up to four
 * 8-bit values from the int buffer nchw_in.
 *
 * Lanes whose coordinate along packed_dim would fall outside tensor_sizes
 * are left at 0.
 */
ivec4 read_texel(ivec4 tensor_idx) {
  const ivec4 elem_ixs = get_texel_nchw_buffer_ixs(
      tensor_idx, tensor_sizes, packed_dim);

  // Every int in nchw_in carries four 8-bit values. Little-endian byte order
  // is assumed, so element k of a word (k = index % 4) occupies bits
  // [8k, 8k + 8), i.e. later elements sit in more significant bytes.
  const ivec4 bit_offsets = 8 * (elem_ixs % 4);

  ivec4 texel = ivec4(0);

  [[unroll]] for (int lane = 0; lane < 4; ++lane) {
    if (tensor_idx[packed_dim] + lane < tensor_sizes[packed_dim]) {
      const int packed_word = nchw_in[elem_ixs[lane] / 4];
      // Shift the wanted byte down to bits 0-7 and drop everything above it,
      // leaving a value in 0..255 that extend_sign turns into a signed int.
      const int raw_byte = (packed_word >> bit_offsets[lane]) & 0xFF;
      texel[lane] = extend_sign(raw_byte);
    }
  }

  return texel;
}
|
||
/*
 * Each invocation handles the texel at its global invocation id: the position
 * is mapped to a tensor index, and if that index lies inside tensor_sizes the
 * texel returned by read_texel() is written to t_out.
 */
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);
  const ivec4 tidx = to_tensor_idx(texel_pos, tensor_sizes, packed_dim);

  // Invocations whose tensor index falls outside the tensor write nothing.
  const bool in_bounds = all(lessThan(tidx, tensor_sizes));
  if (in_bounds) {
    write_texel(t_out, texel_pos, read_texel(tidx));
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.