Skip to content

Add buffer cacheline size metric #4228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/buf_cacheline_size.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

// PRECISION is filled in by the shader template substitution before
// compilation; it is not referenced directly elsewhere in this file.
#define PRECISION ${PRECISION}

// Make std430 the default layout for buffer blocks declared below.
layout(std430) buffer;


// Buffer bindings used by main(): `source` at binding 0 is only read and
// `destination` at binding 1 is only written.
// NOTE(review): the "r"/"w" arguments presumably select readonly/writeonly
// qualifiers in the generated declaration - confirm against the codegen helper.
${layout_declare_buffer(0, "r", "source", DTYPE)}
${layout_declare_buffer(1, "w", "destination", DTYPE)}

// Workgroup dimensions are taken from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Benchmark parameters, supplied as specialization constants 3-5 (each
// defaults to 1 when not specialized):
//   niter  - number of loop iterations executed by every invocation.
//   stride - element distance between the two reads of one iteration.
//   pitch  - element distance between the base indices of consecutive
//            invocations along x.
layout(constant_id = 3) const int niter = 1;
layout(constant_id = 4) const int stride = 1;
layout(constant_id = 5) const int pitch = 1;

// Per-invocation benchmark body: for niter iterations, read two elements of
// `source` that lie `stride` elements apart, starting at this invocation's
// base index (pitch * gl_GlobalInvocationID[0]), and accumulate them.
void main() {
float c = 0;
for (int i = 0; i < niter; ++i) {
// i is never negative in this loop, so the arithmetic shift always yields 0.
// NOTE(review): deriving the offset from the loop counter presumably keeps
// the compiler from hoisting the loads out of the loop - confirm intent.
const int zero = i >> 31;
// First read: this invocation's base element.
c += source[zero + pitch * gl_GlobalInvocationID[0]];
// Second read: `stride` elements past the base element.
c += source[zero + stride + pitch * gl_GlobalInvocationID[0]];
}
// Every invocation stores its sum to element 0.
// NOTE(review): the stored value looks irrelevant to the benchmark; the
// write presumably exists only so the reads above are not optimized away.
destination[0] = c;
}
12 changes: 12 additions & 0 deletions backends/vulkan/tools/gpuinfo/glsl/buf_cacheline_size.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Shader generation spec for the buf_cacheline_size GLSL template.
# NOTE(review): nesting/indentation is not visible in this view; the keys below
# are presumably nested under buf_cacheline_size - confirm in the real file.
buf_cacheline_size:
parameter_names_with_default_values:
# Default value for the DTYPE parameter referenced by the template's
# buffer declarations.
DTYPE: float
# Default value for the STORAGE parameter.
STORAGE: buffer
shader_variants:
# A single variant is listed, named the same as the template.
- NAME: buf_cacheline_size
7 changes: 2 additions & 5 deletions backends/vulkan/tools/gpuinfo/glsl/reg_count.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@

layout(std430) buffer;

layout(set = 0, binding = 0) buffer PRECISION restrict writeonly Buffer {
float data[];
}
out_buff;
${layout_declare_buffer(0, "w", "out_buff", DTYPE)}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand All @@ -35,5 +32,5 @@ void main() {
i = i >> 31;

$for k in range(int(NREG)):
out_buff.data[${k} * i] = reg_data${k};
out_buff[${k} * i] = reg_data${k};
}
66 changes: 66 additions & 0 deletions backends/vulkan/tools/gpuinfo/src/app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class App {
const uint32_t NREG_MAX = 512;
const uint32_t NREG_STEP = 1;

// TODO: Make these values configurable
const double COMPENSATE = 0.01;
const double THRESHOLD = 3;

Expand Down Expand Up @@ -150,11 +151,76 @@ class App {
<< std::endl;
std::cout << "Register type," << reg_ty << std::endl;
}

// Benchmarks the "buf_cacheline_size" shader with increasing read strides and
// reports the first stride (converted to bytes) at which DtJumpFinder flags
// the measured time. Prints one line per tested stride followed by a final
// "BufTopLevelCachelineSize,<bytes>" line. When no stride is flagged, a notice
// is printed and the largest tested stride (in bytes) is reported instead.
void buf_cacheline_size() {
  std::cout << std::endl;
  std::cout << "------ Buffer Cacheline Size ------" << std::endl;

  // TODO: Make these values configurable
  // These intentionally shadow the class-level constants of the same name.
  const double COMPENSATE = 0.01;
  const double THRESHOLD = 10;

  // Each logical thread gets its own PITCH-sized slice of the buffer, and
  // strides are only tested within one slice.
  const uint32_t PITCH = buf_cache_size_ / nthread_logic_;
  const uint32_t BUF_SIZE = buf_cache_size_;
  const uint32_t MAX_STRIDE = PITCH;

  // NOTE(review): presumably assigned by ensure_min_niter() before the first
  // bench() call reads it - confirm against its definition.
  uint32_t NITER;

  // Runs the shader once configuration for the given stride and returns the
  // time reported by benchmark_on_gpu().
  auto bench = [&](int stride) {
    size_t len = sizeof(float);
    StorageBuffer in_buf(context(), vkapi::kFloat, BUF_SIZE);
    StorageBuffer out_buf(context(), vkapi::kFloat, len);
    vkapi::PipelineBarrier pipeline_barrier{};

    auto shader_name = "buf_cacheline_size";

    auto time = benchmark_on_gpu(shader_name, 100, [&]() {
      context()->submit_compute_job(
          VK_KERNEL_FROM_STR(shader_name),
          pipeline_barrier,
          {nthread_logic_, 1, 1},
          {nthread_logic_, 1, 1},
          // Order matches the shader's niter / stride / pitch constants.
          {SV(NITER), SV(stride), SV(PITCH)},
          VK_NULL_HANDLE,
          0,
          in_buf.buffer(),
          out_buf.buffer());
    });
    return time;
  };

  ensure_min_niter(1000, NITER, [&]() { return bench(1); });

  // Fallback result, in bytes like the detected value, used when no stride
  // is flagged below.
  uint32_t cacheline_size =
      static_cast<uint32_t>(MAX_STRIDE * sizeof(float));
  // Tracked explicitly so a detection at stride == MAX_STRIDE is not
  // mistaken for "loop ran to completion".
  bool found = false;

  DtJumpFinder<5> dj(COMPENSATE, THRESHOLD);
  for (uint32_t stride = 1; stride <= MAX_STRIDE; ++stride) {
    const double time = bench(stride);
    std::cout << "Testing stride=\t" << stride << "\t, time=\t" << time
              << std::endl;

    if (dj.push(time)) {
      // Strides count float elements; report the size in bytes.
      cacheline_size = static_cast<uint32_t>(stride * sizeof(float));
      found = true;
      break;
    }
  }
  if (!found) {
    std::cout << "Unable to conclude a top level buffer cacheline size."
              << std::endl;
  }

  std::cout << "BufTopLevelCachelineSize," << cacheline_size << std::endl;
}
};

int main(int argc, const char** argv) {
App app;

// TODO: Allow user to skip tests
app.reg_count();
app.buf_cacheline_size();

return 0;
}
Loading