Skip to content

Commit

Permalink
Add multidimensional memset vectorization benchmark.
Browse files Browse the repository at this point in the history
  • Loading branch information
brycelelbach committed Jul 31, 2023
1 parent ec1831e commit 53d051e
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 1 deletion.
5 changes: 4 additions & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ add_executable(internal-iteration-benchmark internal_iteration_benchmark.cpp)
target_link_libraries(internal-iteration-benchmark PUBLIC nanobench flux)

add_executable(sort-benchmark sort_benchmark.cpp)
target_link_libraries(sort-benchmark PUBLIC nanobench flux)
target_link_libraries(sort-benchmark PUBLIC nanobench flux)

add_executable(multidimensional-memset-benchmark multidimensional_memset_benchmark.cpp multidimensional_memset_benchmark_kernels.cpp)
target_link_libraries(multidimensional-memset-benchmark PUBLIC nanobench flux)
46 changes: 46 additions & 0 deletions benchmark/multidimensional_memset_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

// Copyright (c) 2021 Barry Revzin
// Copyright (c) 2023 NVIDIA Corporation
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <nanobench.h>

#include <flux.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <numeric>
#include <vector>

namespace an = ankerl::nanobench;

// Hand-written nested-loop kernel: stores 0.0 to A[i + j * N] for every
// i in [0, N) and j in [0, M). Defined in
// multidimensional_memset_benchmark_kernels.cpp, a separate translation unit
// (presumably so the call cannot be inlined into the timing loop - confirm).
extern void memset_2d_reference(double* A, std::size_t N, std::size_t M);

// flux counterpart of the kernel above: performs the same stores, but drives
// them with flux::cartesian_product over two flux::iota sequences. Defined in
// the same kernels translation unit.
extern void memset_2d_flux_cartesian_product_iota(double* A, std::size_t N, std::size_t M);

// Benchmark driver: times the hand-written 2D memset kernel against the flux
// cartesian_product/iota kernel on an N x M buffer of doubles, and checks
// after each run that the whole buffer was zeroed.
//
// Usage: <program> [min_epoch_iterations]
//   min_epoch_iterations - positive integer forwarded to nanobench's
//                          minEpochIterations(); defaults to 40.
//
// Returns EXIT_SUCCESS when both kernels zero the buffer, EXIT_FAILURE on a
// bad argument or a verification failure.
int main(int argc, char** argv)
{
    int const n_iters = argc > 1 ? std::atoi(argv[1]) : 40;

    // std::atoi yields 0 for non-numeric input, and a negative value would be
    // implicitly converted to a huge unsigned iteration count, so reject both.
    if (n_iters <= 0) {
        std::fprintf(stderr, "usage: %s [min_epoch_iterations > 0]\n", argv[0]);
        return EXIT_FAILURE;
    }

    constexpr std::size_t N = 1024;
    constexpr std::size_t M = 2048;
    std::vector<double> A(N * M);

    // Refill the buffer with non-zero data (every element but the first) so a
    // kernel that skips elements is detected by the check below.
    auto const reset = [&A] { std::iota(A.begin(), A.end(), 0); };

    // Report (instead of throwing an uncaught non-exception value, which would
    // end in std::terminate with no diagnostic) when a kernel left data behind.
    auto const verify = [&A](char const* kernel) {
        bool const zeroed =
            std::ranges::all_of(A, [](double e) { return e == 0; });
        if (!zeroed)
            std::fprintf(stderr, "%s: buffer was not fully zeroed\n", kernel);
        return zeroed;
    };

    auto bench = an::Bench().minEpochIterations(n_iters).relative(true);

    reset();
    bench.run("memset_2d_handwritten",
              [&] { memset_2d_reference(A.data(), N, M); });
    if (!verify("memset_2d_handwritten"))
        return EXIT_FAILURE;

    reset();
    bench.run("memset_2d_flux_cartesian_product_iota",
              [&] { memset_2d_flux_cartesian_product_iota(A.data(), N, M); });
    if (!verify("memset_2d_flux_cartesian_product_iota"))
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}
23 changes: 23 additions & 0 deletions benchmark/multidimensional_memset_benchmark_kernels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

// Copyright (c) 2023 NVIDIA Corporation
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <flux/op/cartesian_product.hpp>
#include <flux/source/iota.hpp>
#include <flux/op/for_each.hpp>

// Baseline kernel for the benchmark: a plain doubly-nested loop that stores
// 0.0 into the first N * M elements of A.
//
//   A - buffer holding at least N * M doubles
//   N - extent of the inner (unit-stride) loop
//   M - extent of the outer loop; consecutive outer steps are N elements apart
//
// Deliberately kept as an explicit loop nest rather than a library call: it is
// the hand-written form the other kernels are measured against.
void memset_2d_reference(double* A, std::size_t N, std::size_t M)
{
    for (std::size_t outer = 0; outer != M; ++outer) {
        std::size_t const base = outer * N;
        for (std::size_t inner = 0; inner != N; ++inner) {
            A[base + inner] = 0.0;
        }
    }
}

// flux counterpart of memset_2d_reference: stores 0.0 into A[i + j * N] for
// every i in [0, N) and j in [0, M), with the index pairs produced by a
// cartesian product of two iota sequences instead of a hand-written loop nest.
//
//   A - buffer holding at least N * M doubles
//   N - extent of the unit-stride index i
//   M - extent of the index j (stride N)
void memset_2d_flux_cartesian_product_iota(double* A, std::size_t N, std::size_t M)
{
    // cartesian_product advances its LAST sequence fastest. Listing the j
    // range first and the i range last therefore makes i the innermost index,
    // so the stores are contiguous and visit memory in the same order as
    // memset_2d_reference - otherwise the two kernels would be timed on
    // different access patterns.
    //
    // std::size_t{0} (rather than 0LU) keeps both iota bounds the same type
    // on platforms where unsigned long is narrower than std::size_t.
    flux::cartesian_product(flux::iota(std::size_t{0}, M), flux::iota(std::size_t{0}, N))
        .for_each(flux::unpack([&] (auto j, auto i) {
            A[i + j * N] = 0.0;
        }));
}

0 comments on commit 53d051e

Please sign in to comment.