Skip to content

Commit

Permalink
* Correct 2D memset benchmark to iterate in the right order for flux.
Browse files Browse the repository at this point in the history
* Add 2D diagonal memset benchmark, which evaluates the performance of
  the composition of `cartesian_product` of `iota`s and `filter`ing.
* Add a reference implementation of C++23's `cartesian_product`.
* Add comparisons against C++ Standard Library ranges to all of the memset
  benchmarks.
  • Loading branch information
brycelelbach committed Jul 31, 2023
1 parent 4406237 commit 6958d51
Show file tree
Hide file tree
Showing 3 changed files with 541 additions and 28 deletions.
86 changes: 66 additions & 20 deletions benchmark/multidimensional_memset_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,84 @@
#include <flux.hpp>

#include <numeric>
#include <ranges>
#include <iostream>

namespace an = ankerl::nanobench;

extern void memset_2d_reference(double* A, std::size_t N, std::size_t M);

extern void memset_2d_flux_cartesian_product_iota(double* A, std::size_t N, std::size_t M);
// Kernels are placed in a separate translation unit to prevent compilers from
// optimizing them based on the input that we'll be giving them and to make it
// easier to study their compiled assembly.
extern void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M);

int main(int argc, char** argv)
{
int const n_iters = argc > 1 ? std::atoi(argv[1]) : 40;

constexpr std::size_t N = 1024;
constexpr std::size_t M = 2048;
constexpr flux::distance_t N = 1024;
constexpr flux::distance_t M = 2048;
std::vector<double> A(N * M);

{
auto bench = an::Bench().minEpochIterations(n_iters).relative(true);

std::iota(A.begin(), A.end(), 0);

bench.run("memset_2d_handwritten",
[&] { memset_2d_reference(A.data(), N, M); });

if (auto it = std::ranges::find_if_not(A, [&] (auto e) { return e == 0; }); it != A.end())
throw false;

const auto run_benchmark =
[] (auto& bench, auto& A, auto N, auto M, auto name, auto func, auto check) {
std::iota(A.begin(), A.end(), 0);
bench.run(name, [&] { func(A.data(), N, M); });
check(A, N, M);
};

bench.run("memset_2d_flux_cartesian_product_iota",
[&] { memset_2d_flux_cartesian_product_iota(A.data(), N, M); });
{
const auto check_2d = [] (auto& A, auto N, auto M) {
const auto it = std::ranges::find_if_not(A, [&] (auto e) { return e == 0.0; });
if (it != A.end())
throw false;
};

auto bench = an::Bench()
.minEpochIterations(n_iters)
.relative(true)
.performanceCounters(false);

const auto run_2d_benchmark_impl = [&] (auto name, auto func) {
run_benchmark(bench, A, N, M, name, func, check_2d);
};

#define run_2d_benchmark(func) run_2d_benchmark_impl(#func, func)

run_2d_benchmark(memset_2d_reference);
run_2d_benchmark(memset_2d_std_cartesian_product_iota);
run_2d_benchmark(memset_2d_flux_cartesian_product_iota);
}

if (auto it = std::ranges::find_if_not(A, [&] (auto e) { return e == 0; }); it != A.end())
throw false;
{
const auto check_diagonal_2d = [] (auto& A, auto N, auto M) {
for (auto i : std::views::iota(0, N))
for (auto j : std::views::iota(0, M)) {
if (i == j) {
if (A[i * M + j] != 0.0) throw false;
} else {
if (A[i * M + j] != i * M + j) throw false;
}
}
};

auto bench = an::Bench()
.minEpochIterations(n_iters)
.relative(true)
.performanceCounters(false);

const auto run_diagonal_2d_benchmark_impl = [&] (auto name, auto func) {
run_benchmark(bench, A, N, M, name, func, check_diagonal_2d);
};

#define run_diagonal_2d_benchmark(func) run_diagonal_2d_benchmark_impl(#func, func)

run_diagonal_2d_benchmark(memset_diagonal_2d_reference);
run_diagonal_2d_benchmark(memset_diagonal_2d_std_cartesian_product_iota_filter);
run_diagonal_2d_benchmark(memset_diagonal_2d_flux_cartesian_product_iota_filter);
}
}
60 changes: 52 additions & 8 deletions benchmark/multidimensional_memset_benchmark_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,62 @@
#include <flux/op/cartesian_product.hpp>
#include <flux/source/iota.hpp>
#include <flux/op/for_each.hpp>
#include <flux/op/filter.hpp>

void memset_2d_reference(double* A, std::size_t N, std::size_t M)
#include "ranges_cartesian_product.hpp"

#include <ranges>
#include <algorithm>

void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M)
{
for (flux::distance_t i = 0; i != N; ++i)
for (flux::distance_t j = 0; j != M; ++j)
A[i * M + j] = 0.0;
}

void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M)
{
std::ranges::for_each(
std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M)),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

// Flux variant of the 2D memset kernel: iterates the cartesian product of
// flux::ints(0, N) and flux::ints(0, M) and stores 0.0 at A[i * M + j] for each
// (i, j) pair, i.e. `A` is indexed as a row-major N x M array.
//
// A: pointer to storage holding at least N * M doubles.
// N: bound of the first index sequence.
// M: bound of the second index sequence and the row stride.
//
// The body deliberately contains only the flux pipeline: any additional
// hand-written loop over `A` here would be timed as part of this kernel and
// would make the comparison against memset_2d_reference meaningless.
//
// NOTE(review): presumably flux::ints(0, X) yields the half-open range [0, X),
// matching std::views::iota(0, X) in the std variant — confirm against flux.
void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M)
{
    flux::for_each(
        flux::cartesian_product(flux::ints(0, N), flux::ints(0, M)),
        // flux::unpack lets the two-argument lambda accept the tuple-like
        // elements produced by cartesian_product.
        flux::unpack([&] (auto i, auto j) {
            A[i * M + j] = 0.0;
        }));
}

void memset_2d_flux_cartesian_product_iota(double* A, std::size_t N, std::size_t M)
void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M)
{
flux::cartesian_product(flux::iota(0LU, N), flux::iota(0LU, M))
.for_each(flux::unpack([&] (auto i, auto j) {
A[i + j * N] = 0.0;
for (flux::distance_t i = 0; i != N; ++i)
for (flux::distance_t j = 0; j != M; ++j)
if (i == j) A[i * M + j] = 0.0;
}

void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M)
{
std::ranges::for_each(
std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M))
| std::views::filter(flux::unpack([] (auto i, auto j) { return i == j; })),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

// Flux variant of the diagonal memset kernel: builds the cartesian product of
// flux::ints(0, N) and flux::ints(0, M), filters it down to the pairs whose two
// indices are equal, and stores 0.0 at A[row * M + col] for each remaining pair.
//
// A: pointer to storage holding at least N * M doubles.
// N: bound of the first index sequence.
// M: bound of the second index sequence and the row stride.
//
// NOTE(review): presumably flux::ints(0, X) yields the half-open range [0, X) —
// confirm against the flux documentation.
void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M)
{
    // Both callables are wrapped with flux::unpack so they can be invoked with
    // the tuple-like elements produced by cartesian_product.
    const auto on_diagonal = flux::unpack([] (auto row, auto col) { return row == col; });
    const auto zero_at = flux::unpack([&] (auto row, auto col) {
        A[row * M + col] = 0.0;
    });

    flux::for_each(
        flux::cartesian_product(flux::ints(0, N), flux::ints(0, M))
            .filter(on_diagonal),
        zero_at);
}

Loading

0 comments on commit 6958d51

Please sign in to comment.