Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add examples for SYCL Spark #16

Merged
merged 8 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ List of benchmark kernels, and their sources.
| Reduction | Original benchmark |
| -------------------------- | ----------------------------------------- |
| r += a[i] * b[i] | Dot product |
| dp[i] = r[i] + r'*r * d[i] | Dot product->daxpy |
| r += c[i] | Sum of complex numbers |
| (r1,r2) += (c1[i],c2[i]) | Sum of complex numbers, stored as SoA |
| min(abs(c[i])) | Minimum absolute value of complex numbers |
Expand Down
42 changes: 42 additions & 0 deletions src/dot_rank1.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (c) 2022 Everything's Reduced authors
// SPDX-License-Identifier: MIT

#include <memory>

struct dot_rank1 {

  // Problem size
  const long N;

  // Benchmark data is hidden behind a PIMPL: each programming model keeps
  // its arrays in very different types, so the concrete layout is left to
  // the model's own source file
  struct data;
  std::unique_ptr<data> pdata;

  // Constructor: performs any model initialisation (not data)
  dot_rank1(long N);

  // Destructor: performs any model finalisation
  ~dot_rank1();

  // Allocate and initialise the benchmark data:
  //   r will be set to 1 * 1024 / N
  //   d will be set to 2 * 1024 / N
  // The inputs are scaled to help keep the reduction in range
  void setup();

  // Execute the benchmark a single time
  double run();

  // Finalise, releasing any benchmark data
  void teardown();

  // Value that run() is expected to return for this problem size
  double expect() {
    const double size = static_cast<double>(N);
    const double r_expected = 1024.0 * 1024.0 / size;
    return 2.0 * 1024.0 / size + r_expected / size;
  }

  // Theoretical minimum number of GB moved by one call to run():
  // four doubles per element of the problem
  double gigabytes() {
    const double gb_per_element = 1.0E-9 * sizeof(double) * 4.0;
    return gb_per_element * N;
  }
};
39 changes: 39 additions & 0 deletions src/histogram.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) 2022 Everything's Reduced authors
// SPDX-License-Identifier: MIT

#include <memory>

struct histogram {

  // Problem size
  const long N;

  // Benchmark data is hidden behind a PIMPL: each programming model keeps
  // its arrays in very different types, so the concrete layout is left to
  // the model's own source file
  struct data;
  std::unique_ptr<data> pdata;

  // Constructor: performs any model initialisation (not data)
  histogram(long N);

  // Destructor: performs any model finalisation
  ~histogram();

  // Allocate and initialise the benchmark data:
  //   A will be set to 8
  // The inputs are scaled to help keep the reduction in range
  void setup();

  // Execute the benchmark a single time
  double run();

  // Finalise, releasing any benchmark data
  void teardown();

  // Value that run() is expected to return: the problem size itself
  double expect() {
    return static_cast<double>(N);
  }

  // Theoretical minimum number of GB moved by one call to run():
  // one int per element of the problem
  double gigabytes() {
    const double gb_per_element = 1.0E-9 * sizeof(int);
    return gb_per_element * N;
  }
};
107 changes: 104 additions & 3 deletions src/main.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021 Everything's Reduced authors
// Copyright (c) 2022 Everything's Reduced authors
// SPDX-License-Identifier: MIT

#include <chrono>
Expand All @@ -24,12 +24,14 @@ const auto LINE = "------------------------------------------------------------"
#include "describe.hpp"
#include "dot.hpp"
#include "field_summary.hpp"
#include "dot_rank1.hpp"
#include "histogram.hpp"

#define NITERS 100

#include "util.hpp"

enum class Benchmark { dot, complex_sum, complex_sum_soa, complex_min, field_summary, describe };
enum class Benchmark { dot, complex_sum, complex_sum_soa, complex_min, field_summary, describe, dot_rank1, histogram };

// Choose the benchmark based on the input argument given from the command line
Benchmark select_benchmark(const std::string name) {
Expand All @@ -46,6 +48,10 @@ Benchmark select_benchmark(const std::string name) {
return Benchmark::field_summary;
else if (name == "describe")
return Benchmark::describe;
else if (name == "dot_rank1")
return Benchmark::dot_rank1;
else if (name == "histogram")
return Benchmark::histogram;
else {
std::cerr << "Invalid benchmark: " << name << std::endl;
exit(EXIT_FAILURE);
Expand Down Expand Up @@ -107,7 +113,7 @@ int main(int argc, char *argv[]) {
<< std::endl
<< "Valid benchmarks:" << std::endl
<< " dot, complex_sum, complex_sum_soa, complex_min, "
"field_summary, describe"
"field_summary, describe, dot_rank1, histogram"
<< std::endl;
exit(EXIT_FAILURE);
}
Expand Down Expand Up @@ -481,6 +487,101 @@ int main(int argc, char *argv[]) {
elapsed(run_start, run_stop), elapsed(check_start, check_stop), elapsed(teardown_start, teardown_stop),
static_cast<double>(NITERS)*d.gigabytes());
}
//////////////////////////////////////////////////////////////////////////////
// dot_rank1 Benchmark
//////////////////////////////////////////////////////////////////////////////
else if (run == Benchmark::dot_rank1) {
  check_for_option(argc);
  long problem_size = get_problem_size(argv[2]);

  // One slot per iteration, so results are validated after the timed loop
  std::vector<double> results(NITERS);

  auto construct_start = clock::now();
  dot_rank1 bench(problem_size);
  auto construct_stop = clock::now();

  auto setup_start = clock::now();
  bench.setup();
  auto setup_stop = clock::now();

  auto run_start = clock::now();
  for (double &value : results) {
    value = bench.run();
  }
  auto run_stop = clock::now();

  // Check solution: only the first iteration outside the tolerance is
  // reported, the remaining iterations are skipped
  auto check_start = clock::now();
  const double expected = bench.expect();
  const double tolerance = std::numeric_limits<double>::epsilon() * problem_size * 1.0e6;
  for (int iter = 0; iter < NITERS; ++iter) {
    const double actual = results[iter];
    const double difference = std::abs(actual - expected);
    if (difference > tolerance) {
      std::cerr << "Dot_rank1: result incorrect" << std::endl
                << "Result: " << iter << " (skipping rest)" << std::endl
                << "Expected: " << expected << std::endl
                << "Result: " << actual << std::endl
                << "Difference: " << difference << std::endl
                << "Eps: " << tolerance << std::endl;
      break;
    }
  }
  auto check_stop = clock::now();

  auto teardown_start = clock::now();
  bench.teardown();
  auto teardown_stop = clock::now();

  // Data volume is reported for all iterations together
  print_timing("Dot rank1",
               elapsed(construct_start, construct_stop),
               elapsed(setup_start, setup_stop),
               elapsed(run_start, run_stop),
               elapsed(check_start, check_stop),
               elapsed(teardown_start, teardown_stop),
               static_cast<double>(NITERS) * bench.gigabytes());
}
//////////////////////////////////////////////////////////////////////////////
// histogram Benchmark
//////////////////////////////////////////////////////////////////////////////
else if (run == Benchmark::histogram) {
  check_for_option(argc);
  long problem_size = get_problem_size(argv[2]);

  // One slot per iteration, so results are validated after the timed loop
  std::vector<double> results(NITERS);

  auto construct_start = clock::now();
  histogram hist(problem_size);
  auto construct_stop = clock::now();

  auto setup_start = clock::now();
  hist.setup();
  auto setup_stop = clock::now();

  auto run_start = clock::now();
  for (double &value : results) {
    value = hist.run();
  }
  auto run_stop = clock::now();

  // Check solution: only the first iteration outside the tolerance is
  // reported, the remaining iterations are skipped
  auto check_start = clock::now();
  const double expected = hist.expect();
  const double tolerance = std::numeric_limits<double>::epsilon() * 100.0;
  for (int iter = 0; iter < NITERS; ++iter) {
    const double actual = results[iter];
    const double difference = std::abs(actual - expected);
    if (difference > tolerance) {
      std::cerr << "Histogram: result incorrect" << std::endl
                << "Result: " << iter << " (skipping rest)" << std::endl
                << "Expected: " << expected << std::endl
                << "Result: " << actual << std::endl
                << "Difference: " << difference << std::endl;
      break;
    }
  }
  auto check_stop = clock::now();

  auto teardown_start = clock::now();
  hist.teardown();
  auto teardown_stop = clock::now();

  // Data volume is reported for all iterations together
  print_timing("Histogram",
               elapsed(construct_start, construct_stop),
               elapsed(setup_start, setup_stop),
               elapsed(run_start, run_stop),
               elapsed(check_start, check_stop),
               elapsed(teardown_start, teardown_stop),
               static_cast<double>(NITERS) * hist.gigabytes());
}

return EXIT_SUCCESS;
}
47 changes: 46 additions & 1 deletion src/sycl/dot.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021 Everything's Reduced authors
// Copyright (c) 2022 Everything's Reduced authors
// SPDX-License-Identifier: MIT

#include <iostream>
Expand All @@ -8,6 +8,20 @@

#include <sycl.hpp>

#ifdef SYCL_USM
// Benchmark state for the USM build: a queue plus raw pointers to shared
// USM allocations obtained through that queue
struct dot::data {
  // Declaration order matters: members are initialised in this order, and
  // the pointer initialisers below allocate through q
  sycl::queue q;
  double *A;   // first input array, N elements
  double *B;   // second input array, N elements
  double *sum; // single-element reduction result

  // Create the queue on the default-selected device, then allocate the two
  // N-element inputs and the one-element result as shared USM.
  // Nothing here frees the allocations; that is left to the owner.
  data(long N)
      : q(sycl::default_selector{}),
        A(sycl::malloc_shared<double>(N, q)),
        B(sycl::malloc_shared<double>(N, q)),
        sum(sycl::malloc_shared<double>(1, q)) {}
};
#else
struct dot::data {
data(long N) : A(N), B(N), sum(1), q(sycl::default_selector{}) {}

Expand All @@ -16,6 +30,7 @@ struct dot::data {
sycl::buffer<double> sum;
sycl::queue q;
};
#endif

dot::dot(long N_) : N(N_), pdata{std::make_unique<data>(N)} {
std::cout << config_string("Dot", pdata->q) << std::endl;
Expand All @@ -25,14 +40,24 @@ dot::~dot() {}

void dot::setup() {
// Zero the reduction result with a single-task kernel, and wait for it to
// finish before the next kernel is submitted
pdata->q.submit([&](sycl::handler &h) {
#ifdef SYCL_USM
  // USM: the kernel writes straight through the raw pointer
  double *sum = pdata->sum;
#else
  // Buffers: the kernel writes through a write-only accessor
  sycl::accessor sum(pdata->sum, h, sycl::write_only);
#endif
  h.single_task([=]() { sum[0] = 0.0; });
});
pdata->q.wait();

pdata->q.submit([&, N = this->N](sycl::handler &h) {
#ifdef SYCL_USM
double *A = pdata->A;
double *B = pdata->B;
#else
sycl::accessor A(pdata->A, h, sycl::write_only);
sycl::accessor B(pdata->B, h, sycl::write_only);
#endif
h.parallel_for(
N,
[=](const int i) {
Expand All @@ -44,21 +69,41 @@ void dot::setup() {
}

// Finalise the benchmark, releasing everything held in pdata.
// After this call pdata is empty in both the USM and the buffer build, so a
// repeated call is a no-op and no freed pointer stays reachable.
void dot::teardown() {
#ifdef SYCL_USM
  // USM allocations are raw pointers, so they are freed explicitly here,
  // while the queue they were allocated through is still alive
  if (pdata) {
    sycl::free(pdata->A, pdata->q);
    sycl::free(pdata->B, pdata->q);
    sycl::free(pdata->sum, pdata->q);
  }
#endif
  // Destroy the data object itself (and, with it, the queue it holds)
  pdata.reset();
  // NOTE: All the data has been destroyed!
}

double dot::run() {
pdata->q.submit([&](sycl::handler &h) {
#ifdef SYCL_USM
double *A = pdata->A;
double *B = pdata->B;
#else
sycl::accessor A(pdata->A, h, sycl::read_only);
sycl::accessor B(pdata->B, h, sycl::read_only);
#endif
h.parallel_for(
sycl::range<1>(N),
#ifdef SYCL_USM
sycl::reduction(pdata->sum, std::plus<>(), sycl::property::reduction::initialize_to_identity{}),
#else
sycl::reduction(pdata->sum, h, std::plus<>(), sycl::property::reduction::initialize_to_identity{}),
#endif
[=](sycl::id<1> i, auto &sum) {
sum += A[i] * B[i];
});
});

#ifdef SYCL_USM
pdata->q.wait();
return pdata->sum[0];
#else
return pdata->sum.get_host_access()[0];
#endif
}
Loading