Skip to content

[SYCL] Write tests following the test plan for the work group memory extension #15928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
c9ad9ab
Add tests for work group memory extension
lbushi25 Oct 30, 2024
bd69b8b
Remove unused variable
lbushi25 Oct 30, 2024
83887be
Update reduction_free_function.cpp
lbushi25 Oct 30, 2024
17c4003
Merge branch 'work_group_memory_tests' of https://github.com/lbushi25…
lbushi25 Oct 31, 2024
3964f27
Fix missing aspect runtime errors in tests
lbushi25 Oct 31, 2024
69642a1
Revert "Fix missing aspect runtime errors in tests"
lbushi25 Oct 31, 2024
64096a7
Merge branch 'intel:sycl' into work_group_memory_tests
lbushi25 Oct 31, 2024
db2f720
Fix formatting
lbushi25 Oct 31, 2024
2307672
Fix formatting
lbushi25 Oct 31, 2024
553a127
Fix comment typo in free function kernel test
lbushi25 Oct 31, 2024
d8faebe
Remove ext/intel/math from includes
lbushi25 Oct 31, 2024
adb2331
Remove ext/intel/math from includes
lbushi25 Oct 31, 2024
671bea8
Add comment regarding the limitations of free function kernels
lbushi25 Oct 31, 2024
82cc19d
Fix formatting
lbushi25 Oct 31, 2024
7a21a40
Merge branch 'work_group_memory_tests' of https://github.com/lbushi25…
lbushi25 Nov 4, 2024
0f62c8b
Merge branch 'intel:sycl' into work_group_memory_tests
lbushi25 Nov 4, 2024
942e993
Merge branch 'work_group_memory_tests' of https://github.com/lbushi25…
lbushi25 Nov 4, 2024
f231a26
Add another constructor that takes an argument of indeterminate_t typ…
lbushi25 Nov 4, 2024
209a5b7
Add TODOs to remove diagnostic once feature is supported
lbushi25 Nov 4, 2024
8c203ef
Fix include fails
lbushi25 Nov 4, 2024
3ef139a
Update tests to conform to the spec
lbushi25 Nov 5, 2024
faa3382
Update tests to conform to the spec
lbushi25 Nov 5, 2024
5c0c4b3
Formatting changes
lbushi25 Nov 5, 2024
df3902f
Formatting changes
lbushi25 Nov 5, 2024
61944fc
Remove error limit from WorkGroupMemory test
lbushi25 Nov 6, 2024
bb63b54
Remove indeterminate change and add it in a separate PR
lbushi25 Nov 6, 2024
7b0c7bb
Merge branch 'work_group_memory_tests' of https://github.com/lbushi25…
lbushi25 Nov 6, 2024
3d85911
Apply suggested changes to api_misuse_test.cpp
lbushi25 Nov 6, 2024
3068b89
Add unsupported tracker for cuda failures
lbushi25 Nov 6, 2024
de090f7
Refactor tests by pulling out common functionality
lbushi25 Nov 7, 2024
07d1220
Print message about skipping tests when aspect not supported for a ce…
lbushi25 Nov 7, 2024
188f0fe
Fix merge conflicts
lbushi25 Nov 15, 2024
7595770
Add missing checks for aspect fp64
lbushi25 Nov 15, 2024
67aa143
Fix formatting
lbushi25 Nov 15, 2024
f1d99fa
Add check for empty properties
lbushi25 Nov 15, 2024
1091d6e
Fix error in test logic
lbushi25 Nov 15, 2024
7ecab94
Fix error in test logic
lbushi25 Nov 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
using value_type = std::remove_all_extents_t<DataT>;

private:
// At the moment we do not have a way to set properties nor property values to
// set for work group memory. So, we check here for diagnostic purposes that
// the property list is empty.
// TODO: Remove this function and its occurrences in this file once properties
// have been created for work group memory.
void check_props_empty() const {
static_assert(std::is_same_v<PropertyListT, empty_properties_t> &&
"Work group memory class does not support properties yet!");
}
using decoratedPtr = typename sycl::detail::DecoratedType<
value_type, access::address_space::local_space>::type *;

Expand All @@ -62,18 +71,22 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
#endif

public:
work_group_memory(const indeterminate_t &) {};
work_group_memory(const indeterminate_t &) { check_props_empty(); };
work_group_memory(const work_group_memory &rhs) = default;
work_group_memory &operator=(const work_group_memory &rhs) = default;
template <typename T = DataT,
typename = std::enable_if_t<!sycl::detail::is_unbounded_array_v<T>>>
work_group_memory(handler &)
: sycl::detail::work_group_memory_impl(sizeof(DataT)) {}
: sycl::detail::work_group_memory_impl(sizeof(DataT)) {
check_props_empty();
}
template <typename T = DataT,
typename = std::enable_if_t<sycl::detail::is_unbounded_array_v<T>>>
work_group_memory(size_t num, handler &)
: sycl::detail::work_group_memory_impl(
num * sizeof(std::remove_extent_t<DataT>)) {}
num * sizeof(std::remove_extent_t<DataT>)) {
check_props_empty();
}
template <access::decorated IsDecorated = access::decorated::no>
multi_ptr<value_type, access::address_space::local_space, IsDecorated>
get_multi_ptr() const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/half_type.hpp>

namespace syclexp = sycl::ext::oneapi::experimental;

sycl::queue q;
Expand Down Expand Up @@ -50,7 +52,9 @@ template <typename T> void swap_scalar(T &a, T &b) {
sycl::nd_range<1> ndr{size, wgsize};
cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
syclexp::work_group_memory<T> temp2{syclexp::indeterminate};
temp2 = temp; // temp and temp2 have the same underlying data
temp2 = temp; // temp and temp2 have the same underlying data
assert(&temp2 == &temp); // check that both objects return same
// underlying address after assignment
temp = acc_a[0];
acc_a[0] = acc_b[0];
acc_b[0] = temp2; // safe to use temp2
Expand Down Expand Up @@ -86,6 +90,8 @@ template <typename T> void swap_scalar(T &a, T &b) {
assert(a == old_b && b == old_a && "Incorrect swap!");

// Same as above but instead of using multi_ptr, use address-of operator.
// Also verify that get_multi_ptr() returns the same address as address-of
// operator.
{
sycl::buffer<T, 1> buf_a{&a, 1};
sycl::buffer<T, 1> buf_b{&b, 1};
Expand All @@ -96,6 +102,7 @@ template <typename T> void swap_scalar(T &a, T &b) {
syclexp::work_group_memory<T> temp2{cgh};
sycl::nd_range<1> ndr{size, wgsize};
cgh.parallel_for(ndr, [=](sycl::nd_item<> it) {
assert(&temp == temp.get_multi_ptr().get());
temp = acc_a[0];
acc_a[0] = acc_b[0];
temp2 = *(&temp);
Expand Down Expand Up @@ -294,6 +301,8 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
temp[i][j] = acc_a[i][j];
acc_a[i][j] = acc_b[i][j];
syclexp::work_group_memory<T[N][N]> temp2{temp};
assert(&temp2 == &temp); // check both objects return same underlying
// address after copy construction.
acc_b[i][j] = temp2[i][j];
});
});
Expand Down Expand Up @@ -342,28 +351,28 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
// so we can verify that each work-item sees the value written by its leader.
// The test also is a sanity check that different work groups get different
// work group memory locations as otherwise we'd have data races.
void coherency(size_t size, size_t wgsize) {
template <typename T> void coherency(size_t size, size_t wgsize) {
q.submit([&](sycl::handler &cgh) {
syclexp::work_group_memory<int> data{cgh};
syclexp::work_group_memory<T> data{cgh};
sycl::nd_range<1> ndr{size, wgsize};
cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
if (it.get_group().leader()) {
data = it.get_global_id() / wgsize;
data = T(it.get_global_id() / wgsize);
}
sycl::group_barrier(it.get_group());
assert(data == it.get_global_id() / wgsize);
assert(data == T(it.get_global_id() / wgsize));
});
});
}

constexpr size_t N = 32;
int main() {
int intarr1[N][N];
int intarr2[N][N];
template <typename T> void test() {
T intarr1[N][N];
T intarr2[N][N];
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
intarr1[i][j] = i + j;
intarr2[i][j] = i * j;
intarr1[i][j] = T(i) + T(j);
intarr2[i][j] = T(i) * T(j);
}
}
for (int i = 0; i < N; ++i) {
Expand All @@ -373,10 +382,37 @@ int main() {
swap_array_1d(intarr1[i], intarr2[i], 8);
}
swap_array_2d(intarr1, intarr2, 8);
coherency(N, N / 2);
coherency(N, N / 4);
coherency(N, N / 8);
coherency(N, N / 16);
coherency(N, N / 32);
coherency<T>(N, N / 2);
coherency<T>(N, N / 4);
coherency<T>(N, N / 8);
coherency<T>(N, N / 16);
coherency<T>(N, N / 32);
}

template <typename T> void test_ptr() {
T arr1[N][N];
T arr2[N][N];
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
swap_scalar(arr1[i][j], arr2[i][j]);
}
swap_array_1d(arr1[i], arr2[i], 8);
}
swap_array_2d(arr1, arr2, 8);
}

int main() {
test<int>();
test<char>();
test<uint16_t>();
if (q.get_device().has(sycl::aspect::fp16))
test<sycl::half>();
test_ptr<float *>();
test_ptr<int *>();
test_ptr<char *>();
test_ptr<uint16_t *>();
if (q.get_device().has(sycl::aspect::fp16))
test_ptr<sycl::half *>();
test_ptr<float *>();
return 0;
}
57 changes: 57 additions & 0 deletions sycl/test-e2e/WorkGroupMemory/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once

#include <cassert>
#include <iostream>
#include <sycl/atomic_ref.hpp>
#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/marray.hpp>
#include <sycl/usm.hpp>
#include <sycl/vector.hpp>

using namespace sycl;

template <typename T> bool check_half_aspect(queue &q) {
if (std::is_same_v<sycl::half, T> &&
!q.get_device().has(sycl::aspect::fp16)) {
std::cout << "Device does not support fp16 aspect. Skipping all tests with "
"sycl::half type!"
<< std::endl;
return false;
}
return true;
}

template <typename T> bool check_double_aspect(queue &q) {
if (std::is_same_v<T, double> && !q.get_device().has(aspect::fp64)) {
std::cout << "Device does not support fp64 aspect. Skipping all tests with "
"double type!"
<< std::endl;
return false;
}
return true;
}

template <typename T> struct S {
T val;
};

template <typename T> struct M {
T val;
};

union U {
S<int> s;
M<int> m;
};

template <typename T>
void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
sycl::ext::oneapi::experimental::work_group_memory<T> ret,
size_t WGSIZE) {
for (int i = 0; i < WGSIZE; ++i) {
ret = ret + mem[i];
}
}
121 changes: 121 additions & 0 deletions sycl/test-e2e/WorkGroupMemory/common_free_function.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#pragma once

#include "common.hpp"
#include "common_lambda.hpp"
#include <cassert>
#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/usm.hpp>

using namespace sycl;

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
(ext::oneapi::experimental::nd_range_kernel<1>))
void sum(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T *buf,
sycl::ext::oneapi::experimental::work_group_memory<T> result,
T expected, size_t WGSIZE, bool UseHelper) {
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
size_t local_id = it.get_local_id();
mem[local_id] = buf[local_id];
group_barrier(it.get_group());
if (it.get_group().leader()) {
result = 0;
if (!UseHelper) {
for (int i = 0; i < WGSIZE; ++i) {
result = result + mem[i];
}
} else {
sum_helper(mem, result, WGSIZE);
}
assert(result == expected);
}
}

// Explicit instantiations for the relevant data types.
// These are needed because free function kernel support is not fully
// implemented yet.
// TODO: Remove these once free function kernel support is fully there.
#define SUM(T) \
template void sum<T>( \
sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T * buf, \
sycl::ext::oneapi::experimental::work_group_memory<T> result, \
T expected, size_t WGSIZE, bool UseHelper);

SUM(int)
SUM(uint16_t)
SUM(half)
SUM(double)
SUM(float)
SUM(char)
SUM(bool)

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
(ext::oneapi::experimental::nd_range_kernel<1>))
void sum_marray(
sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>> mem,
T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
T expected) {
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
size_t local_id = it.get_local_id();
constexpr T tolerance = 0.0001;
sycl::marray<T, 16> &data = mem;
data[local_id] = buf[local_id];
group_barrier(it.get_group());
if (it.get_group().leader()) {
result = 0;
for (int i = 0; i < 16; ++i) {
result = result + data[i];
}
assert((result - expected) * (result - expected) <= tolerance);
}
}

// Explicit instantiations for the relevant data types.
#define SUM_MARRAY(T) \
template void sum_marray<T>( \
sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>> \
mem, \
T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result, \
T expected);

SUM_MARRAY(float);
SUM_MARRAY(double);
SUM_MARRAY(half);

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
(ext::oneapi::experimental::nd_range_kernel<1>))
void sum_vec(
sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>> mem,
T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
T expected) {
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
size_t local_id = it.get_local_id();
constexpr T tolerance = 0.0001;
sycl::vec<T, 16> &data = mem;
data[local_id] = buf[local_id];
group_barrier(it.get_group());
if (it.get_group().leader()) {
result = 0;
for (int i = 0; i < 16; ++i) {
result = result + data[i];
}
assert((result - expected) * (result - expected) <= tolerance);
}
}

// Explicit instantiations for the relevant data types.
#define SUM_VEC(T) \
template void sum_vec<T>( \
sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>> \
mem, \
T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result, \
T expected);

SUM_VEC(float);
SUM_VEC(double);
SUM_VEC(half);
22 changes: 22 additions & 0 deletions sycl/test-e2e/WorkGroupMemory/common_lambda.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once

#include <cassert>
#include <sycl/atomic_ref.hpp>
#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
#include <sycl/ext/oneapi/free_function_queries.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/marray.hpp>
#include <sycl/usm.hpp>
#include <sycl/vector.hpp>

using namespace sycl;

template <typename T>
void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
sycl::ext::oneapi::experimental::work_group_memory<T> ret,
size_t WGSIZE) {
for (int i = 0; i < WGSIZE; ++i) {
ret = ret + mem[i];
}
}
Loading
Loading