Skip to content

[SYCL] Add group algorithms for MUL/OR/XOR/AND operations #2339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/lib/Sema/SPIRVBuiltins.td
Original file line number Diff line number Diff line change
Expand Up @@ -917,11 +917,13 @@ foreach name = ["GroupBroadcast"] in {
}
}

foreach name = ["GroupIAdd"] in {
foreach name = ["GroupIAdd", "GroupNonUniformIMul", "GroupNonUniformBitwiseOr",
"GroupNonUniformBitwiseXor", "GroupNonUniformBitwiseAnd"] in {
def : SPVBuiltin<name, [AIGenTypeN, UInt, UInt, AIGenTypeN], Attr.Convergent>;
}

foreach name = ["GroupFAdd", "GroupFMin", "GroupFMax"] in {
foreach name = ["GroupFAdd", "GroupFMin", "GroupFMax",
"GroupNonUniformFMul"] in {
def : SPVBuiltin<name, [FGenTypeN, UInt, UInt, FGenTypeN], Attr.Convergent>;
}

Expand Down
11 changes: 11 additions & 0 deletions sycl/include/CL/sycl/ONEAPI/functional.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ template <> struct maximum<void> {
#endif

// Aliases that re-export the standard library function objects under this
// namespace. With the default argument (T = void) each alias names the
// corresponding std::...<void> specialization.
template <typename T = void> using plus = std::plus<T>;
template <typename T = void> using multiplies = std::multiplies<T>;
template <typename T = void> using bit_or = std::bit_or<T>;
template <typename T = void> using bit_xor = std::bit_xor<T>;
template <typename T = void> using bit_and = std::bit_and<T>;
Expand Down Expand Up @@ -103,6 +104,16 @@ __SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, ONEAPI::plus<T>)
__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, ONEAPI::plus<T>)
__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, ONEAPI::plus<T>)

// Overloads for the multiplies / bit_or / bit_xor / bit_and function objects.
// Multiplication is registered for signed, unsigned and floating-point
// categories (IMul / FMul); the bitwise operations are registered for the
// signed and unsigned integer categories only.
// NOTE(review): the macro itself is defined earlier in this header; these
// presumably map each function object to the named "NonUniform" group
// instruction -- confirm against the macro definition.
__SYCL_CALC_OVERLOAD(GroupOpISigned, NonUniformIMul, ONEAPI::multiplies<T>)
__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, NonUniformIMul, ONEAPI::multiplies<T>)
__SYCL_CALC_OVERLOAD(GroupOpFP, NonUniformFMul, ONEAPI::multiplies<T>)
__SYCL_CALC_OVERLOAD(GroupOpISigned, NonUniformBitwiseOr, ONEAPI::bit_or<T>)
__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, NonUniformBitwiseOr, ONEAPI::bit_or<T>)
__SYCL_CALC_OVERLOAD(GroupOpISigned, NonUniformBitwiseXor, ONEAPI::bit_xor<T>)
__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, NonUniformBitwiseXor, ONEAPI::bit_xor<T>)
__SYCL_CALC_OVERLOAD(GroupOpISigned, NonUniformBitwiseAnd, ONEAPI::bit_and<T>)
__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, NonUniformBitwiseAnd, ONEAPI::bit_and<T>)

#undef __SYCL_CALC_OVERLOAD

template <typename T, __spv::GroupOperation O, __spv::Scope::Flag S,
Expand Down
28 changes: 25 additions & 3 deletions sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,39 @@ template <typename T, typename V> struct identity<T, ONEAPI::plus<V>> {
};

template <typename T, typename V> struct identity<T, ONEAPI::minimum<V>> {
static constexpr T value = (std::numeric_limits<T>::max)();
static constexpr T value = std::numeric_limits<T>::has_infinity
? std::numeric_limits<T>::infinity()
: (std::numeric_limits<T>::max)();
};

template <typename T, typename V> struct identity<T, ONEAPI::maximum<V>> {
static constexpr T value = std::numeric_limits<T>::lowest();
static constexpr T value =
std::numeric_limits<T>::has_infinity
? static_cast<T>(-std::numeric_limits<T>::infinity())
: std::numeric_limits<T>::lowest();
};

// Identity element for multiplication: the value 1 converted to T, so that
// combining it with any x via multiplies leaves x unchanged.
template <typename T, typename V> struct identity<T, ONEAPI::multiplies<V>> {
static constexpr T value = static_cast<T>(1);
};

// Identity element for bitwise OR: zero (x | 0 == x).
template <typename T, typename V> struct identity<T, ONEAPI::bit_or<V>> {
static constexpr T value = 0;
};

// Identity element for bitwise XOR: zero (x ^ 0 == x).
template <typename T, typename V> struct identity<T, ONEAPI::bit_xor<V>> {
static constexpr T value = 0;
};

// Identity element for bitwise AND: the complement of zero, i.e. a value with
// every bit set (x & ~0 == x). For T narrower than int the complement is
// computed on the promoted value and then converted back to T.
template <typename T, typename V> struct identity<T, ONEAPI::bit_and<V>> {
static constexpr T value = ~static_cast<T>(0);
};

template <typename T>
using native_op_list =
type_list<ONEAPI::plus<T>, ONEAPI::bit_or<T>, ONEAPI::bit_xor<T>,
ONEAPI::bit_and<T>, ONEAPI::maximum<T>, ONEAPI::minimum<T>>;
ONEAPI::bit_and<T>, ONEAPI::maximum<T>, ONEAPI::minimum<T>,
ONEAPI::multiplies<T>>;

template <typename T, typename BinaryOperation> struct is_native_op {
static constexpr bool value =
Expand Down
39 changes: 35 additions & 4 deletions sycl/test/group-algorithm/exclusive_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version used by the SPIR-V translator
// from 1.1 to 1.3. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assertion/check failures
// in the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -o %t13.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -120,10 +128,27 @@ void test(queue q, InputContainer input, OutputContainer output,
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

/// Decides whether the test should run on device \p D.
///
/// A device is accepted when its platform name contains "Level-Zero", or when
/// the platform name contains "OpenCL" and the device version string reports
/// a major version of at least 2. Everything else is rejected.
///
/// \param D device to inspect.
/// \returns true if the test should run on \p D, false if it should be skipped.
bool isSupportedDevice(device D) {
  const std::string PlatformName =
      D.get_platform().get_info<info::platform::name>();

  // Level-Zero platforms are accepted without looking at the device version.
  if (PlatformName.find("Level-Zero") != std::string::npos)
    return true;

  if (PlatformName.find("OpenCL") == std::string::npos)
    return false;

  const std::string Version = D.get_info<info::device::version>();
  const size_t Offset = Version.find("OpenCL");
  if (Offset == std::string::npos)
    return false;

  // Skip "OpenCL" plus the single separator character that follows it. Guard
  // against a string that ends right after the prefix instead of letting
  // substr() throw std::out_of_range.
  const size_t PrefixLen = sizeof("OpenCL ") - 1;
  size_t Pos = Offset + PrefixLen;
  if (Pos >= Version.size())
    return false;

  // Read the major version as a number; comparing version strings
  // lexicographically would rank e.g. "10.0" below "2.0".
  unsigned Major = 0;
  bool SawDigit = false;
  for (; Pos < Version.size() && Version[Pos] >= '0' && Version[Pos] <= '9';
       ++Pos) {
    Major = Major * 10 + static_cast<unsigned>(Version[Pos] - '0');
    SawDigit = true;
  }

  return SawDigit && Major >= 2;
}

int main() {
queue q;
std::string version = q.get_device().get_info<info::device::version>();
if (version < std::string("2.0")) {
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
Expand All @@ -134,14 +159,20 @@ int main() {
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

#if __cplusplus >= 201402L
test(q, input, output, plus<>(), 0);
test(q, input, output, minimum<>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<>(), std::numeric_limits<int>::lowest());
#endif

test(q, input, output, plus<int>(), 0);
test(q, input, output, minimum<int>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<int>(), std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test(q, input, output, multiplies<int>(), 1);
test(q, input, output, bit_or<int>(), 0);
test(q, input, output, bit_xor<int>(), 0);
test(q, input, output, bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
39 changes: 35 additions & 4 deletions sycl/test/group-algorithm/inclusive_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version used by the SPIR-V translator
// from 1.1 to 1.3. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assertion/check failures
// in the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -o %t13.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -120,10 +128,27 @@ void test(queue q, InputContainer input, OutputContainer output,
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

/// Decides whether the test should run on device \p D.
///
/// A device is accepted when its platform name contains "Level-Zero", or when
/// the platform name contains "OpenCL" and the device version string reports
/// a major version of at least 2. Everything else is rejected.
///
/// \param D device to inspect.
/// \returns true if the test should run on \p D, false if it should be skipped.
bool isSupportedDevice(device D) {
  const std::string PlatformName =
      D.get_platform().get_info<info::platform::name>();

  // Level-Zero platforms are accepted without looking at the device version.
  if (PlatformName.find("Level-Zero") != std::string::npos)
    return true;

  if (PlatformName.find("OpenCL") == std::string::npos)
    return false;

  const std::string Version = D.get_info<info::device::version>();
  const size_t Offset = Version.find("OpenCL");
  if (Offset == std::string::npos)
    return false;

  // Skip "OpenCL" plus the single separator character that follows it. Guard
  // against a string that ends right after the prefix instead of letting
  // substr() throw std::out_of_range.
  const size_t PrefixLen = sizeof("OpenCL ") - 1;
  size_t Pos = Offset + PrefixLen;
  if (Pos >= Version.size())
    return false;

  // Read the major version as a number; comparing version strings
  // lexicographically would rank e.g. "10.0" below "2.0".
  unsigned Major = 0;
  bool SawDigit = false;
  for (; Pos < Version.size() && Version[Pos] >= '0' && Version[Pos] <= '9';
       ++Pos) {
    Major = Major * 10 + static_cast<unsigned>(Version[Pos] - '0');
    SawDigit = true;
  }

  return SawDigit && Major >= 2;
}

int main() {
queue q;
std::string version = q.get_device().get_info<info::device::version>();
if (version < std::string("2.0")) {
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
Expand All @@ -134,14 +159,20 @@ int main() {
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

#if __cplusplus >= 201402L
test(q, input, output, plus<>(), 0);
test(q, input, output, minimum<>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<>(), std::numeric_limits<int>::lowest());
#endif

test(q, input, output, plus<int>(), 0);
test(q, input, output, minimum<int>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<int>(), std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test(q, input, output, multiplies<int>(), 1);
test(q, input, output, bit_or<int>(), 0);
test(q, input, output, bit_xor<int>(), 0);
test(q, input, output, bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
39 changes: 35 additions & 4 deletions sycl/test/group-algorithm/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: enable compile+runtime checks for operations defined in SPIR-V 1.3.
// That requires either adding a switch to clang (-spirv-max-version=1.3) or
// unconditionally raising the SPIR-V version used by the SPIR-V translator
// from 1.1 to 1.3. Using operations specific to SPIR-V 1.3 and higher while
// -spirv-max-version=1.1 is set by default causes assertion/check failures
// in the SPIR-V translator.
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -o %t13.out

#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -58,10 +66,27 @@ void test(queue q, InputContainer input, OutputContainer output,
std::accumulate(input.begin(), input.end(), init, binary_op));
}

/// Decides whether the test should run on device \p D.
///
/// A device is accepted when its platform name contains "Level-Zero", or when
/// the platform name contains "OpenCL" and the device version string reports
/// a major version of at least 2. Everything else is rejected.
///
/// \param D device to inspect.
/// \returns true if the test should run on \p D, false if it should be skipped.
bool isSupportedDevice(device D) {
  const std::string PlatformName =
      D.get_platform().get_info<info::platform::name>();

  // Level-Zero platforms are accepted without looking at the device version.
  if (PlatformName.find("Level-Zero") != std::string::npos)
    return true;

  if (PlatformName.find("OpenCL") == std::string::npos)
    return false;

  const std::string Version = D.get_info<info::device::version>();
  const size_t Offset = Version.find("OpenCL");
  if (Offset == std::string::npos)
    return false;

  // Skip "OpenCL" plus the single separator character that follows it. Guard
  // against a string that ends right after the prefix instead of letting
  // substr() throw std::out_of_range.
  const size_t PrefixLen = sizeof("OpenCL ") - 1;
  size_t Pos = Offset + PrefixLen;
  if (Pos >= Version.size())
    return false;

  // Read the major version as a number; comparing version strings
  // lexicographically would rank e.g. "10.0" below "2.0".
  unsigned Major = 0;
  bool SawDigit = false;
  for (; Pos < Version.size() && Version[Pos] >= '0' && Version[Pos] <= '9';
       ++Pos) {
    Major = Major * 10 + static_cast<unsigned>(Version[Pos] - '0');
    SawDigit = true;
  }

  return SawDigit && Major >= 2;
}

int main() {
queue q;
std::string version = q.get_device().get_info<info::device::version>();
if (version < std::string("2.0")) {
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
Expand All @@ -72,14 +97,20 @@ int main() {
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

#if __cplusplus >= 201402L
test(q, input, output, plus<>(), 0);
test(q, input, output, minimum<>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<>(), std::numeric_limits<int>::lowest());
#endif

test(q, input, output, plus<int>(), 0);
test(q, input, output, minimum<int>(), std::numeric_limits<int>::max());
test(q, input, output, maximum<int>(), std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test(q, input, output, multiplies<int>(), 1);
test(q, input, output, bit_or<int>(), 0);
test(q, input, output, bit_xor<int>(), 0);
test(q, input, output, bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}