Skip to content

[SYCL] Fix sycl::vec unary ops #10722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 91 additions & 49 deletions sycl/include/sycl/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,13 +570,17 @@ template <typename Type, int NumElements> class vec {
// vector extension. This is for MSVC compatibility, which has a max alignment
// of 64 for direct params. If we drop MSVC, we can have alignment the same as
// size and use vector extensions for all sizes.
static constexpr bool IsUsingArray =
static constexpr bool IsUsingArrayOnDevice =
(IsHostHalf || IsSizeGreaterThanMaxAlign);

#if defined(__SYCL_DEVICE_ONLY__)
static constexpr bool NativeVec = NumElements > 1 && !IsUsingArray;
static constexpr bool NativeVec = NumElements > 1 && !IsUsingArrayOnDevice;
static constexpr bool IsUsingArrayOnHost =
false; // we are not compiling for host.
#else
static constexpr bool NativeVec = false;
static constexpr bool IsUsingArrayOnHost =
true; // host always uses std::array.
#endif

static constexpr int getNumElements() { return NumElements; }
Expand Down Expand Up @@ -757,6 +761,15 @@ template <typename Type, int NumElements> class vec {
return *this;
}

template <typename T = void>
using EnableIfUsingArray =
typename std::enable_if_t<IsUsingArrayOnDevice || IsUsingArrayOnHost, T>;

template <typename T = void>
using EnableIfNotUsingArray =
typename std::enable_if_t<!IsUsingArrayOnDevice && !IsUsingArrayOnHost,
T>;

#ifdef __SYCL_DEVICE_ONLY__
template <typename T = void>
using EnableIfNotHostHalf = typename std::enable_if_t<!IsHostHalf, T>;
Expand All @@ -765,27 +778,29 @@ template <typename Type, int NumElements> class vec {
using EnableIfHostHalf = typename std::enable_if_t<IsHostHalf, T>;

template <typename T = void>
using EnableIfUsingArray = typename std::enable_if_t<IsUsingArray, T>;
using EnableIfUsingArrayOnDevice =
typename std::enable_if_t<IsUsingArrayOnDevice, T>;

template <typename T = void>
using EnableIfNotUsingArray = typename std::enable_if_t<!IsUsingArray, T>;
using EnableIfNotUsingArrayOnDevice =
typename std::enable_if_t<!IsUsingArrayOnDevice, T>;

template <typename Ty = DataT>
explicit constexpr vec(const EnableIfNotUsingArray<Ty> &arg)
explicit constexpr vec(const EnableIfNotUsingArrayOnDevice<Ty> &arg)
: m_Data{DataType(vec_data<Ty>::get(arg))} {}

template <typename Ty = DataT>
typename std::enable_if_t<
std::is_fundamental_v<vec_data_t<Ty>> ||
std::is_same_v<typename std::remove_const_t<Ty>, half>,
vec &>
operator=(const EnableIfNotUsingArray<Ty> &Rhs) {
operator=(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) {
m_Data = (DataType)vec_data<Ty>::get(Rhs);
return *this;
}

template <typename Ty = DataT>
explicit constexpr vec(const EnableIfUsingArray<Ty> &arg)
explicit constexpr vec(const EnableIfUsingArrayOnDevice<Ty> &arg)
: vec{detail::RepeatValue<NumElements>(
static_cast<vec_data_t<DataT>>(arg)),
std::make_index_sequence<NumElements>()} {}
Expand All @@ -795,7 +810,7 @@ template <typename Type, int NumElements> class vec {
std::is_fundamental_v<vec_data_t<Ty>> ||
std::is_same_v<typename std::remove_const_t<Ty>, half>,
vec &>
operator=(const EnableIfUsingArray<Ty> &Rhs) {
operator=(const EnableIfUsingArrayOnDevice<Ty> &Rhs) {
for (int i = 0; i < NumElements; ++i) {
setValue(i, Rhs);
}
Expand Down Expand Up @@ -831,22 +846,22 @@ template <typename Type, int NumElements> class vec {
std::is_convertible_v<T, DataT> && NumElements == IdxNum, DataT>;
template <typename Ty = DataT>
constexpr vec(const EnableIfMultipleElems<2, Ty> Arg0,
const EnableIfNotUsingArray<Ty> Arg1)
const EnableIfNotUsingArrayOnDevice<Ty> Arg1)
: m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1)} {}
template <typename Ty = DataT>
constexpr vec(const EnableIfMultipleElems<3, Ty> Arg0,
const EnableIfNotUsingArray<Ty> Arg1, const DataT Arg2)
const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2)
: m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
vec_data<Ty>::get(Arg2)} {}
template <typename Ty = DataT>
constexpr vec(const EnableIfMultipleElems<4, Ty> Arg0,
const EnableIfNotUsingArray<Ty> Arg1, const DataT Arg2,
const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
const Ty Arg3)
: m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
vec_data<Ty>::get(Arg2), vec_data<Ty>::get(Arg3)} {}
template <typename Ty = DataT>
constexpr vec(const EnableIfMultipleElems<8, Ty> Arg0,
const EnableIfNotUsingArray<Ty> Arg1, const DataT Arg2,
const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
const DataT Arg3, const DataT Arg4, const DataT Arg5,
const DataT Arg6, const DataT Arg7)
: m_Data{vec_data<Ty>::get(Arg0), vec_data<Ty>::get(Arg1),
Expand All @@ -855,7 +870,7 @@ template <typename Type, int NumElements> class vec {
vec_data<Ty>::get(Arg6), vec_data<Ty>::get(Arg7)} {}
template <typename Ty = DataT>
constexpr vec(const EnableIfMultipleElems<16, Ty> Arg0,
const EnableIfNotUsingArray<Ty> Arg1, const DataT Arg2,
const EnableIfNotUsingArrayOnDevice<Ty> Arg1, const DataT Arg2,
const DataT Arg3, const DataT Arg4, const DataT Arg5,
const DataT Arg6, const DataT Arg7, const DataT Arg8,
const DataT Arg9, const DataT ArgA, const DataT ArgB,
Expand Down Expand Up @@ -895,15 +910,15 @@ template <typename Type, int NumElements> class vec {
std::is_same<vector_t_, vector_t>::value &&
!std::is_same<vector_t_, DataT>::value>>
constexpr vec(vector_t openclVector) {
if constexpr (!IsUsingArray) {
if constexpr (!IsUsingArrayOnDevice) {
m_Data = openclVector;
} else {
m_Data = bit_cast<DataType>(openclVector);
}
}

operator vector_t() const {
if constexpr (!IsUsingArray) {
if constexpr (!IsUsingArrayOnDevice) {
return m_Data;
} else {
auto ptr = bit_cast<const VectorDataType *>((&m_Data)->data());
Expand Down Expand Up @@ -1064,7 +1079,7 @@ template <typename Type, int NumElements> class vec {
#ifdef __SYCL_DEVICE_ONLY__
#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \
template <typename Ty = vec> \
vec operator BINOP(const EnableIfNotUsingArray<Ty> &Rhs) const { \
vec operator BINOP(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) const { \
vec Ret; \
Ret.m_Data = m_Data BINOP Rhs.m_Data; \
if constexpr (std::is_same<Type, bool>::value && CONVERT) { \
Expand All @@ -1073,7 +1088,7 @@ template <typename Type, int NumElements> class vec {
return Ret; \
} \
template <typename Ty = vec> \
vec operator BINOP(const EnableIfUsingArray<Ty> &Rhs) const { \
vec operator BINOP(const EnableIfUsingArrayOnDevice<Ty> &Rhs) const { \
vec Ret; \
for (size_t I = 0; I < NumElements; ++I) { \
Ret.setValue(I, (getValue(I) BINOP Rhs.getValue(I))); \
Expand Down Expand Up @@ -1227,67 +1242,94 @@ template <typename Type, int NumElements> class vec {
__SYCL_UOP(--, -=)
#undef __SYCL_UOP

// Available only when: dataT != cl_float && dataT != cl_double
// && dataT != cl_half
// operator~() available only when: dataT != float && dataT != double
// && dataT != half
template <typename T = DataT>
typename std::enable_if_t<std::is_integral_v<vec_data_t<T>>, vec>
typename std::enable_if_t<!std::is_floating_point_v<vec_data_t<T>> &&
(!IsUsingArrayOnDevice && !IsUsingArrayOnHost),
vec>
operator~() const {
// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
// by SYCL device compiler only.
#ifdef __SYCL_DEVICE_ONLY__
vec Ret{(typename vec::DataType) ~m_Data};
if constexpr (std::is_same<Type, bool>::value) {
Ret.ConvertToDataT();
}
return Ret;
#else
}
template <typename T = DataT>
typename std::enable_if_t<!std::is_floating_point_v<vec_data_t<T>> &&
(IsUsingArrayOnDevice || IsUsingArrayOnHost),
vec>
operator~() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I) {
Ret.setValue(I, ~getValue(I));
}
return Ret;
#endif
}

vec<rel_t, NumElements> operator!() const {
// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
// by SYCL device compiler only.
#ifdef __SYCL_DEVICE_ONLY__
return vec<rel_t, NumElements>{
(typename vec<rel_t, NumElements>::DataType) !m_Data};
#else
vec<rel_t, NumElements> Ret{};
// operator!
template <typename T = DataT, int N = NumElements>
EnableIfNotUsingArray<vec<T, N>> operator!() const {
return vec<T, N>{(typename vec<DataT, NumElements>::DataType) !m_Data};
}

// std::byte neither supports ! unary op or casting, so special handling is
// needed. And, worse, Windows has a conflict with 'byte'.
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <typename T = DataT, int N = NumElements>
typename std::enable_if_t<std::is_same<std::byte, T>::value &&
(IsUsingArrayOnDevice || IsUsingArrayOnHost),
vec<T, N>>
operator!() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I) {
Ret.setValue(I, !vec_data<DataT>::get(getValue(I)));
Ret.setValue(I, std::byte{!vec_data<DataT>::get(getValue(I))});
}
return Ret;
#endif
}

vec operator+() const {
// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
// by SYCL device compiler only.
#ifdef __SYCL_DEVICE_ONLY__
return vec{+m_Data};
template <typename T = DataT, int N = NumElements>
typename std::enable_if_t<!std::is_same<std::byte, T>::value &&
(IsUsingArrayOnDevice || IsUsingArrayOnHost),
vec<T, N>>
operator!() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I)
Ret.setValue(I, !vec_data<DataT>::get(getValue(I)));
return Ret;
}
#else
template <typename T = DataT, int N = NumElements>
EnableIfUsingArray<vec<T, N>> operator!() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I)
Ret.setValue(I, vec_data<DataT>::get(+vec_data<DataT>::get(getValue(I))));
Ret.setValue(I, !vec_data<DataT>::get(getValue(I)));
return Ret;
}
#endif

// operator +
template <typename T = vec> EnableIfNotUsingArray<T> operator+() const {
return vec{+m_Data};
}

vec operator-() const {
// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined
// by SYCL device compiler only.
#ifdef __SYCL_DEVICE_ONLY__
template <typename T = vec> EnableIfUsingArray<T> operator+() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I)
Ret.setValue(I, vec_data<DataT>::get(+vec_data<DataT>::get(getValue(I))));
return Ret;
}

// operator -
template <typename T = vec> EnableIfNotUsingArray<T> operator-() const {
return vec{-m_Data};
#else
}

template <typename T = vec> EnableIfUsingArray<T> operator-() const {
vec Ret{};
for (size_t I = 0; I < NumElements; ++I)
Ret.setValue(I, vec_data<DataT>::get(-vec_data<DataT>::get(getValue(I))));
return Ret;
#endif
}

// OP is: &&, ||
Expand All @@ -1303,7 +1345,7 @@ template <typename Type, int NumElements> class vec {
template <template <typename> class Operation,
typename Ty = vec<DataT, NumElements>>
vec<DataT, NumElements>
operatorHelper(const EnableIfNotUsingArray<Ty> &Rhs) const {
operatorHelper(const EnableIfNotUsingArrayOnDevice<Ty> &Rhs) const {
vec<DataT, NumElements> Result;
Operation<DataType> Op;
Result.m_Data = Op(m_Data, Rhs.m_Data);
Expand All @@ -1313,7 +1355,7 @@ template <typename Type, int NumElements> class vec {
template <template <typename> class Operation,
typename Ty = vec<DataT, NumElements>>
vec<DataT, NumElements>
operatorHelper(const EnableIfUsingArray<Ty> &Rhs) const {
operatorHelper(const EnableIfUsingArrayOnDevice<Ty> &Rhs) const {
vec<DataT, NumElements> Result;
Operation<DataT> Op;
for (size_t I = 0; I < NumElements; ++I) {
Expand Down
63 changes: 63 additions & 0 deletions sycl/test/basic_tests/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,67 @@ template <> inline void checkSizeForFloatingPoint<s::half, sizeof(int16_t)>() {
static_assert(sizeof(s::half) == sizeof(int16_t), "");
}

template <typename vecType, int numOfElems>
std::string vec2string(const sycl::vec<vecType, numOfElems> &vec) {
std::string str = "";
for (size_t i = 0; i < numOfElems - 1; ++i) {
str += std::to_string(vec[i]) + ",";
}
str = "{" + str + std::to_string(vec[numOfElems - 1]) + "}";
return str;
}

// the math built-in testing ensures that the vec binary ops get tested,
// but the unary ops are only tested by the CTS tests. Here we do some
// basic testing of the unary ops, ensuring they compile correctly.
template <typename T> void checkVecUnaryOps(T &v) {

std::cout << vec2string(v) << std::endl;

T d = +v;
std::cout << vec2string(d) << std::endl;

T e = -v;
std::cout << vec2string(e) << std::endl;

// ~ only supported by integral types.
if constexpr (std::is_integral_v<T>) {
T g = ~v;
std::cout << vec2string(g) << std::endl;
}

T f = !v;
std::cout << vec2string(f) << std::endl;
}

void checkVariousVecUnaryOps() {
sycl::vec<int, 1> vi1{1};
checkVecUnaryOps(vi1);
sycl::vec<int, 16> vi{1, 2, 0, -4, 1, 2, 0, -4, 1, 2, 0, -4, 1, 2, 0, -4};
checkVecUnaryOps(vi);

sycl::vec<long, 1> vl1{1};
checkVecUnaryOps(vl1);
sycl::vec<long, 16> vl{2, 3, 0, -5, 2, 3, 0, -5, 2, 3, 0, -5, 2, 3, 0, -5};
checkVecUnaryOps(vl);

sycl::vec<long long, 1> vll1{1};
checkVecUnaryOps(vll1);
sycl::vec<long long, 16> vll{0, 3, 4, -6, 0, 3, 4, -6,
0, 3, 4, -6, 0, 3, 4, -6};
checkVecUnaryOps(vll);

sycl::vec<float, 1> vf1{1};
checkVecUnaryOps(vf1);
sycl::vec<float, 16> vf{0, 4, 5, -9, 0, 4, 5, -9, 0, 4, 5, -9, 0, 4, 5, -9};
checkVecUnaryOps(vf);

sycl::vec<double, 1> vd1{1};
checkVecUnaryOps(vd1);
sycl::vec<double, 16> vd{0, 4, 5, -9, 0, 4, 5, -9, 0, 4, 5, -9, 0, 4, 5, -9};
checkVecUnaryOps(vd);
}

int main() {
// Test for creating constexpr expressions
constexpr sycl::specialization_id<sycl::vec<sycl::half, 2>> id(1.0);
Expand All @@ -126,5 +187,7 @@ int main() {
checkSizeForFloatingPoint<s::opencl::cl_float, sizeof(int32_t)>();
checkSizeForFloatingPoint<s::opencl::cl_double, sizeof(int64_t)>();

checkVariousVecUnaryOps();

return 0;
}