Skip to content

[ESIMD] Fix inconsistencies in the ESIMD API signatures. #4800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions sycl/include/sycl/ext/intel/experimental/esimd/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,7 @@
#ifdef __SYCL_DEVICE_ONLY__
#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
#else
#define SYCL_ESIMD_KERNEL
#define SYCL_ESIMD_FUNCTION
#endif

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace intel {
namespace experimental {
namespace esimd {

using uchar = unsigned char;
using ushort = unsigned short;
using uint = unsigned int;

#ifdef __SYCL_DEVICE_ONLY__
// Mark a function as nodebug.
#define ESIMD_NODEBUG __attribute__((nodebug))
// Mark an "ESIMD global": accessible from all functions in current translation
Expand All @@ -43,14 +27,21 @@ using uint = unsigned int;
__attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
// Bind an ESIMD global variable to a specific register.
#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
#else

#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
#else // __SYCL_DEVICE_ONLY__
#define SYCL_ESIMD_KERNEL
#define SYCL_ESIMD_FUNCTION

// TODO ESIMD define what this means on Windows host
#define ESIMD_NODEBUG
// On the host device, an ESIMD global is a thread-local static variable. This
// assumes that each work-item is mapped to a separate OS thread on the host
// device.
#define ESIMD_PRIVATE thread_local
#define ESIMD_REGISTER(n)
#endif

#define __ESIMD_API ESIMD_INLINE
#endif // __SYCL_DEVICE_ONLY__

// Mark a function as noinline.
#define ESIMD_NOINLINE __attribute__((noinline))
Expand All @@ -70,6 +61,17 @@ using uint = unsigned int;
#define __ESIMD_DEPR_ENUM_V(old, new, t) \
old __ESIMD_DEPRECATED(new) = static_cast<t>(new)

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace intel {
namespace experimental {
namespace esimd {

using uchar = unsigned char;
using ushort = unsigned short;
using uint = unsigned int;

/// Gen hardware supports applying saturation to the results of some
/// operations. This enum allows controlling this behavior.
enum class saturation : uint8_t { off, on };
Expand Down Expand Up @@ -237,8 +239,6 @@ enum class split_barrier_action : uint8_t {
// For backward compatibility:
using EsimdSbarrierType = split_barrier_action;

#undef __ESIMD_DEPR_ENUM_V

// Since EsimdSbarrierType values are deprecated, these macros will generate
// a deprecation message.
#define ESIMD_SBARRIER_WAIT EsimdSbarrierType::WAIT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
} // __SYCL_INLINE_NAMESPACE(cl)

// flat_read does flat-address gather
template <typename Ty, int N, int NumBlk = 0,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N, int NumBlk = 0>
__ESIMD_INTRIN
__SEIEED::vector_type_t<Ty, N * __SEIEED::ElemsPerAddrDecoding(NumBlk)>
__esimd_svm_gather(__SEIEED::vector_type_t<uint64_t, N> addrs,
Expand Down Expand Up @@ -111,9 +109,7 @@ __ESIMD_INTRIN
#endif // __SYCL_DEVICE_ONLY__

// flat_write does flat-address scatter
template <typename Ty, int N, int NumBlk = 0,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N, int NumBlk = 0>
__ESIMD_INTRIN void __esimd_svm_scatter(
__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::vector_type_t<Ty, N * __SEIEED::ElemsPerAddrDecoding(NumBlk)>
Expand Down Expand Up @@ -144,8 +140,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter(
#endif // __SYCL_DEVICE_ONLY__

// flat_block_read reads a block of data from one flat address
template <typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N>
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
__esimd_svm_block_ld_unaligned(uint64_t addr)
#ifdef __SYCL_DEVICE_ONLY__
Expand All @@ -163,8 +158,7 @@ __esimd_svm_block_ld_unaligned(uint64_t addr)
#endif // __SYCL_DEVICE_ONLY__

// flat_block_write writes a block of data using one flat address
template <typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N>
__ESIMD_INTRIN void __esimd_svm_block_st(uint64_t addr,
__SEIEED::vector_type_t<Ty, N> vals)
#ifdef __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -203,9 +197,7 @@ __ESIMD_INTRIN void __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t offset,
#endif // __SYCL_DEVICE_ONLY__

// flat_read4 does flat-address gather4
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask>
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> __ESIMD_INTRIN
__esimd_svm_gather4_scaled(__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::simd_mask_storage_t<N> pred = 1)
Expand Down Expand Up @@ -258,9 +250,7 @@ __esimd_svm_gather4_scaled(__SEIEED::vector_type_t<uint64_t, N> addrs,
#endif // __SYCL_DEVICE_ONLY__

// flat_write4 does flat-address scatter4
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <typename Ty, int N, __SEIEE::rgba_channel_mask Mask>
__ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals,
Expand Down Expand Up @@ -333,8 +323,7 @@ __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
// @param elem_offsets - per-element offsets
//
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
int16_t Scale = 0, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
int16_t Scale = 0>
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
__esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
__SEIEED::vector_type_t<uint32_t, N> elem_offsets)
Expand Down Expand Up @@ -374,8 +363,7 @@ __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
// @param vals - values to write
//
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
int16_t Scale = 0, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
int16_t Scale = 0>
__ESIMD_INTRIN void
__esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
SurfIndAliasTy surf_ind, uint32_t global_offset,
Expand All @@ -393,9 +381,7 @@ __esimd_scatter_scaled(__SEIEED::simd_mask_storage_t<N> pred,
#endif // __SYCL_DEVICE_ONLY__

// flat_atomic: flat-address atomic
template <__SEIEE::atomic_op Op, typename Ty, int N,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <__SEIEE::atomic_op Op, typename Ty, int N>
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
__esimd_svm_atomic0(__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::simd_mask_storage_t<N> pred)
Expand All @@ -407,9 +393,7 @@ __esimd_svm_atomic0(__SEIEED::vector_type_t<uint64_t, N> addrs,
}
#endif // __SYCL_DEVICE_ONLY__

template <__SEIEE::atomic_op Op, typename Ty, int N,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <__SEIEE::atomic_op Op, typename Ty, int N>
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
__esimd_svm_atomic1(__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::vector_type_t<Ty, N> src0,
Expand All @@ -422,9 +406,7 @@ __esimd_svm_atomic1(__SEIEED::vector_type_t<uint64_t, N> addrs,
}
#endif // __SYCL_DEVICE_ONLY__

template <__SEIEE::atomic_op Op, typename Ty, int N,
__SEIEE::CacheHint L1H = __SEIEE::CacheHint::None,
__SEIEE::CacheHint L3H = __SEIEE::CacheHint::None>
template <__SEIEE::atomic_op Op, typename Ty, int N>
__ESIMD_INTRIN __SEIEED::vector_type_t<Ty, N>
__esimd_svm_atomic2(__SEIEED::vector_type_t<uint64_t, N> addrs,
__SEIEED::vector_type_t<Ty, N> src0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,9 +648,7 @@ void simd_obj_impl<T, N, T1, SFINAE>::copy_from(const T *Addr)
"block size must be at most 8 owords");

uintptr_t AddrVal = reinterpret_cast<uintptr_t>(Addr);
*this =
__esimd_svm_block_ld_unaligned<T, N, CacheHint::None, CacheHint::None>(
AddrVal);
*this = __esimd_svm_block_ld_unaligned<T, N>(AddrVal);
}

template <typename T, int N, class T1, class SFINAE>
Expand Down Expand Up @@ -691,7 +689,7 @@ void simd_obj_impl<T, N, T1, SFINAE>::copy_to(T *addr) const
"block size must be at most 8 owords");

uintptr_t AddrVal = reinterpret_cast<uintptr_t>(addr);
__esimd_svm_block_st<T, N, CacheHint::None, CacheHint::None>(AddrVal, data());
__esimd_svm_block_st<T, N>(AddrVal, data());
}

template <typename T, int N, class T1, class SFINAE>
Expand Down
Loading