Skip to content

Commit 430c722

Browse files
[SYCL] Implement device_has kernel property and macro (#7159)
This commit implements the `device_has` kernel property and the SYCL_EXT_ONEAPI_FUNCTION_PROPERTY macro from the [sycl_ext_oneapi_kernel_properties](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc) extension. Known current limitations: - The LLVM IR attributes from add_ir_attributes_function are not correctly generated on SYCL_EXTERNAL functions. - The SYCL_EXT_ONEAPI_FUNCTION_PROPERTY cannot currently be placed after SYCL_EXTERNAL. Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
1 parent efa7b0d commit 430c722

File tree

7 files changed

+453
-19
lines changed

7 files changed

+453
-19
lines changed

sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#pragma once
1010

11+
#include <sycl/aspects.hpp>
1112
#include <sycl/ext/oneapi/properties/property.hpp>
1213
#include <sycl/ext/oneapi/properties/property_value.hpp>
1314

@@ -53,9 +54,18 @@ struct SizeListToStrHelper<SizeList<0, Values...>, CharList<ParsedChars...>,
5354
Chars...>
5455
: SizeListToStrHelper<SizeList<Values...>,
5556
CharList<ParsedChars..., Chars..., ','>> {};
57+
template <size_t... Values, char... ParsedChars>
58+
struct SizeListToStrHelper<SizeList<0, Values...>, CharList<ParsedChars...>>
59+
: SizeListToStrHelper<SizeList<Values...>,
60+
CharList<ParsedChars..., '0', ','>> {};
5661
template <char... ParsedChars, char... Chars>
5762
struct SizeListToStrHelper<SizeList<0>, CharList<ParsedChars...>, Chars...>
5863
: CharsToStr<ParsedChars..., Chars...> {};
64+
template <char... ParsedChars>
65+
struct SizeListToStrHelper<SizeList<0>, CharList<ParsedChars...>>
66+
: CharsToStr<ParsedChars..., '0'> {};
67+
template <>
68+
struct SizeListToStrHelper<SizeList<>, CharList<>> : CharsToStr<> {};
5969

6070
// Converts size_t values to a comma-separated string representation.
6171
template <size_t... Sizes>
@@ -82,6 +92,12 @@ struct sub_group_size_key {
8292
std::integral_constant<uint32_t, Size>>;
8393
};
8494

95+
struct device_has_key {
96+
template <aspect... Aspects>
97+
using value_t = property_value<device_has_key,
98+
std::integral_constant<aspect, Aspects>...>;
99+
};
100+
85101
template <size_t Dim0, size_t... Dims>
86102
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
87103
std::integral_constant<size_t, Dims>...> {
@@ -127,6 +143,13 @@ struct property_value<sub_group_size_key,
127143
static constexpr uint32_t value = Size;
128144
};
129145

146+
template <aspect... Aspects>
147+
struct property_value<device_has_key,
148+
std::integral_constant<aspect, Aspects>...> {
149+
using key_t = device_has_key;
150+
static constexpr std::array<aspect, sizeof...(Aspects)> value{Aspects...};
151+
};
152+
130153
template <size_t Dim0, size_t... Dims>
131154
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;
132155

@@ -137,10 +160,14 @@ inline constexpr work_group_size_hint_key::value_t<Dim0, Dims...>
137160
template <uint32_t Size>
138161
inline constexpr sub_group_size_key::value_t<Size> sub_group_size;
139162

163+
template <aspect... Aspects>
164+
inline constexpr device_has_key::value_t<Aspects...> device_has;
165+
140166
template <> struct is_property_key<work_group_size_key> : std::true_type {};
141167
template <>
142168
struct is_property_key<work_group_size_hint_key> : std::true_type {};
143169
template <> struct is_property_key<sub_group_size_key> : std::true_type {};
170+
template <> struct is_property_key<device_has_key> : std::true_type {};
144171

145172
namespace detail {
146173
template <> struct PropertyToKind<work_group_size_key> {
@@ -152,13 +179,17 @@ template <> struct PropertyToKind<work_group_size_hint_key> {
152179
template <> struct PropertyToKind<sub_group_size_key> {
153180
static constexpr PropKind Kind = PropKind::SubGroupSize;
154181
};
182+
template <> struct PropertyToKind<device_has_key> {
183+
static constexpr PropKind Kind = PropKind::DeviceHas;
184+
};
155185

156186
template <>
157187
struct IsCompileTimeProperty<work_group_size_key> : std::true_type {};
158188
template <>
159189
struct IsCompileTimeProperty<work_group_size_hint_key> : std::true_type {};
160190
template <>
161191
struct IsCompileTimeProperty<sub_group_size_key> : std::true_type {};
192+
template <> struct IsCompileTimeProperty<device_has_key> : std::true_type {};
162193

163194
template <size_t Dim0, size_t... Dims>
164195
struct PropertyMetaInfo<work_group_size_key::value_t<Dim0, Dims...>> {
@@ -175,6 +206,12 @@ struct PropertyMetaInfo<sub_group_size_key::value_t<Size>> {
175206
static constexpr const char *name = "sycl-sub-group-size";
176207
static constexpr uint32_t value = Size;
177208
};
209+
template <aspect... Aspects>
210+
struct PropertyMetaInfo<device_has_key::value_t<Aspects...>> {
211+
static constexpr const char *name = "sycl-device-has";
212+
static constexpr const char *value =
213+
SizeListToStr<static_cast<size_t>(Aspects)...>::value;
214+
};
178215

179216
template <typename T, typename = void>
180217
struct HasKernelPropertiesGetMethod : std::false_type {};
@@ -193,3 +230,15 @@ struct HasKernelPropertiesGetMethod<
193230
} // namespace ext
194231
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
195232
} // namespace sycl
233+
234+
#ifdef __SYCL_DEVICE_ONLY__
235+
#define SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(PROP) \
236+
[[__sycl_detail__::add_ir_attributes_function( \
237+
{"sycl-device-has"}, \
238+
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \
239+
std::remove_cv_t<std::remove_reference_t<decltype(PROP)>>>::name, \
240+
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \
241+
std::remove_cv_t<std::remove_reference_t<decltype(PROP)>>>::value)]]
242+
#else
243+
#define SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(PROP)
244+
#endif

sycl/include/sycl/ext/oneapi/properties/property.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,9 @@ enum PropKind : uint32_t {
172172
WorkGroupSize = 6,
173173
WorkGroupSizeHint = 7,
174174
SubGroupSize = 8,
175+
DeviceHas = 9,
175176
// PropKindSize must always be the last value.
176-
PropKindSize = 9,
177+
PropKindSize = 10,
177178
};
178179

179180
// This trait must be specialized for all properties and must have a unique

sycl/include/sycl/ext/oneapi/properties/property_utils.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace oneapi {
2020
namespace experimental {
2121

2222
// Forward declaration
23-
template <typename PropertyT, typename T, typename... Ts> struct property_value;
23+
template <typename PropertyT, typename... Ts> struct property_value;
2424

2525
namespace detail {
2626

sycl/include/sycl/ext/oneapi/properties/property_value.hpp

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,26 @@ namespace oneapi {
1818
namespace experimental {
1919
namespace detail {
2020

21-
// Base class for property values with a single type value.
22-
struct SingleTypePropertyValueBase {};
23-
24-
// Base class for properties with 0 or more than 1 values.
25-
struct EmptyPropertyValueBase {};
26-
2721
// Base class for property values with a single non-type value
28-
template <typename T> struct SingleNontypePropertyValueBase {
22+
template <typename T, typename = void> struct SingleNontypePropertyValueBase {};
23+
24+
template <typename T>
25+
struct SingleNontypePropertyValueBase<T, std::enable_if_t<HasValue<T>::value>> {
2926
static constexpr auto value = T::value;
3027
};
3128

32-
// Helper class for property values with a single value
29+
// Helper base class for property_value.
30+
template <typename... Ts> struct PropertyValueBase {};
31+
3332
template <typename T>
34-
struct SinglePropertyValue
35-
: public sycl::detail::conditional_t<HasValue<T>::value,
36-
SingleNontypePropertyValueBase<T>,
37-
SingleTypePropertyValueBase> {
33+
struct PropertyValueBase<T> : public detail::SingleNontypePropertyValueBase<T> {
3834
using value_t = T;
3935
};
4036

4137
} // namespace detail
4238

43-
template <typename PropertyT, typename T = void, typename... Ts>
44-
struct property_value
45-
: public sycl::detail::conditional_t<
46-
sizeof...(Ts) == 0 && !std::is_same<T, void>::value,
47-
detail::SinglePropertyValue<T>, detail::EmptyPropertyValueBase> {
39+
template <typename PropertyT, typename... Ts>
40+
struct property_value : public detail::PropertyValueBase<Ts...> {
4841
using key_t = PropertyT;
4942
};
5043

sycl/test/extensions/properties/properties_kernel.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,44 @@
33

44
#include <sycl/sycl.hpp>
55

6+
using namespace sycl;
67
using namespace sycl::ext::oneapi::experimental;
78

9+
using device_has_all =
10+
decltype(device_has<
11+
aspect::host, aspect::cpu, aspect::gpu, aspect::accelerator,
12+
aspect::custom, aspect::fp16, aspect::fp64, aspect::image,
13+
aspect::online_compiler, aspect::online_linker,
14+
aspect::queue_profiling, aspect::usm_device_allocations,
15+
aspect::usm_host_allocations, aspect::usm_shared_allocations,
16+
aspect::usm_restricted_shared_allocations,
17+
aspect::usm_system_allocations, aspect::ext_intel_pci_address,
18+
aspect::ext_intel_gpu_eu_count,
19+
aspect::ext_intel_gpu_eu_simd_width, aspect::ext_intel_gpu_slices,
20+
aspect::ext_intel_gpu_subslices_per_slice,
21+
aspect::ext_intel_gpu_eu_count_per_subslice,
22+
aspect::ext_intel_max_mem_bandwidth, aspect::ext_intel_mem_channel,
23+
aspect::usm_atomic_host_allocations,
24+
aspect::usm_atomic_shared_allocations, aspect::atomic64,
25+
aspect::ext_intel_device_info_uuid, aspect::ext_oneapi_srgb,
26+
aspect::ext_oneapi_native_assert, aspect::host_debuggable,
27+
aspect::ext_intel_gpu_hw_threads_per_eu,
28+
aspect::ext_oneapi_cuda_async_barrier, aspect::ext_oneapi_bfloat16,
29+
aspect::ext_intel_free_memory, aspect::ext_intel_device_id>);
30+
31+
template <aspect Aspect> inline void singleAspectDeviceHasChecks() {
32+
static_assert(is_property_value<decltype(device_has<Aspect>)>::value);
33+
static_assert(std::is_same_v<device_has_key,
34+
typename decltype(device_has<Aspect>)::key_t>);
35+
static_assert(decltype(device_has<Aspect>)::value.size() == 1);
36+
static_assert(decltype(device_has<Aspect>)::value[0] == Aspect);
37+
}
38+
839
int main() {
940
static_assert(is_property_key<work_group_size_key>::value);
1041
static_assert(is_property_key<work_group_size_hint_key>::value);
1142
static_assert(is_property_key<sub_group_size_key>::value);
43+
static_assert(is_property_key<device_has_key>::value);
1244

1345
static_assert(is_property_value<decltype(work_group_size<1>)>::value);
1446
static_assert(is_property_value<decltype(work_group_size<2, 2>)>::value);
@@ -52,5 +84,96 @@ int main() {
5284
static_assert(std::is_same_v<decltype(sub_group_size<28>)::value_t,
5385
std::integral_constant<uint32_t, 28>>);
5486

87+
singleAspectDeviceHasChecks<aspect::host>();
88+
singleAspectDeviceHasChecks<aspect::cpu>();
89+
singleAspectDeviceHasChecks<aspect::gpu>();
90+
singleAspectDeviceHasChecks<aspect::accelerator>();
91+
singleAspectDeviceHasChecks<aspect::custom>();
92+
singleAspectDeviceHasChecks<aspect::fp16>();
93+
singleAspectDeviceHasChecks<aspect::fp64>();
94+
singleAspectDeviceHasChecks<aspect::image>();
95+
singleAspectDeviceHasChecks<aspect::online_compiler>();
96+
singleAspectDeviceHasChecks<aspect::online_linker>();
97+
singleAspectDeviceHasChecks<aspect::queue_profiling>();
98+
singleAspectDeviceHasChecks<aspect::usm_device_allocations>();
99+
singleAspectDeviceHasChecks<aspect::usm_host_allocations>();
100+
singleAspectDeviceHasChecks<aspect::usm_shared_allocations>();
101+
singleAspectDeviceHasChecks<aspect::usm_restricted_shared_allocations>();
102+
singleAspectDeviceHasChecks<aspect::usm_system_allocations>();
103+
singleAspectDeviceHasChecks<aspect::ext_intel_pci_address>();
104+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_count>();
105+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_simd_width>();
106+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_slices>();
107+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_subslices_per_slice>();
108+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_eu_count_per_subslice>();
109+
singleAspectDeviceHasChecks<aspect::ext_intel_max_mem_bandwidth>();
110+
singleAspectDeviceHasChecks<aspect::ext_intel_mem_channel>();
111+
singleAspectDeviceHasChecks<aspect::usm_atomic_host_allocations>();
112+
singleAspectDeviceHasChecks<aspect::usm_atomic_shared_allocations>();
113+
singleAspectDeviceHasChecks<aspect::atomic64>();
114+
singleAspectDeviceHasChecks<aspect::ext_intel_device_info_uuid>();
115+
singleAspectDeviceHasChecks<aspect::ext_oneapi_srgb>();
116+
singleAspectDeviceHasChecks<aspect::ext_oneapi_native_assert>();
117+
singleAspectDeviceHasChecks<aspect::host_debuggable>();
118+
singleAspectDeviceHasChecks<aspect::ext_intel_gpu_hw_threads_per_eu>();
119+
singleAspectDeviceHasChecks<aspect::ext_oneapi_cuda_async_barrier>();
120+
singleAspectDeviceHasChecks<aspect::ext_oneapi_bfloat16>();
121+
singleAspectDeviceHasChecks<aspect::ext_intel_free_memory>();
122+
singleAspectDeviceHasChecks<aspect::ext_intel_device_id>();
123+
124+
static_assert(is_property_value<decltype(device_has<>)>::value);
125+
static_assert(std::is_same_v<device_has_key, decltype(device_has<>)::key_t>);
126+
static_assert(decltype(device_has<>)::value.size() == 0);
127+
128+
static_assert(is_property_value<device_has_all>::value);
129+
static_assert(std::is_same_v<device_has_key, device_has_all::key_t>);
130+
static_assert(device_has_all::value.size() == 36);
131+
static_assert(device_has_all::value[0] == aspect::host);
132+
static_assert(device_has_all::value[1] == aspect::cpu);
133+
static_assert(device_has_all::value[2] == aspect::gpu);
134+
static_assert(device_has_all::value[3] == aspect::accelerator);
135+
static_assert(device_has_all::value[4] == aspect::custom);
136+
static_assert(device_has_all::value[5] == aspect::fp16);
137+
static_assert(device_has_all::value[6] == aspect::fp64);
138+
static_assert(device_has_all::value[7] == aspect::image);
139+
static_assert(device_has_all::value[8] == aspect::online_compiler);
140+
static_assert(device_has_all::value[9] == aspect::online_linker);
141+
static_assert(device_has_all::value[10] == aspect::queue_profiling);
142+
static_assert(device_has_all::value[11] == aspect::usm_device_allocations);
143+
static_assert(device_has_all::value[12] == aspect::usm_host_allocations);
144+
static_assert(device_has_all::value[13] == aspect::usm_shared_allocations);
145+
static_assert(device_has_all::value[14] ==
146+
aspect::usm_restricted_shared_allocations);
147+
static_assert(device_has_all::value[15] == aspect::usm_system_allocations);
148+
static_assert(device_has_all::value[16] == aspect::ext_intel_pci_address);
149+
static_assert(device_has_all::value[17] == aspect::ext_intel_gpu_eu_count);
150+
static_assert(device_has_all::value[18] ==
151+
aspect::ext_intel_gpu_eu_simd_width);
152+
static_assert(device_has_all::value[19] == aspect::ext_intel_gpu_slices);
153+
static_assert(device_has_all::value[20] ==
154+
aspect::ext_intel_gpu_subslices_per_slice);
155+
static_assert(device_has_all::value[21] ==
156+
aspect::ext_intel_gpu_eu_count_per_subslice);
157+
static_assert(device_has_all::value[22] ==
158+
aspect::ext_intel_max_mem_bandwidth);
159+
static_assert(device_has_all::value[23] == aspect::ext_intel_mem_channel);
160+
static_assert(device_has_all::value[24] ==
161+
aspect::usm_atomic_host_allocations);
162+
static_assert(device_has_all::value[25] ==
163+
aspect::usm_atomic_shared_allocations);
164+
static_assert(device_has_all::value[26] == aspect::atomic64);
165+
static_assert(device_has_all::value[27] ==
166+
aspect::ext_intel_device_info_uuid);
167+
static_assert(device_has_all::value[28] == aspect::ext_oneapi_srgb);
168+
static_assert(device_has_all::value[29] == aspect::ext_oneapi_native_assert);
169+
static_assert(device_has_all::value[30] == aspect::host_debuggable);
170+
static_assert(device_has_all::value[31] ==
171+
aspect::ext_intel_gpu_hw_threads_per_eu);
172+
static_assert(device_has_all::value[32] ==
173+
aspect::ext_oneapi_cuda_async_barrier);
174+
static_assert(device_has_all::value[33] == aspect::ext_oneapi_bfloat16);
175+
static_assert(device_has_all::value[34] == aspect::ext_intel_free_memory);
176+
static_assert(device_has_all::value[35] == aspect::ext_intel_device_id);
177+
55178
return 0;
56179
}

0 commit comments

Comments
 (0)