Skip to content

Commit 632946a

Browse files
authored
[SYCL] Adds support for atomic fence capabilities device queries (#8586)
Adds support to query devices for `atomic_fence_order_capabilities` and `atomic_fence_scope_capabilities`. The backends supported are OpenCL and Level Zero. For the rest of backends, it has been left unsupported. Fixes #8293. --------- Signed-off-by: Maronas, Marcos <marcos.maronas@intel.com>
1 parent 0fb9a95 commit 632946a

21 files changed

+580
-77
lines changed

sycl/include/sycl/detail/pi.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,11 @@
8080
// 12.24 Added new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property to the
8181
// _pi_kernel_exec_info. Defined _pi_kernel_cache_config enum with values of
8282
// the new PI_EXT_KERNEL_EXEC_INFO_CACHE_CONFIG property.
83+
// 12.25 Added PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and
84+
// PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo.
8385

8486
#define _PI_H_VERSION_MAJOR 12
85-
#define _PI_H_VERSION_MINOR 24
87+
#define _PI_H_VERSION_MINOR 25
8688

8789
#define _PI_STRING_HELPER(a) #a
8890
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -314,8 +316,8 @@ typedef enum {
314316
// return the number of queue indices that are available for this device.
315317
PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032,
316318
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
317-
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
318-
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
319+
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
320+
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
319321
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112,
320322
PI_DEVICE_INFO_BACKEND_VERSION = 0x10113,
321323
// Return whether bfloat16 math functions are supported by device
@@ -326,6 +328,8 @@ typedef enum {
326328
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003,
327329
PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004,
328330
PI_EXT_CODEPLAY_DEVICE_INFO_SUPPORTS_FUSION = 0x20005,
331+
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x20006,
332+
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x20007,
329333
} _pi_device_info;
330334

331335
typedef enum {
@@ -347,8 +351,10 @@ typedef enum {
347351
PI_CONTEXT_INFO_PROPERTIES = 0x1082,
348352
PI_CONTEXT_INFO_REFERENCE_COUNT = 0x1080,
349353
// Atomics capabilities extensions
350-
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
351-
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011,
354+
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
355+
PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011,
356+
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10012,
357+
PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10013,
352358
// Native 2D USM memory operation support
353359
PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT = 0x30000,
354360
PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT = 0x30001,
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
__SYCL_PARAM_TRAITS_SPEC(context, reference_count, uint32_t, PI_CONTEXT_INFO_REFERENCE_COUNT)
22
__SYCL_PARAM_TRAITS_SPEC(context, platform, sycl::platform, PI_CONTEXT_INFO_PLATFORM)
33
__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector<sycl::device>, PI_CONTEXT_INFO_DEVICES)
4-
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector<sycl::memory_order>, PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
5-
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector<sycl::memory_scope>, PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
4+
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector<sycl::memory_order>, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
5+
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector<sycl::memory_scope>, PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
6+
__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_order_capabilities, std::vector<sycl::memory_order>, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES)
7+
__SYCL_PARAM_TRAITS_SPEC(context, atomic_fence_scope_capabilities, std::vector<sycl::memory_scope>, PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES)

sycl/include/sycl/info/device_traits.def

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,16 @@ __SYCL_PARAM_TRAITS_SPEC(device, host_unified_memory, bool,
111111
PI_DEVICE_INFO_HOST_UNIFIED_MEMORY)
112112
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities,
113113
std::vector<sycl::memory_order>,
114-
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
114+
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
115+
__SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_order_capabilities,
116+
std::vector<sycl::memory_order>,
117+
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES)
115118
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities,
116119
std::vector<sycl::memory_scope>,
117-
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
120+
PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
121+
__SYCL_PARAM_TRAITS_SPEC(device, atomic_fence_scope_capabilities,
122+
std::vector<sycl::memory_scope>,
123+
PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES)
118124
__SYCL_PARAM_TRAITS_SPEC(device, profiling_timer_resolution, size_t,
119125
PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION)
120126
__SYCL_PARAM_TRAITS_SPEC(device, is_endian_little, bool,

sycl/include/sycl/info/info_desc.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ namespace device {
9595
// atomic_fence_order_capabilities, atomic_fence_scope_capabilities, aspects,
9696
// il_version.
9797

98+
struct atomic_fence_order_capabilities;
99+
struct atomic_fence_scope_capabilities;
100+
98101
#define __SYCL_PARAM_TRAITS_DEPRECATED(Desc, Message) \
99102
struct __SYCL2020_DEPRECATED(Message) Desc;
100103
#include <sycl/info/device_traits_deprecated.def>

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,27 +1003,15 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
10031003
case PI_CONTEXT_INFO_REFERENCE_COUNT:
10041004
return getInfo(param_value_size, param_value, param_value_size_ret,
10051005
context->get_reference_count());
1006-
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
1007-
pi_memory_order_capabilities capabilities =
1008-
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
1009-
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL;
1010-
return getInfo(param_value_size, param_value, param_value_size_ret,
1011-
capabilities);
1012-
}
1013-
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
1014-
int major = 0;
1015-
sycl::detail::pi::assertion(
1016-
cuDeviceGetAttribute(&major,
1017-
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1018-
context->get_device()->get()) == CUDA_SUCCESS);
1019-
pi_memory_order_capabilities capabilities =
1020-
(major >= 7) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
1021-
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
1022-
PI_MEMORY_SCOPE_SYSTEM
1023-
: PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
1024-
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE;
1025-
return getInfo(param_value_size, param_value, param_value_size_ret,
1026-
capabilities);
1006+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
1007+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
1008+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
1009+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
1010+
// These queries should be dealt with in context_impl.cpp by calling the
1011+
// queries of each device separately and building the intersection set.
1012+
setErrorMessage("These queries should have never come here.",
1013+
PI_ERROR_INVALID_ARG_VALUE);
1014+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
10271015
}
10281016
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
10291017
return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
@@ -1293,14 +1281,14 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
12931281
return getInfo(param_value_size, param_value, param_value_size_ret,
12941282
atomic64);
12951283
}
1296-
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
1284+
case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
12971285
pi_memory_order_capabilities capabilities =
12981286
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
12991287
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL;
13001288
return getInfo(param_value_size, param_value, param_value_size_ret,
13011289
capabilities);
13021290
}
1303-
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
1291+
case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
13041292
int major = 0;
13051293
sycl::detail::pi::assertion(
13061294
cuDeviceGetAttribute(&major,
@@ -1315,6 +1303,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
13151303
return getInfo(param_value_size, param_value, param_value_size_ret,
13161304
capabilities);
13171305
}
1306+
case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
1307+
case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES:
1308+
// There is no way to query this in the backend
1309+
setErrorMessage("CUDA backend does not support this query",
1310+
PI_ERROR_INVALID_ARG_VALUE);
1311+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
13181312
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: {
13191313
int major = 0;
13201314
sycl::detail::pi::assertion(

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -804,8 +804,10 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
804804
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_MAX_MEM_BANDWIDTH)
805805
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_IMAGE_SRGB)
806806
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_64)
807-
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
808-
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
807+
CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES)
808+
CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES)
809+
CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES)
810+
CASE_PI_UNSUPPORTED(PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES)
809811
CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS)
810812
CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D)
811813
CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D)

sycl/plugins/hip/pi_hip.cpp

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,16 @@ pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name,
10071007
// 2D USM operations currently not supported.
10081008
return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
10091009
false);
1010-
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
1010+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
1011+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
1012+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
1013+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
1014+
// These queries should be dealt with in context_impl.cpp by calling the
1015+
// queries of each device separately and building the intersection set.
1016+
setErrorMessage("These queries should have never come here.",
1017+
PI_ERROR_INVALID_ARG_VALUE);
1018+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
1019+
}
10111020
default:
10121021
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
10131022
}
@@ -1856,13 +1865,35 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
18561865
pi_int32{1});
18571866
}
18581867

1859-
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
1868+
case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
18601869
pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED |
18611870
PI_MEMORY_ORDER_ACQUIRE |
18621871
PI_MEMORY_ORDER_RELEASE;
18631872
return getInfo(param_value_size, param_value, param_value_size_ret,
18641873
capabilities);
18651874
}
1875+
case PI_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
1876+
case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
1877+
// SYCL2020 4.6.4.2 minimum mandated capabilities for
1878+
// atomic_fence/memory_scope_capabilities.
1879+
// Because scopes are hierarchical, wider scopes support all narrower
1880+
// scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
1881+
// WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382)
1882+
pi_memory_scope_capabilities capabilities = PI_MEMORY_SCOPE_WORK_ITEM |
1883+
PI_MEMORY_SCOPE_SUB_GROUP |
1884+
PI_MEMORY_SCOPE_WORK_GROUP;
1885+
return getInfo(param_value_size, param_value, param_value_size_ret,
1886+
capabilities);
1887+
}
1888+
case PI_EXT_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {
1889+
// SYCL2020 4.6.4.2 minimum mandated capabilities for
1890+
// atomic_fence_order_capabilities.
1891+
pi_memory_order_capabilities capabilities =
1892+
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
1893+
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL;
1894+
return getInfo(param_value_size, param_value, param_value_size_ret,
1895+
capabilities);
1896+
}
18661897

18671898
case PI_DEVICE_INFO_DEVICE_ID: {
18681899
int value = 0;
@@ -1889,7 +1920,6 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
18891920
}
18901921

18911922
// TODO: Investigate if this information is available on HIP.
1892-
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
18931923
case PI_DEVICE_INFO_PCI_ADDRESS:
18941924
case PI_DEVICE_INFO_GPU_EU_COUNT:
18951925
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
@@ -1899,7 +1929,9 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
18991929
case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
19001930
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
19011931
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS:
1902-
return PI_ERROR_INVALID_VALUE;
1932+
setErrorMessage("HIP backend does not support this query",
1933+
PI_ERROR_INVALID_ARG_VALUE);
1934+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
19031935

19041936
default:
19051937
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2309,14 +2309,16 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
23092309
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
23102310
// 2D USM fill and memset is not supported.
23112311
return ReturnValue(pi_bool{false});
2312-
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
2313-
pi_memory_order_capabilities capabilities =
2314-
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
2315-
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
2316-
PI_MEMORY_ORDER_SEQ_CST;
2317-
return ReturnValue(capabilities);
2318-
}
2319-
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
2312+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
2313+
case PI_EXT_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
2314+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES:
2315+
case PI_EXT_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
2316+
// These queries should be dealt with in context_impl.cpp by calling the
2317+
// queries of each device separately and building the intersection set.
2318+
setErrorMessage("These queries should have never come here.",
2319+
UR_RESULT_ERROR_INVALID_VALUE);
2320+
return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
2321+
}
23202322
default:
23212323
// TODO: implement other parameters
23222324
die("piGetContextInfo: unsuppported ParamName.");

0 commit comments

Comments
 (0)