Skip to content

[SYCL][PI][CUDA] Update queries for atomic order and scope for CUDA #4853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,12 @@ typedef enum {
PI_DEVICE_INFO_IMAGE_SRGB = 0x10027,
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003

} _pi_device_info;

typedef enum {
Expand All @@ -330,7 +330,8 @@ typedef enum {
PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES,
PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT,
// Atomics capabilities extensions
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011
} _pi_context_info;

typedef enum {
Expand Down Expand Up @@ -537,6 +538,13 @@ constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04;
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08;
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10;

using pi_memory_scope_capabilities = pi_bitfield;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10;

typedef enum {
PI_PROFILING_INFO_COMMAND_QUEUED = CL_PROFILING_COMMAND_QUEUED,
PI_PROFILING_INFO_COMMAND_SUBMIT = CL_PROFILING_COMMAND_SUBMIT,
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/info/context_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ __SYCL_PARAM_TRAITS_SPEC(context, reference_count, cl_uint)
__SYCL_PARAM_TRAITS_SPEC(context, platform, cl::sycl::platform)
__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector<cl::sycl::device>)
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector<cl::sycl::memory_order>)
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector<cl::sycl::memory_scope>)
2 changes: 2 additions & 0 deletions sycl/include/CL/sycl/info/device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, image_support, bool)
__SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool)
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities,
std::vector<cl::sycl::memory_order>)
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities,
std::vector<cl::sycl::memory_scope>)
__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t)
Expand Down
7 changes: 6 additions & 1 deletion sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class program;
class device;
class platform;
class kernel_id;
enum class memory_scope;

// TODO: stop using OpenCL directly, use PI.
namespace info {
Expand All @@ -44,6 +45,8 @@ enum class context : cl_context_info {
devices = CL_CONTEXT_DEVICES,
atomic_memory_order_capabilities =
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
atomic_memory_scope_capabilities =
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
};

// A.3 Device information descriptors
Expand Down Expand Up @@ -168,7 +171,9 @@ enum class device : cl_device_info {
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS,
ext_oneapi_max_work_groups_1d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D,
ext_oneapi_max_work_groups_2d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D,
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D,
atomic_memory_scope_capabilities =
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
};

enum class device_type : pi_uint64 {
Expand Down
16 changes: 16 additions & 0 deletions sycl/include/CL/sycl/memory_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ readMemoryOrderBitfield(pi_memory_order_capabilities bits) {
return result;
}

inline std::vector<memory_scope>
readMemoryScopeBitfield(pi_memory_scope_capabilities bits) {
std::vector<memory_scope> result;
if (bits & PI_MEMORY_SCOPE_WORK_ITEM)
result.push_back(memory_scope::work_item);
if (bits & PI_MEMORY_SCOPE_SUB_GROUP)
result.push_back(memory_scope::sub_group);
if (bits & PI_MEMORY_SCOPE_WORK_GROUP)
result.push_back(memory_scope::work_group);
if (bits & PI_MEMORY_SCOPE_DEVICE)
result.push_back(memory_scope::device);
if (bits & PI_MEMORY_SCOPE_SYSTEM)
result.push_back(memory_scope::system);
return result;
}

#ifndef __SYCL_DEVICE_ONLY__
static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) {
switch (order) {
Expand Down
55 changes: 51 additions & 4 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,33 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
context->get_reference_count());
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
context->get_device()->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
: PI_MEMORY_ORDER_RELAXED;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
context->get_device()->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
PI_MEMORY_SCOPE_SYSTEM
: PI_MEMORY_SCOPE_DEVICE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down Expand Up @@ -1112,11 +1139,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
atomic64);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
// NVPTX currently only support at most monotonic atomic load/store.
// Acquire and release is present in newer PTX, but is not yet supported
// in LLVM NVPTX.
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
device->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
: PI_MEMORY_ORDER_RELAXED;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
device->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
PI_MEMORY_SCOPE_SYSTEM
: PI_MEMORY_SCOPE_DEVICE;
return getInfo(param_value_size, param_value, param_value_size_ret,
PI_MEMORY_ORDER_RELAXED);
capabilities);
}
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
// NVIDIA devices only support one sub-group size (the warp size)
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name,
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
context->get_reference_count());
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down Expand Up @@ -1625,6 +1626,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
case PI_DEVICE_INFO_ATOMIC_64:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
// TODO: Investigate if this information is available on HIP.
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
case PI_DEVICE_INFO_PCI_ADDRESS:
case PI_DEVICE_INFO_GPU_EU_COUNT:
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
Expand Down
3 changes: 3 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2599,6 +2599,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
// currently not supported in level zero runtime
return PI_INVALID_VALUE;

// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
Expand Down Expand Up @@ -2793,6 +2795,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
return ReturnValue(pi_uint32(Context->Devices.size()));
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return ReturnValue(pi_uint32{Context->RefCount});
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
// TODO: implement other parameters
die("piGetContextInfo: unsuppported ParamName.");
Expand Down
1 change: 1 addition & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_64:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
return PI_INVALID_VALUE;
case PI_DEVICE_INFO_IMAGE_SRGB: {
cl_bool result = true;
Expand Down
18 changes: 18 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ context_impl::get_info<info::context::atomic_memory_order_capabilities>()
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
}
template <>
std::vector<cl::sycl::memory_scope>
context_impl::get_info<info::context::atomic_memory_scope_capabilities>()
const {
if (is_host())
return {cl::sycl::memory_scope::work_item,
cl::sycl::memory_scope::sub_group,
cl::sycl::memory_scope::work_group, cl::sycl::memory_scope::device,
cl::sycl::memory_scope::system};

pi_memory_scope_capabilities Result;
getPlugin().call<PiApiKind::piContextGetInfo>(
MContext,
pi::cast<pi_context_info>(
info::context::atomic_memory_scope_capabilities),
sizeof(Result), &Result, nullptr);
return readMemoryScopeBitfield(Result);
}

RT::PiContext &context_impl::getHandleRef() { return MContext; }
const RT::PiContext &context_impl::getHandleRef() const { return MContext; }
Expand Down
22 changes: 22 additions & 0 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,21 @@ struct get_device_info<std::vector<memory_order>,
}
};

// Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
template <>
struct get_device_info<std::vector<memory_scope>,
info::device::atomic_memory_scope_capabilities> {
static std::vector<memory_scope> get(RT::PiDevice dev, const plugin &Plugin) {
pi_memory_scope_capabilities result;
Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
dev,
pi::cast<RT::PiDeviceInfo>(
info::device::atomic_memory_scope_capabilities),
sizeof(pi_memory_scope_capabilities), &result, nullptr);
return readMemoryScopeBitfield(result);
}
};

// Specialization for exec_capabilities, OpenCL returns a bitfield
template <>
struct get_device_info<std::vector<info::execution_capability>,
Expand Down Expand Up @@ -764,6 +779,13 @@ get_device_info_host<info::device::atomic_memory_order_capabilities>() {
memory_order::acq_rel, memory_order::seq_cst};
}

template <>
inline std::vector<memory_scope>
get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
return {memory_scope::work_item, memory_scope::sub_group,
memory_scope::work_group, memory_scope::device, memory_scope::system};
}

template <>
inline cl_uint get_device_info_host<info::device::max_read_image_args>() {
// current value is the required minimum
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -4248,6 +4248,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65575EEENS3_12param_traitsIS4_XT_
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65808EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE69632EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device9getNativeEv
_ZNK2cl4sycl6kernel11get_backendEv
_ZNK2cl4sycl6kernel11get_contextEv
Expand Down Expand Up @@ -4347,6 +4348,7 @@ _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4225EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4228EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65553EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context9getNativeEv
_ZNK2cl4sycl7handler14getHandlerImplEv
_ZNK2cl4sycl7handler27isStateExplicitKernelBundleEv
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_windows.dump
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
??$get_info@$0BAAA@@device@sycl@cl@@QEBA?AW4device_type@info@12@XZ
??$get_info@$0BAAB@@device@sycl@cl@@QEBAIXZ
??$get_info@$0BAABA@@context@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BAABB@@context@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BAAC@@device@sycl@cl@@QEBAIXZ
??$get_info@$0BAACA@@device@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
??$get_info@$0BAACB@@device@sycl@cl@@QEBAIXZ
Expand Down Expand Up @@ -115,6 +116,7 @@
??$get_info@$0BAJA@@queue@sycl@cl@@QEBA?AVcontext@12@XZ
??$get_info@$0BAJB@@queue@sycl@cl@@QEBA?AVdevice@12@XZ
??$get_info@$0BAJC@@queue@sycl@cl@@QEBAIXZ
??$get_info@$0BBAAA@@device@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BBGA@@program@sycl@cl@@QEBAIXZ
??$get_info@$0BBGB@@program@sycl@cl@@QEBA?AVcontext@12@XZ
??$get_info@$0BBGD@@program@sycl@cl@@QEBA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@XZ
Expand Down