Skip to content

[SYCL] atomic_memory_order_capabilities query for device and context #8517

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
89f989f
Initial implementation of atomic_memory_order_capabilities query
Feb 28, 2023
4518322
Implementation of context atomic_memory_order_capabilities query
Mar 2, 2023
3ae7de0
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 2, 2023
275dc69
Device info atomic memory order query impl
Mar 8, 2023
39cdff0
level_zero implementation of atomic_memory_order_caps
Mar 9, 2023
9579a03
Implement context atomic_mem_order query higher up
Mar 9, 2023
2f99c5a
Clang format fixes to ur_level_zero atomic_mem_order impl
Mar 9, 2023
a508c49
Fixed level_zero atomic_mem_order_caps value
Mar 9, 2023
d9c49ff
Added bitmask for returned value from OpenCL 3.0 runtime
Mar 9, 2023
657da82
Added missing PI mem order enum values to level_zero api
Mar 10, 2023
1e6d671
More robust handling of out param values in piDeviceGetInfo
Mar 10, 2023
58c53ad
Resolved clang formatting fixes
Mar 10, 2023
50ef284
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 10, 2023
d18bcca
Small fix to UR code for atomic_mem_order_caps case
Mar 10, 2023
96186d0
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 10, 2023
be8ff59
Formatting fix in UR
Mar 10, 2023
90e60ac
Context memory_order impl for level_zero
Mar 14, 2023
cc95f60
Merge branch 'alamzeds/atomic_mem_order_caps_fix' of https://github.c…
Mar 14, 2023
64e7f27
Formatting issue resolved
Mar 16, 2023
904fcc5
Used actual type to return.
Mar 16, 2023
c64ae3a
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 16, 2023
b9f460d
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 17, 2023
9b92fbe
Add unit test for memory_order device query
Mar 17, 2023
5b87263
Refined query implementation with fixes and less code duplication
Mar 17, 2023
9652f61
Formatting fix on pi_level_zero that was missed
Mar 17, 2023
dc00b7a
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 22, 2023
6f4a2f7
Swapped PI enum use for UR
Mar 22, 2023
bf903d5
Merge branch 'sycl' into alamzeds/atomic_mem_order_caps_fix
Mar 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <set>
#include <sstream>
#include <string>
#include <sycl/detail/pi.h>
#include <sycl/detail/spinlock.hpp>
#include <thread>
#include <utility>
Expand Down Expand Up @@ -2308,6 +2309,13 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
// 2D USM fill and memset is not supported.
return ReturnValue(pi_bool{false});
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
pi_memory_order_capabilities capabilities =
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
PI_MEMORY_ORDER_SEQ_CST;
return ReturnValue(capabilities);
}
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
// TODO: implement other parameters
Expand Down
59 changes: 58 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,65 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
// For details about Intel UUID extension, see
// sycl/doc/extensions/supported/sycl_ext_intel_device_info.md
case PI_DEVICE_INFO_UUID:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
return PI_ERROR_INVALID_VALUE;
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
// This query is missing beore OpenCL 3.0
// Check version and handle appropriately
OCLV::OpenCLVersion devVer;
cl_device_id deviceID = cast<cl_device_id>(device);
cl_int ret_err = getDeviceVersion(deviceID, devVer);
if (ret_err != CL_SUCCESS) {
return cast<pi_result>(ret_err);
}

// Minimum required capability to be returned
// For OpenCL 1.2, this is all that is required
pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED;

if (devVer >= OCLV::V3_0) {
// For OpenCL >=3.0, the query should be implemented
cl_device_atomic_capabilities cl_capabilities = 0;
cl_int ret_err = clGetDeviceInfo(
deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr);
if (ret_err != CL_SUCCESS)
return cast<pi_result>(ret_err);

// Mask operation to only consider atomic_memory_order* capabilities
cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED |
CL_DEVICE_ATOMIC_ORDER_ACQ_REL |
CL_DEVICE_ATOMIC_ORDER_SEQ_CST;
cl_capabilities &= mask;

// The memory order capabilities are hierarchical, if one is implied, all
// preceding capbilities are implied as well. Especially in the case of
// ACQ_REL.
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) {
capabilities |= PI_MEMORY_ORDER_SEQ_CST;
}
if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) {
capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE;
}
} else if (devVer >= OCLV::V2_0) {
// For OpenCL 2.x, return all capabilities
// (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model)
capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE |
PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST;
}

if (paramValue) {
if (paramValueSize < sizeof(pi_memory_order_capabilities))
return static_cast<pi_result>(CL_INVALID_VALUE);

std::memcpy(paramValue, &capabilities, sizeof(capabilities));
}

if (paramValueSizeRet)
*paramValueSizeRet = sizeof(capabilities);

return static_cast<pi_result>(CL_SUCCESS);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
// Initialize result to minimum mandated capabilities according to
// SYCL2020 4.6.3.2
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
(ur_device_info_t)UR_DEVICE_INFO_BFLOAT16},
{PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES},
{PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
(ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES},
};

auto InfoType = InfoMapping.find(ParamName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
return ReturnValue(result);
}

case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
ur_memory_order_capability_flags_t capabilities =
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL |
UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
return ReturnValue(capabilities);
}

// TODO: Implement.
default:
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
Expand Down Expand Up @@ -1716,7 +1727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition(
// Currently supported partitioning (by affinity domain/numa) would always
// partition to all sub-devices.
//
if (NumDevices !=0)
if (NumDevices != 0)
PI_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE);

for (uint32_t I = 0; I < NumDevices; I++) {
Expand Down
29 changes: 20 additions & 9 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <sycl/property_list.hpp>
#include <sycl/stl.hpp>

#include <algorithm>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace detail {
Expand Down Expand Up @@ -166,17 +168,26 @@ template <>
std::vector<sycl::memory_order>
context_impl::get_info<info::context::atomic_memory_order_capabilities>()
const {
std::vector<sycl::memory_order> CapabilityList{
sycl::memory_order::relaxed, sycl::memory_order::acquire,
sycl::memory_order::release, sycl::memory_order::acq_rel,
sycl::memory_order::seq_cst};
if (is_host())
return {sycl::memory_order::relaxed, sycl::memory_order::acquire,
sycl::memory_order::release, sycl::memory_order::acq_rel,
sycl::memory_order::seq_cst};
return CapabilityList;

for (const sycl::device &Device : MDevices) {
std::vector<sycl::memory_order> NewCapabilityList(CapabilityList.size());
std::vector<sycl::memory_order> DeviceCapabilities =
Device.get_info<info::device::atomic_memory_order_capabilities>();
std::set_intersection(
CapabilityList.begin(), CapabilityList.end(),
DeviceCapabilities.begin(), DeviceCapabilities.end(),
std::inserter(NewCapabilityList, NewCapabilityList.begin()));
CapabilityList = NewCapabilityList;
}
CapabilityList.shrink_to_fit();

pi_memory_order_capabilities Result;
getPlugin().call<PiApiKind::piContextGetInfo>(
MContext,
PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
return CapabilityList;
}
template <>
std::vector<sycl::memory_scope>
Expand Down
11 changes: 0 additions & 11 deletions sycl/source/detail/context_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,6 @@ typename Param::return_type get_context_info(RT::PiContext Ctx,
return Result;
}

// Specialization for atomic_memory_order_capabilities, PI returns a bitfield
template <>
std::vector<sycl::memory_order>
get_context_info<info::context::atomic_memory_order_capabilities>(
RT::PiContext Ctx, const plugin &Plugin) {
pi_memory_order_capabilities Result;
Plugin.call<PiApiKind::piContextGetInfo>(
Ctx, PiInfoCode<info::context::atomic_memory_order_capabilities>::value,
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
}
} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
65 changes: 65 additions & 0 deletions sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//==---- AtomicMemoryOrderCapabilities.cpp --- memory order query test -----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <gtest/gtest.h>
#include <helpers/PiMock.hpp>
#include <sycl/sycl.hpp>

using namespace sycl;

namespace {

static constexpr size_t expectedCapabilityVecSize = 5;
static thread_local bool deviceGetInfoCalled = false;

static bool has_capability(const std::vector<memory_order> &deviceCapabilities,
memory_order capabilityToFind) {
return std::find(deviceCapabilities.begin(), deviceCapabilities.end(),
capabilityToFind) != deviceCapabilities.end();
}

pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) {
deviceGetInfoCalled = true;
if (param_value) {
pi_memory_order_capabilities *Capabilities =
reinterpret_cast<pi_memory_order_capabilities *>(param_value);
*Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL |
PI_MEMORY_ORDER_SEQ_CST;
}
}
return PI_SUCCESS;
}

TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) {
unittest::PiMock Mock;
platform Plt = Mock.getPlatform();

Mock.redefineAfter<detail::PiApiKind::piDeviceGetInfo>(
redefinedDeviceGetInfo);

const device Dev = Plt.get_devices()[0];
context Ctx{Dev};

auto Capabilities =
Dev.get_info<info::device::atomic_memory_order_capabilities>();
EXPECT_TRUE(deviceGetInfoCalled);
EXPECT_EQ(Capabilities.size(), expectedCapabilityVecSize);

EXPECT_TRUE(has_capability(Capabilities, memory_order::relaxed));
EXPECT_TRUE(has_capability(Capabilities, memory_order::acquire));
EXPECT_TRUE(has_capability(Capabilities, memory_order::release));
EXPECT_TRUE(has_capability(Capabilities, memory_order::acq_rel));
EXPECT_TRUE(has_capability(Capabilities, memory_order::seq_cst));
}

} // namespace
1 change: 1 addition & 0 deletions sycl/unittests/SYCL2020/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_sycl_unittest(SYCL2020Tests OBJECT
IsCompatible.cpp
DeviceGetInfoAspects.cpp
DeviceAspectTraits.cpp
AtomicMemoryOrderCapabilities.cpp
AtomicMemoryScopeCapabilities.cpp
)