Skip to content

Commit dc94f2c

Browse files
authored
[Offload] Add device UID (#164391)
Introduced in OpenMP 6.0, the device UID shall be a unique identifier of a device on a given system. (Not necessarily a UUID.) Since it is not guaranteed that the (U)UIDs defined by the device vendor libraries, such as HSA, do not overlap with those of other vendors, the device UIDs in offload are always combined with the offload plugin name. In case the vendor library does not specify any device UID for a given device, we fall back to the offload-internal device ID. The device UID can be retrieved using the `llvm-offload-device-info` tool.
1 parent 92a1eb3 commit dc94f2c

File tree

12 files changed

+91
-6
lines changed

12 files changed

+91
-6
lines changed

offload/liboffload/API/Device.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def ol_device_info_t : Enum {
2929
TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">,
3030
TaggedEtor<"NAME", "char[]", "Device name">,
3131
TaggedEtor<"PRODUCT_NAME", "char[]", "Device user-facing marketing name">,
32+
TaggedEtor<"UID", "char[]", "Device UID">,
3233
TaggedEtor<"VENDOR", "char[]", "Device vendor">,
3334
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
3435
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ llvm::Error ol_platform_impl_t::init() {
147147
if (llvm::Error Err = Plugin->initDevice(Id))
148148
return Err;
149149

150-
auto Device = &Plugin->getDevice(Id);
151-
auto Info = Device->obtainInfoImpl();
150+
GenericDeviceTy *Device = &Plugin->getDevice(Id);
151+
llvm::Expected<InfoTreeNode> Info = Device->obtainInfo();
152152
if (llvm::Error Err = Info.takeError())
153153
return Err;
154154
Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this,
@@ -467,6 +467,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
467467
switch (PropName) {
468468
case OL_DEVICE_INFO_NAME:
469469
case OL_DEVICE_INFO_PRODUCT_NAME:
470+
case OL_DEVICE_INFO_UID:
470471
case OL_DEVICE_INFO_VENDOR:
471472
case OL_DEVICE_INFO_DRIVER_VERSION: {
472473
// String values
@@ -544,6 +545,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
544545
return Info.writeString("Virtual Host Device");
545546
case OL_DEVICE_INFO_PRODUCT_NAME:
546547
return Info.writeString("Virtual Host Device");
548+
case OL_DEVICE_INFO_UID:
549+
return Info.writeString(GenericPluginTy::getHostDeviceUid());
547550
case OL_DEVICE_INFO_VENDOR:
548551
return Info.writeString("Liboffload");
549552
case OL_DEVICE_INFO_DRIVER_VERSION:

offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ typedef enum hsa_amd_agent_info_s {
7272
HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
7373
HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
7474
HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010,
75+
HSA_AMD_AGENT_INFO_UUID = 0xA011,
7576
HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY = 0xA016,
7677
} hsa_amd_agent_info_t;
7778

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,6 +2083,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20832083
return Err;
20842084
ComputeUnitKind = GPUName;
20852085

2086+
// From the ROCm HSA documentation:
2087+
// Query the UUID of the agent. The value is an Ascii string with a maximum
2088+
// of 21 chars including NUL. The string value consists of two parts: header
2089+
// and body. The header identifies the device type (GPU, CPU, DSP) while the
2090+
// body encodes the UUID as a 16 digit hex string.
2091+
//
2092+
// Agents that do not support UUID will return the string "GPU-XX" or
2093+
// "CPU-XX" or "DSP-XX" depending on their device type.
2094+
char UUID[24] = {0};
2095+
if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_UUID, UUID))
2096+
return Err;
2097+
if (!StringRef(UUID).ends_with("-XX"))
2098+
setDeviceUidFromVendorUid(UUID);
2099+
20862100
// Get the wavefront size.
20872101
uint32_t WavefrontSize = 0;
20882102
if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
791791
/// this id is not unique between different plugins; they may overlap.
792792
int32_t getDeviceId() const { return DeviceId; }
793793

794+
/// Get the unique identifier of the device.
795+
const char *getDeviceUid() const { return DeviceUid.c_str(); }
796+
794797
/// Set the context of the device if needed, before calling device-specific
795798
/// functions. Plugins may implement this function as a no-op if not needed.
796799
virtual Error setContext() = 0;
@@ -989,9 +992,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
989992
Error syncEvent(void *EventPtr);
990993
virtual Error syncEventImpl(void *EventPtr) = 0;
991994

995+
/// Obtain information about the device.
996+
Expected<InfoTreeNode> obtainInfo();
997+
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
998+
992999
/// Print information about the device.
9931000
Error printInfo();
994-
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
9951001

9961002
/// Return true if the device has work that is either queued or currently
9971003
/// running
@@ -1204,6 +1210,14 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12041210
/// global device id and is not the device id visible to the OpenMP user.
12051211
const int32_t DeviceId;
12061212

1213+
/// The unique identifier of the device.
1214+
/// By default, the unique identifier of the device is set to the device id,
1215+
/// combined with the plugin name, since the offload device id may overlap
1216+
/// between different plugins.
1217+
std::string DeviceUid;
1218+
/// Construct the device UID from the vendor (U)UID.
1219+
void setDeviceUidFromVendorUid(StringRef VendorUid);
1220+
12071221
/// The default grid values used for this device.
12081222
llvm::omp::GV GridValues;
12091223

@@ -1290,6 +1304,9 @@ struct GenericPluginTy {
12901304
return UserDeviceIds.at(DeviceId);
12911305
}
12921306

1307+
/// Get the UID for the host device.
1308+
static constexpr const char *getHostDeviceUid() { return "HOST"; }
1309+
12931310
/// Get the ELF code to recognize the binary image of this plugin.
12941311
virtual uint16_t getMagicElfBits() const = 0;
12951312

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,10 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
715715
DeviceId(DeviceId), GridValues(OMPGridValues),
716716
PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
717717
PinnedAllocs(*this), RPCServer(nullptr) {
718+
// Conservative fall-back to the plugin's device id for the case that no real
719+
// vendor (u)uid becomes available later.
720+
setDeviceUidFromVendorUid(std::to_string(static_cast<uint64_t>(DeviceId)));
721+
718722
#ifdef OMPT_SUPPORT
719723
OmptInitialized.store(false);
720724
// Bind the callbacks to this device's member functions
@@ -1524,15 +1528,22 @@ Error GenericDeviceTy::enqueueHostCall(void (*Callback)(void *), void *UserData,
15241528
return Err;
15251529
}
15261530

1531+
/// Obtain the device info tree and append the device UID to it.
///
/// Calls obtainInfoImpl() and, if it succeeded, adds a "UID" entry holding
/// getDeviceUid() to the returned tree. If obtainInfoImpl() failed, the
/// error is returned as is and no entry is added.
Expected<InfoTreeNode> GenericDeviceTy::obtainInfo() {
1532+
auto InfoOrErr = obtainInfoImpl();
1533+
// Only touch the tree when it actually holds a value (no error).
if (InfoOrErr)
1534+
InfoOrErr->add("UID", getDeviceUid(), "", DeviceInfo::UID);
1535+
return InfoOrErr;
1536+
}
1537+
15271538
Error GenericDeviceTy::printInfo() {
1528-
auto Info = obtainInfoImpl();
1539+
auto InfoOrErr = obtainInfo();
15291540

15301541
// Get the vendor-specific info entries describing the device properties.
1531-
if (auto Err = Info.takeError())
1542+
if (auto Err = InfoOrErr.takeError())
15321543
return Err;
15331544

15341545
// Print all info entries.
1535-
Info->print();
1546+
InfoOrErr->print();
15361547

15371548
return Plugin::success();
15381549
}
@@ -1603,6 +1614,10 @@ Expected<bool> GenericDeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) {
16031614
return isAccessiblePtrImpl(Ptr, Size);
16041615
}
16051616

1617+
/// Set DeviceUid to "<plugin name>-<VendorUid>".
///
/// \param VendorUid the identifier to append after the plugin name and the
/// '-' separator.
void GenericDeviceTy::setDeviceUidFromVendorUid(StringRef VendorUid) {
1618+
DeviceUid = std::string(Plugin.getName()) + "-" + std::string(VendorUid);
1619+
}
1620+
16061621
Error GenericPluginTy::init() {
16071622
if (Initialized)
16081623
return Plugin::success();

offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ DLWRAP(cuFuncSetAttribute, 3)
3535

3636
// Device info
3737
DLWRAP(cuDeviceGetName, 3)
38+
DLWRAP(cuDeviceGetUuid, 2)
3839
DLWRAP(cuDeviceTotalMem, 2)
3940
DLWRAP(cuDriverGetVersion, 1)
4041

offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ typedef struct CUfunc_st *CUfunction;
3333
typedef void (*CUhostFn)(void *userData);
3434
typedef struct CUstream_st *CUstream;
3535
typedef struct CUevent_st *CUevent;
36+
typedef struct CUuuid_st {
37+
char bytes[16];
38+
} CUuuid;
3639

3740
#define CU_DEVICE_INVALID ((CUdevice)(-2))
3841

@@ -301,6 +304,7 @@ CUresult cuFuncSetAttribute(CUfunction, CUfunction_attribute, int);
301304

302305
// Device info
303306
CUresult cuDeviceGetName(char *, int, CUdevice);
307+
CUresult cuDeviceGetUuid(CUuuid *, CUdevice);
304308
CUresult cuDeviceTotalMem(size_t *, CUdevice);
305309
CUresult cuDriverGetVersion(int *);
306310

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "PluginInterface.h"
2626
#include "Utils/ELF.h"
2727

28+
#include "llvm/ADT/StringExtras.h"
2829
#include "llvm/BinaryFormat/ELF.h"
2930
#include "llvm/Frontend/OpenMP/OMPConstants.h"
3031
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -293,6 +294,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
293294
if (auto Err = Plugin::check(Res, "error in cuDeviceGet: %s"))
294295
return Err;
295296

297+
CUuuid UUID = {0};
298+
Res = cuDeviceGetUuid(&UUID, Device);
299+
if (auto Err = Plugin::check(Res, "error in cuDeviceGetUuid: %s"))
300+
return Err;
301+
// CUuuid::bytes holds 16 raw bytes, not a NUL-terminated string. Passing the
// array directly would convert it to a StringRef via strlen(), truncating
// the UID at the first zero byte or reading past the array if none of the
// 16 bytes is zero. Pass the length explicitly so all 16 bytes are encoded.
setDeviceUidFromVendorUid(
toHex(StringRef(UUID.bytes, sizeof(UUID.bytes)), true));
302+
296303
// Query the current flags of the primary context and set its flags if
297304
// it is inactive.
298305
unsigned int FormerPrimaryCtxFlags = 0;

offload/tools/deviceinfo/llvm-offload-device-info.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
176176
printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_NAME, "Name"));
177177
OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_PRODUCT_NAME,
178178
"Product Name"));
179+
OFFLOAD_ERR(printDeviceValue<const char *>(S, D, OL_DEVICE_INFO_UID, "UID"));
179180
OFFLOAD_ERR(
180181
printDeviceValue<ol_device_type_t>(S, D, OL_DEVICE_INFO_TYPE, "Type"));
181182
OFFLOAD_ERR(printDeviceValue<const char *>(

0 commit comments

Comments
 (0)