Skip to content

[llvm][offload] Move AMDGPU offload utilities to LLVM #102487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions llvm/include/llvm/Frontend/Offloading/Utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H

#include <cstdint>

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"

namespace llvm {
namespace offloading {
Expand Down Expand Up @@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName);

namespace amdgpu {
/// Check if an image is compatible with current system's environment. The
/// system environment is given as a 'target-id' which has the form:
///
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
///
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
/// and is compatible with either '+' or '-'. The HSA runtime returns this
/// information using the target-id, while we use the ELF header to determine
/// these features.
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
StringRef EnvTargetID);

/// Struct for holding metadata related to AMDGPU kernels, for more information
/// about the metadata and its meaning see:
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
struct AMDGPUKernelMetaData {
/// Constant indicating that a value is invalid.
static constexpr uint32_t KInvalidValue =
std::numeric_limits<uint32_t>::max();
/// The amount of group segment memory required by a work-group in bytes.
uint32_t GroupSegmentList = KInvalidValue;
/// The amount of fixed private address space memory required for a work-item
/// in bytes.
uint32_t PrivateSegmentSize = KInvalidValue;
/// Number of scalar registers required by a wavefront.
uint32_t SGPRCount = KInvalidValue;
/// Number of vector registers required by each work-item.
uint32_t VGPRCount = KInvalidValue;
/// Number of stores from a scalar register to a register allocator created
/// spill location.
uint32_t SGPRSpillCount = KInvalidValue;
/// Number of stores from a vector register to a register allocator created
/// spill location.
uint32_t VGPRSpillCount = KInvalidValue;
/// Number of accumulator registers required by each work-item.
uint32_t AGPRCount = KInvalidValue;
/// Corresponds to the OpenCL reqd_work_group_size attribute.
uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
KInvalidValue};
/// Corresponds to the OpenCL work_group_size_hint attribute.
uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
/// Wavefront size.
uint32_t WavefrontSize = KInvalidValue;
/// Maximum flat work-group size supported by the kernel in work-items.
uint32_t MaxFlatWorkgroupSize = KInvalidValue;
};

/// Reads AMDGPU specific metadata from the ELF file and propagates the
/// KernelInfoMap.
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
uint16_t &ELFABIVersion);
} // namespace amdgpu
} // namespace offloading
} // namespace llvm

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Frontend/Offloading/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
LINK_COMPONENTS
Core
BinaryFormat
Object
Support
TransformUtils
TargetParser
Expand Down
232 changes: 232 additions & 0 deletions llvm/lib/Frontend/Offloading/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;
Expand Down Expand Up @@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {

return std::make_pair(EntriesB, EntriesE);
}

bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
uint32_t ImageFlags,
StringRef EnvTargetID) {
using namespace llvm::ELF;
StringRef EnvArch = EnvTargetID.split(":").first;

// Trivial check if the base processors match.
if (EnvArch != ImageArch)
return false;

// Check if the image is requesting xnack on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
// The image is 'xnack-' so the environment must be 'xnack-'.
if (!EnvTargetID.contains("xnack-"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
// The image is 'xnack+' so the environment must be 'xnack+'.
if (!EnvTargetID.contains("xnack+"))
return false;
break;
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
default:
break;
}

// Check if the image is requesting sramecc on or off.
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
// The image is 'sramecc-' so the environment must be 'sramecc-'.
if (!EnvTargetID.contains("sramecc-"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
// The image is 'sramecc+' so the environment must be 'sramecc+'.
if (!EnvTargetID.contains("sramecc+"))
return false;
break;
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
break;
}

return true;
}

namespace {
/// Reads the AMDGPU specific per-kernel-metadata from an image.
class KernelInfoReader {
public:
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
: KernelInfoMap(KIM) {}

/// Process ELF note to read AMDGPU metadata from respective information
/// fields.
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
if (Note.getName() != "AMDGPU")
return Error::success(); // We are not interested in other things

assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
"Parse AMDGPU MetaData");
auto Desc = Note.getDesc(Align);
StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return Error::success();

AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
if (!Verifier.verify(MsgPackDoc.getRoot()))
return Error::success();

auto RootMap = MsgPackDoc.getRoot().getMap(true);

if (auto Err = iterateAMDKernels(RootMap))
return Err;

return Error::success();
}

private:
/// Extracts the relevant information via simple string look-up in the msgpack
/// document elements.
Error
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
std::string &KernelName,
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
if (!V.first.isString())
return Error::success();

const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
return DK.getString() == SK;
};

const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
uint32_t *Vals) {
assert(DN.isArray() && "MsgPack DocNode is an array node");
auto DNA = DN.getArray();
assert(DNA.size() == 3 && "ArrayNode has at most three elements");

int I = 0;
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
++DNABegin) {
Vals[I++] = DNABegin->getUInt();
}
};

if (IsKey(V.first, ".name")) {
KernelName = V.second.toString();
} else if (IsKey(V.first, ".sgpr_count")) {
KernelData.SGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".sgpr_spill_count")) {
KernelData.SGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_count")) {
KernelData.VGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".vgpr_spill_count")) {
KernelData.VGPRSpillCount = V.second.getUInt();
} else if (IsKey(V.first, ".agpr_count")) {
KernelData.AGPRCount = V.second.getUInt();
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
KernelData.PrivateSegmentSize = V.second.getUInt();
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
KernelData.GroupSegmentList = V.second.getUInt();
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
} else if (IsKey(V.first, ".workgroup_size_hint")) {
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
} else if (IsKey(V.first, ".wavefront_size")) {
KernelData.WavefrontSize = V.second.getUInt();
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
}

return Error::success();
}

/// Get the "amdhsa.kernels" element from the msgpack Document
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
auto Res = MDN.find("amdhsa.kernels");
if (Res == MDN.end())
return createStringError(inconvertibleErrorCode(),
"Could not find amdhsa.kernels key");

auto Pair = *Res;
assert(Pair.second.isArray() &&
"AMDGPU kernel entries are arrays of entries");

return Pair.second.getArray();
}

/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
/// MapDocNode that either maps a string to a single value (most of them) or
/// to another array of things. Currently, we only handle the case that maps
/// to scalar value.
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
std::string KernelName;
auto Entry = (*It).getMap();
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
return Err;

KernelInfoMap.insert({KernelName, KernelData});
return Error::success();
}

/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
auto KernelsOrErr = getAMDKernelsArray(MDN);
if (auto Err = KernelsOrErr.takeError())
return Err;

auto KernelsArr = *KernelsOrErr;
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
if (!It->isMap())
continue; // we expect <key,value> pairs

// Obtain the value for the different entries. Each array entry is a
// MapDocNode
if (auto Err = generateKernelInfo(It))
return Err;
}
return Error::success();
}

// Kernel names are the keys
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
};
} // namespace

Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
MemoryBufferRef MemBuffer,
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
uint16_t &ELFABIVersion) {
Error Err = Error::success(); // Used later as out-parameter

auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
if (auto Err = ELFOrError.takeError())
return Err;

const object::ELF64LEFile ELFObj = ELFOrError.get();
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
if (!Sections)
return Sections.takeError();
KernelInfoReader Reader(KernelInfoMap);

// Read the code object version from ELF image header
auto Header = ELFObj.getHeader();
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
for (const auto &S : *Sections) {
if (S.sh_type != ELF::SHT_NOTE)
continue;

for (const auto N : ELFObj.notes(S, Err)) {
if (Err)
return Err;
// Fills the KernelInfoTabel entries in the reader
if ((Err = Reader.processNote(N, S.sh_addralign)))
return Err;
}
}
return Error::success();
}
3 changes: 2 additions & 1 deletion offload/plugins-nextgen/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE

if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
message(STATUS "Building AMDGPU plugin linked against libhsa")
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
else()
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
endif()

# Configure testing for the AMDGPU plugin. We will build tests if we could a
Expand Down
12 changes: 6 additions & 6 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;

/// Get additional info for kernel, e.g., register spill counts
std::optional<utils::KernelMetaDataTy>
std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
getKernelInfo(StringRef Identifier) const {
auto It = KernelInfoMap.find(Identifier);

Expand All @@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// The exectuable loaded on the agent.
hsa_executable_t Executable;
hsa_code_object_t CodeObject;
StringMap<utils::KernelMetaDataTy> KernelInfoMap;
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
uint16_t ELFABIVersion;
};

Expand Down Expand Up @@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
uint32_t ImplicitArgsSize;

/// Additional Info for the AMD GPU Kernel
std::optional<utils::KernelMetaDataTy> KernelInfo;
std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
};

/// Class representing an HSA signal. Signals are used to define dependencies
Expand Down Expand Up @@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
ElfOrErr->getPlatformFlags(),
*TargeTripleAndFeaturesOrError);
return offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
*TargeTripleAndFeaturesOrError);
}

bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
Expand Down
Loading
Loading