Skip to content

[SYCL][Fusion][NoSTL] Use custom string-like class to represent kernel name #12393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions sycl-fusion/common/include/DynArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define SYCL_FUSION_COMMON_DYNARRAY_H

#include <algorithm>
#include <cstring>

namespace jit_compiler {

Expand Down Expand Up @@ -84,6 +85,24 @@ template <typename T> class DynArray {
}
};

///
/// String-like class that owns its character storage.
class DynString {
public:
explicit DynString(const char *Str) : Chars{std::strlen(Str) + 1} {
std::copy(Str, Str + Chars.size(), Chars.begin());
}

const char *c_str() const { return Chars.begin(); }

friend bool operator==(const DynString &A, const char *B) {
return std::strcmp(A.c_str(), B) == 0;
}

private:
DynArray<char> Chars;
};

} // namespace jit_compiler

#endif // SYCL_FUSION_COMMON_DYNARRAY_H
18 changes: 9 additions & 9 deletions sycl-fusion/common/include/Kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ class NDRange {
/// Information about a kernel from DPC++.
struct SYCLKernelInfo {

std::string Name;
DynString Name;

SYCLArgumentDescriptor Args;

Expand All @@ -339,13 +339,12 @@ struct SYCLKernelInfo {

SYCLKernelBinaryInfo BinaryInfo;

SYCLKernelInfo(const std::string &KernelName,
const SYCLArgumentDescriptor &ArgDesc, const NDRange &NDR,
const SYCLKernelBinaryInfo &BinInfo)
: Name{KernelName}, Args{ArgDesc}, Attributes{}, NDR{NDR}, BinaryInfo{
BinInfo} {}
SYCLKernelInfo(const char *KernelName, const SYCLArgumentDescriptor &ArgDesc,
const NDRange &NDR, const SYCLKernelBinaryInfo &BinInfo)
: Name{KernelName}, Args{ArgDesc}, Attributes{}, NDR{NDR},
BinaryInfo{BinInfo} {}

SYCLKernelInfo(const std::string &KernelName, size_t NumArgs)
SYCLKernelInfo(const char *KernelName, size_t NumArgs)
: Name{KernelName}, Args{NumArgs}, Attributes{}, NDR{}, BinaryInfo{} {}
};

Expand All @@ -366,8 +365,9 @@ class SYCLModuleInfo {

SYCLKernelInfo *getKernelFor(const std::string &KernelName) {
auto It =
std::find_if(Kernels.begin(), Kernels.end(),
[&](SYCLKernelInfo &K) { return K.Name == KernelName; });
std::find_if(Kernels.begin(), Kernels.end(), [&](SYCLKernelInfo &K) {
return K.Name == KernelName.c_str();
});
return (It != Kernels.end()) ? &*It : nullptr;
}

Expand Down
5 changes: 2 additions & 3 deletions sycl-fusion/jit-compiler/include/KernelFusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ class KernelFusion {
public:
static FusionResult fuseKernels(
Config &&JITConfig, const std::vector<SYCLKernelInfo> &KernelInformation,
const std::vector<std::string> &KernelsToFuse,
const std::string &FusedKernelName,
jit_compiler::ParamIdentList &Identities, BarrierFlags BarriersFlags,
const char *FusedKernelName, jit_compiler::ParamIdentList &Identities,
BarrierFlags BarriersFlags,
const std::vector<jit_compiler::ParameterInternalization>
&Internalization,
const std::vector<jit_compiler::JITConstant> &JITConstants);
Expand Down
7 changes: 5 additions & 2 deletions sycl-fusion/jit-compiler/lib/KernelFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,18 @@ static bool isTargetFormatSupported(BinaryFormat TargetFormat) {

FusionResult KernelFusion::fuseKernels(
Config &&JITConfig, const std::vector<SYCLKernelInfo> &KernelInformation,
const std::vector<std::string> &KernelsToFuse,
const std::string &FusedKernelName, ParamIdentList &Identities,
const char *FusedKernelName, ParamIdentList &Identities,
BarrierFlags BarriersFlags,
const std::vector<jit_compiler::ParameterInternalization> &Internalization,
const std::vector<jit_compiler::JITConstant> &Constants) {
// Initialize the configuration helper to make the options for this invocation
// available (on a per-thread basis).
ConfigHelper::setConfig(std::move(JITConfig));

std::vector<std::string> KernelsToFuse;
llvm::transform(KernelInformation, std::back_inserter(KernelsToFuse),
[](const auto &KI) { return std::string{KI.Name.c_str()}; });

const auto NDRanges = gatherNDRanges(KernelInformation);

if (!isValidCombination(NDRanges)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static Indices getAttributeValues(MDNode *MD) {
/// - reqd_work_group_size
/// - work_group_size_hint
static void restoreKernelAttributes(Module *Mod, SYCLKernelInfo &Info) {
auto *KernelFunction = Mod->getFunction(Info.Name);
auto *KernelFunction = Mod->getFunction(Info.Name.c_str());
assert(KernelFunction && "Kernel function not present in module");
SmallVector<SYCLKernelAttribute, 2> Attrs;
using AttrKind = SYCLKernelAttribute::AttrKind;
Expand Down Expand Up @@ -156,7 +156,7 @@ KernelTranslator::loadLLVMKernel(llvm::LLVMContext &LLVMCtx,
llvm::StringRef RawData(reinterpret_cast<const char *>(BinInfo.BinaryStart),
BinInfo.BinarySize);
return llvm::parseBitcodeFile(
MemoryBuffer::getMemBuffer(RawData, Kernel.Name,
MemoryBuffer::getMemBuffer(RawData, Kernel.Name.c_str(),
/* RequiresNullTermnator*/ false)
->getMemBufferRef(),
LLVMCtx);
Expand Down Expand Up @@ -259,7 +259,7 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,

llvm::StringRef TargetCPU{"sm_50"};
llvm::StringRef TargetFeatures{"+sm_50,+ptx76"};
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name)) {
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str())) {
if (KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
TargetCPU =
KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
Expand Down Expand Up @@ -333,7 +333,7 @@ KernelTranslator::translateToAMDGCN(SYCLKernelInfo &KernelInfo,
// "Build DPC++ toolchain with support for HIP AMD"
llvm::StringRef TargetCPU{"gfx906"};
llvm::StringRef TargetFeatures{""};
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name)) {
if (auto *KernelFunc = Mod.getFunction(KernelInfo.Name.c_str())) {
if (KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
TargetCPU =
KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
Expand Down
8 changes: 4 additions & 4 deletions sycl-fusion/passes/kernel-fusion/SYCLKernelFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ Error SYCLKernelFusion::fuseKernel(
}
}
// Function name for the fused kernel.
StringRef FusedKernelName = KernelName->getString();
auto FusedKernelName = KernelName->getString().str();
// ND-range for the fused kernel.
const auto NDRange = getNDFromMD(NDRangeMD);

Expand Down Expand Up @@ -440,9 +440,9 @@ Error SYCLKernelFusion::fuseKernel(
}

// Add the information about the new kernel to the SYCLModuleInfo.
if (!ModInfo->hasKernelFor(FusedKernelName.str())) {
if (!ModInfo->hasKernelFor(FusedKernelName)) {
assert(FusedParamKinds.size() == FusedArgUsageMask.size());
jit_compiler::SYCLKernelInfo KI{FusedKernelName.str(),
jit_compiler::SYCLKernelInfo KI{FusedKernelName.c_str(),
FusedParamKinds.size()};
KI.Attributes = KernelAttributeList{FusedAttributes.size()};
llvm::copy(FusedParamKinds, KI.Args.Kinds.begin());
Expand All @@ -451,7 +451,7 @@ Error SYCLKernelFusion::fuseKernel(
ModInfo->addKernel(KI);
}
jit_compiler::SYCLKernelInfo &FusedKernelInfo =
*ModInfo->getKernelFor(FusedKernelName.str());
*ModInfo->getKernelFor(FusedKernelName);

// Check that no function with the desired name is already present in the
// module. LLVM would still be able to insert the function (adding a suffix to
Expand Down
4 changes: 2 additions & 2 deletions sycl-fusion/passes/kernel-info/SYCLKernelInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void SYCLModuleInfoAnalysis::loadModuleInfoFromMetadata(Module &M) {
++It;

assert(ArgsKindsMD->getNumOperands() == ArgsUsageMaskMD->getNumOperands());
SYCLKernelInfo KernelInfo{Name, ArgsKindsMD->getNumOperands()};
SYCLKernelInfo KernelInfo{Name.c_str(), ArgsKindsMD->getNumOperands()};

llvm::transform(
ArgsKindsMD->operands(), KernelInfo.Args.Kinds.begin(),
Expand Down Expand Up @@ -134,7 +134,7 @@ PreservedAnalyses SYCLModuleInfoPrinter::run(Module &Mod,
for (const auto &KernelInfo : ModuleInfo->kernels()) {
Out << "KernelName:";
Out.PadToColumn(Pad);
Out << KernelInfo.Name << '\n';
Out << KernelInfo.Name.c_str() << '\n';

Out.indent(Indent) << "Args:\n";
Out.indent(Indent * 2) << "Kinds:";
Expand Down
26 changes: 12 additions & 14 deletions sycl/source/detail/jit_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -756,9 +756,8 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
SYCLTypeToIndices(CurrentNDR.GlobalOffset)};

Ranges.push_back(JITCompilerNDR);
InputKernelInfo.emplace_back(KernelName, ArgDescriptor, JITCompilerNDR,
BinInfo);
InputKernelNames.push_back(KernelName);
InputKernelInfo.emplace_back(KernelName.c_str(), ArgDescriptor,
JITCompilerNDR, BinInfo);

// Collect information for the fused kernel

Expand Down Expand Up @@ -823,8 +822,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
: ::jit_compiler::getLocalAndGlobalBarrierFlag();

static size_t FusedKernelNameIndex = 0;
std::stringstream FusedKernelName;
FusedKernelName << "fused_" << FusedKernelNameIndex++;
auto FusedKernelName = "fused_" + std::to_string(FusedKernelNameIndex++);
::jit_compiler::Config JITConfig;
bool DebugEnabled =
detail::SYCLConfig<detail::SYCL_RT_WARNING_LEVEL>::get() > 0;
Expand All @@ -837,9 +835,8 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
JITConfig.set<::jit_compiler::option::JITTargetInfo>(TargetInfo);

auto FusionResult = ::jit_compiler::KernelFusion::fuseKernels(
std::move(JITConfig), InputKernelInfo, InputKernelNames,
FusedKernelName.str(), ParamIdentities, BarrierFlags, InternalizeParams,
JITConstants);
std::move(JITConfig), InputKernelInfo, FusedKernelName.c_str(),
ParamIdentities, BarrierFlags, InternalizeParams, JITConstants);

if (FusionResult.failed()) {
if (DebugEnabled) {
Expand All @@ -851,6 +848,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
}

auto &FusedKernelInfo = FusionResult.getKernelInfo();
std::string FusedOrCachedKernelName{FusedKernelInfo.Name.c_str()};

std::vector<ArgDesc> FusedArgs;
int FusedArgIndex = 0;
Expand Down Expand Up @@ -887,7 +885,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
// Create a kernel bundle for the fused kernel.
// Kernel bundles are stored in the CG as one of the "extended" members.
auto FusedKernelId = detail::ProgramManager::getInstance().getSYCLKernelID(
FusedKernelInfo.Name);
FusedOrCachedKernelName);

std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr;
if (TargetFormat == ::jit_compiler::BinaryFormat::SPIRV) {
Expand All @@ -898,7 +896,7 @@ jit_compiler::fuseKernels(QueueImplPtr Queue,
std::unique_ptr<detail::CG> FusedCG;
FusedCG.reset(new detail::CGExecKernel(
NDRDesc, nullptr, nullptr, std::move(KernelBundleImplPtr),
std::move(CGData), std::move(FusedArgs), FusedKernelInfo.Name, {}, {},
std::move(CGData), std::move(FusedArgs), FusedOrCachedKernelName, {}, {},
CG::CGTYPE::Kernel, KernelCacheConfig));
return FusedCG;
}
Expand Down Expand Up @@ -932,18 +930,18 @@ pi_device_binaries jit_compiler::createPIDeviceBinary(
"Invalid output format");
}

std::string FusedKernelName{FusedKernelInfo.Name.c_str()};
DeviceBinaryContainer Binary;

// Create an offload entry for the fused kernel.
// It seems to be OK to set zero for most of the information here, at least
// that is the case for compiled SPIR-V binaries.
OffloadEntryContainer Entry{FusedKernelInfo.Name, nullptr, 0, 0, 0};
OffloadEntryContainer Entry{FusedKernelName, nullptr, 0, 0, 0};
Binary.addOffloadEntry(std::move(Entry));

// Create a property entry for the argument usage mask for the fused kernel.
auto ArgMask = encodeArgUsageMask(FusedKernelInfo.Args.UsageMask);
PropertyContainer ArgMaskProp{FusedKernelInfo.Name, ArgMask.data(),
ArgMask.size(),
PropertyContainer ArgMaskProp{FusedKernelName, ArgMask.data(), ArgMask.size(),
pi_property_type::PI_PROPERTY_TYPE_BYTE_ARRAY};

// Create a property set for the argument usage masks of all kernels
Expand All @@ -968,7 +966,7 @@ pi_device_binaries jit_compiler::createPIDeviceBinary(
if (ReqdWGS != FusedKernelInfo.Attributes.end()) {
auto Encoded = encodeReqdWorkGroupSize(*ReqdWGS);
std::stringstream PropName;
PropName << FusedKernelInfo.Name;
PropName << FusedKernelInfo.Name.c_str();
PropName << __SYCL_PI_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE;
PropertyContainer ReqdWorkGroupSizeProp{
PropName.str(), Encoded.data(), Encoded.size(),
Expand Down