Skip to content

Commit

Permalink
Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)"
Browse files Browse the repository at this point in the history
This reverts commit 5fd2af3. It caused build issues and broke the buildbot.
  • Loading branch information
EthanLuisMcDonough committed Jun 28, 2024
1 parent 808e0f1 commit 2c8b912
Show file tree
Hide file tree
Showing 16 changed files with 27 additions and 358 deletions.
13 changes: 4 additions & 9 deletions clang/lib/CodeGen/CodeGenPGO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1193,15 +1193,10 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,

unsigned Counter = (*RegionCounterMap)[S];

// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedFuncNameVarPtr =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));

llvm::Value *Args[] = {
NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV};
llvm::Value *Args[] = {FuncNameVar,
Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
Builder.getInt32(Counter), StepV};

if (llvm::EnableSingleByteCoverage)
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover),
Expand Down
3 changes: 0 additions & 3 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,6 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)

__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)

__OMP_RTL(__last, false, Void, )

#undef __OMP_RTL
Expand Down
4 changes: 0 additions & 4 deletions llvm/include/llvm/ProfileData/InstrProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,6 @@ inline StringRef getInstrProfCounterBiasVarName() {
/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }

/// Determines whether module targets a GPU eligable for PGO
/// instrumentation
bool isGPUProfTarget(const Module &M);

/// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
/// for front-end (Clang, etc) instrumentation.
/// Return the modified name for function \c F suitable to be
Expand Down
25 changes: 5 additions & 20 deletions llvm/lib/ProfileData/InstrProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,31 +432,13 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
}

bool isGPUProfTarget(const Module &M) {
const auto &T = Triple(M.getTargetTriple());
return T.isAMDGPU() || T.isNVPTX();
}

void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
// If the target is a GPU, make the symbol protected so it can
// be read from the host device
if (isGPUProfTarget(M))
FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
// Hide the symbol so that we correctly get a copy for each executable.
else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
}

GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
// Ensure profiling variables on GPU are visible to be read from host
if (isGPUProfTarget(M))
Linkage = GlobalValue::ExternalLinkage;
// We generally want to match the function's linkage, but available_externally
// and extern_weak both have the wrong semantics, and anything that doesn't
// need to link across compilation units doesn't need to be visible at all.
else if (Linkage == GlobalValue::ExternalWeakLinkage)
if (Linkage == GlobalValue::ExternalWeakLinkage)
Linkage = GlobalValue::LinkOnceAnyLinkage;
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
Linkage = GlobalValue::LinkOnceODRLinkage;
Expand All @@ -470,7 +452,10 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));

setPGOFuncVisibility(M, FuncNameVar);
// Hide the symbol so that we correctly get a copy for each executable.
if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);

return FuncNameVar;
}

Expand Down
44 changes: 10 additions & 34 deletions llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -879,8 +879,6 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
auto *TLI = &GetTLI(*Ind->getFunction());
auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
DataVar, PointerType::get(M.getContext(), 0));

// To support value profiling calls within Windows exception handlers, funclet
// information contained within operand bundles needs to be copied over to
Expand All @@ -889,13 +887,11 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
if (!IsMemOpSize) {
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
Builder.getInt32(Index)};
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
OpBundles);
} else {
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
Builder.getInt32(Index)};
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
Call = Builder.CreateCall(
getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
Args, OpBundles);
Expand Down Expand Up @@ -1620,8 +1616,7 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(Align(8));
maybeSetComdat(ValuesVar, Fn, CntsVarName);
ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
ValuesVar, PointerType::get(Fn->getContext(), 0));
ValuesPtrExpr = ValuesVar;
}

uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
Expand All @@ -1645,10 +1640,6 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);

if (isGPUProfTarget(M)) {
Linkage = GlobalValue::ExternalLinkage;
Visibility = GlobalValue::ProtectedVisibility;
}
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
Expand All @@ -1660,9 +1651,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
// that other copies must have the same CFG and cannot have value profiling.
// If no hash suffix, other profd copies may be referenced by code.
else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
(TT.isOSBinFormatELF() ||
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
(TT.isOSBinFormatELF() ||
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
Expand Down Expand Up @@ -1785,13 +1776,6 @@ void InstrLowerer::emitNameData() {
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());

// Make names variable public if current target is a GPU
if (isGPUProfTarget(M)) {
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
}

NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
Expand Down Expand Up @@ -1858,13 +1842,10 @@ void InstrLowerer::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (!isa<Function>(Data))
// Check for addrspace cast when profiling GPU
IRB.CreateCall(RuntimeRegisterF,
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
IRB.CreateCall(RuntimeRegisterF, Data);
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF,
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
IRB.CreateCall(RuntimeRegisterF, Data);

if (NamesVar) {
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
Expand All @@ -1873,9 +1854,7 @@ void InstrLowerer::emitRegistration() {
auto *NamesRegisterF =
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfNamesRegFuncName(), M);
IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
NamesVar, VoidPtrTy),
IRB.getInt64(NamesSize)});
IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
}

IRB.CreateRetVoid();
Expand All @@ -1896,10 +1875,7 @@ bool InstrLowerer::emitRuntimeHook() {
auto *Var =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
if (isGPUProfTarget(M))
Var->setVisibility(GlobalValue::ProtectedVisibility);
else
Var->setVisibility(GlobalValue::HiddenVisibility);
Var->setVisibility(GlobalValue::HiddenVisibility);

if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
Expand Down
24 changes: 6 additions & 18 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -883,18 +883,14 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
{NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
return;
}

Expand Down Expand Up @@ -949,8 +945,7 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
Builder.getInt32(I)});
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
I += PGOBlockCoverage ? 8 : 1;
}

Expand All @@ -964,8 +959,7 @@ static void instrumentOneFunc(
Intrinsic::getDeclaration(M, PGOBlockCoverage
? Intrinsic::instrprof_cover
: Intrinsic::instrprof_increment),
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
Builder.getInt32(I++)});
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}

// Now instrument select instructions:
Expand Down Expand Up @@ -1008,14 +1002,11 @@ static void instrumentOneFunc(
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
assert(ToProfile && "value profiling Value is of unexpected type");

auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));

SmallVector<OperandBundleDef, 1> OpBundles;
populateEHOperandBundle(Cand, BlockColors, OpBundles);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
{FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
OpBundles);
}
Expand Down Expand Up @@ -1690,13 +1681,10 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
auto *NormalizedFuncNameVarPtr =
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, PointerType::get(M->getContext(), 0));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
{NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
{FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}

Expand Down
2 changes: 0 additions & 2 deletions offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ set(include_files
${include_directory}/Interface.h
${include_directory}/LibC.h
${include_directory}/Mapping.h
${include_directory}/Profiling.h
${include_directory}/State.h
${include_directory}/Synchronization.h
${include_directory}/Types.h
Expand All @@ -93,7 +92,6 @@ set(src_files
${source_directory}/Mapping.cpp
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
Expand Down
21 changes: 0 additions & 21 deletions offload/DeviceRTL/include/Profiling.h

This file was deleted.

22 changes: 0 additions & 22 deletions offload/DeviceRTL/src/Profiling.cpp

This file was deleted.

29 changes: 1 addition & 28 deletions offload/plugins-nextgen/common/include/GlobalHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H

#include <type_traits>
#include <string>

#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"

#include "Shared/Debug.h"
#include "Shared/Utils.h"
Expand Down Expand Up @@ -56,23 +55,6 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};

using IntPtrT = void *;
struct __llvm_profile_data {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
std::remove_const<Type>::type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
SmallVector<uint8_t> NamesData;
SmallVector<SmallVector<int64_t>> Counts;
SmallVector<__llvm_profile_data> Data;
Triple TargetTriple;

void dump() const;
};

/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
template <typename Ty> class StaticGlobalTy : public GlobalTy {
Ty Data;
Expand Down Expand Up @@ -182,15 +164,6 @@ class GenericGlobalHandlerTy {
return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
/*D2H=*/false);
}

/// Checks whether a given image contains profiling globals.
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);

/// Reads profiling data from a GPU image to supplied profdata struct.
/// Iterates through the image symbol table and stores global values
/// with profiling prefixes.
Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image);
};

} // namespace plugin
Expand Down
Loading

0 comments on commit 2c8b912

Please sign in to comment.