Skip to content

SWDEV-102314 "[OCL-LC-ROCm] AMD lower kernel calls" #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 2, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ FunctionPass *createAMDGPUPromoteAlloca(const TargetMachine *TM = nullptr);
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;

Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
CodeGenOpt::Level OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass();
Expand All @@ -100,6 +99,10 @@ ModulePass *createAMDGPUOCL12AdapterPass();
void initializeAMDGPUOCL12AdapterPass(PassRegistry&);
extern char &AMDGPUOCL12AdapterID;

ModulePass *createAMDGPULowerKernelCallsPass();
void initializeAMDGPULowerKernelCallsPass(PassRegistry&);
extern char &AMDGPULowerKernelCallsID;

ModulePass *createAMDGPUPrintfRuntimeBinding();
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
extern char &AMDGPUPrintfRuntimeBindingID;
Expand Down
99 changes: 99 additions & 0 deletions lib/Target/AMDGPU/AMDGPULowerKernelCalls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//===-- AMDGPULowerKernelCalls.cpp - Fix kernel-calling-kernel in HSAIL ------===//
//
// \file
//
// \brief replace calls to OpenCL kernels with equivalent non-kernel
// functions
//
// In OpenCL, a kernel may call another kernel as if it were a
// non-kernel function, but in HSAIL such a call is not allowed. To
// fix this, whenever we encounter a call to a kernel A we copy the
// body of A into a new non-kernel function fA. All calls to A are
// then redirected to fA.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

namespace {
/// Module pass that redirects calls targeting AMDGPU kernels to non-kernel
/// copies of those kernels.
class AMDGPULowerKernelCalls : public ModulePass {
public:
/// Pass identification; its address is exported as AMDGPULowerKernelCallsID.
static char ID;
/// Defined out of line below; also triggers registration of the pass.
explicit AMDGPULowerKernelCalls();

private:
/// Pass entry point. Returns true if any call site in \p M was rewritten.
bool runOnModule(Module &M) override;
};
} // end anonymous namespace

// Storage for the pass ID declared in the class above.
char AMDGPULowerKernelCalls::ID = 0;

namespace llvm {
// Declaration of the registration hook; it is called from the pass
// constructor and from LLVMInitializeAMDGPUTarget().
void initializeAMDGPULowerKernelCallsPass(PassRegistry &);

/// Factory for the pass. Returns a freshly allocated AMDGPULowerKernelCalls
/// instance.
ModulePass *createAMDGPULowerKernelCallsPass() {
return new AMDGPULowerKernelCalls();
}
}

// Public alias of the pass ID, declared `extern` in AMDGPU.h.
char &llvm::AMDGPULowerKernelCallsID = AMDGPULowerKernelCalls::ID;

// Register the pass under the command-line name "amd-lower-kernel-calls"
// (the name used by the accompanying lit test via `opt`).
// NOTE(review): the two trailing `false` arguments are presumably the
// macro's CFG-only and is-analysis flags - confirm against PassSupport.h.
INITIALIZE_PASS(
AMDGPULowerKernelCalls, "amd-lower-kernel-calls",
"Lower calls to kernel functions into non-kernel function calls.", false,
false);

/// Construct the pass with its static ID and make sure it has been
/// registered with the global PassRegistry, so that creating the pass
/// directly through createAMDGPULowerKernelCallsPass() also registers it.
AMDGPULowerKernelCalls::AMDGPULowerKernelCalls() : ModulePass(ID) {
  initializeAMDGPULowerKernelCallsPass(*PassRegistry::getPassRegistry());
}

/// Name the non-kernel copy \p FBody after the kernel \p FKernel.
///
/// The resulting name is "__amdgpu_" + <base> + "_kernel_body", where <base>
/// is the kernel's name. If the kernel name carries the "__OpenCL_" prefix,
/// it is expected to also end in "_kernel" (asserted), and both decorations
/// are removed from <base> first.
static void setNameForBody(Function *FBody, const Function &FKernel) {
  const StringRef LegacyPrefix("__OpenCL_");
  const StringRef LegacySuffix("_kernel");

  StringRef Name = FKernel.getName();
  const bool HasLegacyDecoration = Name.startswith(LegacyPrefix);
  if (HasLegacyDecoration) {
    assert(Name.endswith("_kernel"));
    // slice() clamps its bounds, so a degenerate name yields an empty base.
    const size_t BaseBegin = LegacyPrefix.size();
    const size_t BaseEnd = Name.size() - LegacySuffix.size();
    Name = Name.slice(BaseBegin, BaseEnd);
  }

  SmallString<128> BodyName("__amdgpu_");
  BodyName += Name;
  BodyName += "_kernel_body";
  FBody->setName(BodyName.str());
}

/// Create the non-kernel counterpart of kernel \p F.
///
/// The function is duplicated with CloneFunction(), the copy is switched to
/// the C calling convention and then renamed by setNameForBody(). \p F must
/// have a body; this is asserted.
///
/// \returns the newly created function.
static Function *cloneKernel(Function &F) {
// Value map required by CloneFunction(); it is not read afterwards.
ValueToValueMapTy ignored;
assert(!F.isDeclaration() && "called kernel function should have a body");
// NOTE(review): confirm which parameter the third argument (`false`) binds
// to in the CloneFunction() overload available in this tree.
Function *NewF = CloneFunction(&F, ignored, false);
NewF->setCallingConv(CallingConv::C);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just getOrInsertFunction with the new name from setNameForBody?

setNameForBody(NewF, F);
return NewF;
}

/// Redirect every direct call to an AMDGPU kernel to a non-kernel clone.
///
/// For each function in \p M that uses the AMDGPU_KERNEL calling convention,
/// the call instructions that actually call it are collected. If there are
/// any, the kernel is cloned once via cloneKernel() and each collected call
/// is retargeted to the clone and switched to the C calling convention.
/// Kernels that are never called are left untouched.
///
/// \returns true if at least one call site was rewritten.
bool AMDGPULowerKernelCalls::runOnModule(Module &M) {
  bool Changed = false;
  for (Function &F : M) {
    if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
      continue;

    // Gather the call sites up front: retargeting a call edits F's use list,
    // so rewriting must not happen while that list is being walked.
    SmallVector<CallInst *, 8> KernelCalls;
    for (User *U : F.users()) {
      CallInst *CI = dyn_cast<CallInst>(U);
      // A call instruction is also a user of F when F is merely passed as an
      // argument; only instructions whose callee is F are real kernel calls.
      if (CI && CI->getCalledFunction() == &F)
        KernelCalls.push_back(CI);
    }
    if (KernelCalls.empty())
      continue;

    // Clone lazily so that only kernels with callers get a body copy.
    Function *FBody = cloneKernel(F);
    for (CallInst *CI : KernelCalls) {
      CI->setCalledFunction(FBody);
      CI->setCallingConv(CallingConv::C);
    }
    Changed = true;
  }

  return Changed;
}
2 changes: 2 additions & 0 deletions lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUConvertAtomicLibCallsPass(*PR);
initializeAMDGPUOCL12AdapterPass(*PR);
initializeAMDGPUPrintfRuntimeBindingPass(*PR);
initializeAMDGPULowerKernelCallsPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
Expand Down Expand Up @@ -188,6 +189,7 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
}

void AMDGPUTargetMachine::addPreLinkPasses(PassManagerBase & PM) {
PM.add(llvm::createAMDGPULowerKernelCallsPass());
PM.add(llvm::createAMDGPUConvertAtomicLibCallsPass());
PM.add(llvm::createAMDGPUOCL12AdapterPass());
PM.add(llvm::createAMDGPUPrintfRuntimeBinding());
Expand Down
1 change: 1 addition & 0 deletions lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetObjectFile.cpp
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPULowerKernelCalls.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
AMDGPUOpenCLImageTypeLoweringPass.cpp
Expand Down
23 changes: 23 additions & 0 deletions test/CodeGen/AMDGPU/lower-kernel-calls.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: opt -amd-lower-kernel-calls -mtriple=amdgcn-- -mcpu=fiji -S < %s | FileCheck %s
; Exercises the amd-lower-kernel-calls pass: a call from one amdgpu_kernel to
; another is expected to be redirected to a non-kernel copy of the callee.
;
; Callee kernel. The expected copy of it is named
; @__amdgpu_test_kernel_to_call_kernel_body.
define amdgpu_kernel void @test_kernel_to_call(i32 addrspace(1)* %p) #0 {
entry:
store i32 2, i32 addrspace(1)* %p, align 4
ret void
}

; Function Attrs: nounwind
; Caller kernel. Its call below is expected to lose the amdgpu_kernel calling
; convention and to target the copy instead of the original kernel.
define amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* %p) #0 {
entry:
store i32 1, i32 addrspace(1)* %p, align 4
; CHECK: call void @__amdgpu_test_kernel_to_call_kernel_body(i32 addrspace(1)* %p)
call amdgpu_kernel void @test_kernel_to_call(i32 addrspace(1)* %p)
ret void
}

; The copy is expected to be a plain function (no amdgpu_kernel) with the same
; body and attributes as the callee kernel.
; CHECK: define void @__amdgpu_test_kernel_to_call_kernel_body(i32 addrspace(1)* %p) #0 {
; CHECK: entry:
; CHECK: store i32 2, i32 addrspace(1)* %p, align 4
; CHECK: ret void
; CHECK: }

attributes #0 = { nounwind }