-
Notifications
You must be signed in to change notification settings - Fork 794
[SYCL] Refactor SYCL kernel object handling in hierarchical parallelism #6212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
eec1da1
6c60344
3530e1d
0e13ee9
e1511a7
eb6d2d7
969b468
ca26f57
1a61c46
b970a77
3886d8d
959e1aa
2d987ac
fdeaab6
0a5834b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,6 +65,19 @@ | |
| // (1) - materialization of a PFWI object | ||
| // (2) - "fixup" of the private variable address. | ||
| // | ||
| // TODO: add support for the case when there are other functions between | ||
bader marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // parallel_for_work_group and parallel_for_work_item in the call stack. | ||
| // For example: | ||
| // | ||
| // void foo(sycl::group<1> group, ...) { | ||
| // group.parallel_for_work_item(range<1>(), [&](h_item<1> i) { ... }); | ||
| // } | ||
| // ... | ||
| // cgh.parallel_for_work_group<class kernel>( | ||
| // range<1>(...), range<1>(...), [=](group<1> g) { | ||
| // foo(g, ...); | ||
| // }); | ||
| // | ||
| // TODO The approach employed by this pass generates lots of barriers and data | ||
| // copying between private and local memory, which might not be efficient. There | ||
| // are optimization opportunities listed below. Also other approaches can be | ||
|
|
@@ -385,16 +398,8 @@ static void copyBetweenPrivateAndShadow(Value *L, GlobalVariable *Shadow, | |
| LocAlign = MaybeAlign(AI->getAlignment()); | ||
| } else { | ||
| auto Arg = cast<Argument>(L); | ||
| if (Arg->hasByValAttr()) { | ||
| T = Arg->getParamByValType(); | ||
| LocAlign = MaybeAlign(Arg->getParamAlignment()); | ||
| } else { | ||
| Type *Ty = Arg->getType(); | ||
| Module &M = *Shadow->getParent(); | ||
| LocAlign = M.getDataLayout().getValueOrABITypeAlignment( | ||
| MaybeAlign(Arg->getParamAlignment()), Ty); | ||
| T = Arg->getType()->getPointerElementType(); | ||
| } | ||
| T = Arg->getParamByValType(); | ||
| LocAlign = MaybeAlign(Arg->getParamAlignment()); | ||
| } | ||
|
|
||
| assert(T && "Unexpected type"); | ||
|
|
@@ -698,16 +703,7 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) { | |
| // Go through "byval" parameters which are passed as AS(0) pointers | ||
| // and: (1) create local shadows for them (2) and initialize them from the | ||
| // leader's copy and (3) materialize the value in the local variable before use | ||
| // | ||
| // Do the same for 'this' pointer which points to PFWG lamda object which is | ||
| // allocated in the caller. Caller is a kernel function which is generated by | ||
| // SYCL frontend. Kernel function allocates PFWG lambda object and initalizes | ||
| // captured objects (like accessors) using arguments of the kernel. After | ||
| // intialization kernel calls PFWG function (which is the operator() of the PFWG | ||
| // object). PFWG object captures all objects by value and all uses (except | ||
| // initialization from kernel arguments) of this values can only be in scope of | ||
| // PFWG function that is why copy back of PFWG object is not needed. | ||
| static void sharePFWGPrivateObjects(Function &F, const Triple &TT) { | ||
| static void shareByValParams(Function &F, const Triple &TT) { | ||
| // Skip alloca instructions and split. Alloca instructions must be in the | ||
| // beginning of the function otherwise they are considered as dynamic which | ||
| // can cause the problems with inlining. | ||
|
|
@@ -726,29 +722,20 @@ static void sharePFWGPrivateObjects(Function &F, const Triple &TT) { | |
| Instruction &At = LeaderBB->back(); | ||
|
|
||
| for (auto &Arg : F.args()) { | ||
| Type *T; | ||
| LLVMContext &Ctx = At.getContext(); | ||
| IRBuilder<> Builder(Ctx); | ||
| Builder.SetInsertPoint(&LeaderBB->front()); | ||
| if (!Arg.hasByValAttr()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: we skip "this" because it is allocated in the proper AS by the FE, correct? Comment would be helpful for the reader.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right. I just reverted the changes from #1455 and tried to re-implement it by fixing address space in clang instead. |
||
| continue; | ||
|
|
||
| assert(Arg.getType()->getPointerAddressSpace() == | ||
| asUInt(spirv::AddrSpace::Private)); | ||
|
|
||
| // Create the shared copy - "shadow" - for current arg | ||
| GlobalVariable *Shadow = nullptr; | ||
| if (Arg.hasByValAttr()) { | ||
| assert(Arg.getType()->getPointerAddressSpace() == | ||
| asUInt(spirv::AddrSpace::Private)); | ||
| T = Arg.getParamByValType(); | ||
| Shadow = spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); | ||
| } | ||
| // Process 'this' pointer which points to PFWG lambda object | ||
| else if (Arg.getArgNo() == 0) { | ||
| PointerType *PtrT = dyn_cast<PointerType>(Arg.getType()); | ||
| assert(PtrT && "Expected this pointer as the first argument"); | ||
| T = PtrT->getPointerElementType(); | ||
| Shadow = spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); | ||
| } | ||
| Type *T = Arg.getParamByValType(); | ||
| GlobalVariable *Shadow = | ||
| spirv::createWGLocalVariable(*F.getParent(), T, "ArgShadow"); | ||
|
|
||
| if (!Shadow) | ||
| continue; | ||
| LLVMContext &Ctx = At.getContext(); | ||
| IRBuilder<> Builder(Ctx); | ||
| Builder.SetInsertPoint(&LeaderBB->front()); | ||
|
|
||
| copyBetweenPrivateAndShadow(&Arg, Shadow, Builder, | ||
| true /*private->shadow*/); | ||
|
|
@@ -766,6 +753,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, | |
| FunctionAnalysisManager &FAM) { | ||
| if (!F.getMetadata(WG_SCOPE_MD)) | ||
| return PreservedAnalyses::all(); | ||
| LLVM_DEBUG(llvm::dbgs() << "Function name: " << F.getName() << "\n"); | ||
| const auto &TT = llvm::Triple(F.getParent()->getTargetTriple()); | ||
| // Ranges of "side effect" instructions | ||
| SmallVector<InstrRange, 16> Ranges; | ||
|
|
@@ -866,9 +854,8 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, | |
| for (auto *PFWICall : PFWICalls) | ||
| fixupPrivateMemoryPFWILambdaCaptures(PFWICall); | ||
|
|
||
| // Finally, create shadows for and replace usages of byval pointer params and | ||
| // PFWG lambda object ('this' pointer). | ||
| sharePFWGPrivateObjects(F, TT); | ||
| // Finally, create shadows for and replace usages of byval pointer params. | ||
| shareByValParams(F, TT); | ||
|
|
||
| #ifndef NDEBUG | ||
| if (HaveChanges && Debug > 0) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.