|
5 | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
6 | 6 | //
|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 |
| -// |
9 |
| -// This pass operates on SYCL kernels being compiled to CUDA. It looks for uses |
10 |
| -// of the `llvm.nvvm.implicit.offset` intrinsic and replaces it with a offset |
11 |
| -// parameter which will be threaded through from the kernel entry point. |
12 |
| -// |
13 |
| -//===----------------------------------------------------------------------===// |
14 | 8 |
|
15 | 9 | #ifndef LLVM_SYCL_GLOBALOFFSET_H
|
16 | 10 | #define LLVM_SYCL_GLOBALOFFSET_H
|
17 | 11 |
|
18 |
| -#include "llvm/Pass.h" |
| 12 | +#include "llvm/IR/Module.h" |
| 13 | +#include "llvm/IR/PassManager.h" |
| 14 | +#include "llvm/SYCLLowerIR/TargetHelpers.h" |
19 | 15 |
|
20 | 16 | namespace llvm {
|
21 | 17 |
|
22 |
| -ModulePass *createGlobalOffsetPass(); |
| 18 | +class ModulePass; |
| 19 | +class PassRegistry; |
| 20 | + |
| 21 | +/// This pass operates on SYCL kernels that target AMDGPU or NVVM. It looks for |
| 22 | +/// uses of the `llvm.{amdgcn|nvvm}.implicit.offset` intrinsic and replaces it |
| 23 | +/// with an offset parameter which will be threaded through from the kernel |
| 24 | +/// entry point. |
| 25 | +class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> { |
| 26 | +private: |
| 27 | + using KernelPayload = TargetHelpers::KernelPayload; |
| 28 | + using ArchType = TargetHelpers::ArchType; |
| 29 | + |
| 30 | +public: |
| 31 | + explicit GlobalOffsetPass() {} |
| 32 | + |
| 33 | + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); |
| 34 | + static StringRef getPassName() { return "Add implicit SYCL global offset"; } |
| 35 | + |
| 36 | +private: |
| 37 | + /// After the execution of this function, the module to which the kernel |
| 38 | + /// `Func` belongs, contains both the original function and its clone with the |
| 39 | + /// signature extended with the implicit offset parameter and `_with_offset` |
| 40 | + /// appended to the name. |
| 41 | + /// An alloca of 3 zeros (corresponding to offsets in x, y and z) is added to |
| 42 | + /// the original kernel, in order to keep the interface of kernel's call |
| 43 | + /// graph unified, regardless of whether the global offset has been used. |
| 44 | + /// |
| 45 | + /// \param Func Kernel to be processed. |
| 46 | + void processKernelEntryPoint(Function *Func); |
| 47 | + |
| 48 | + /// This function adds an implicit parameter to the function containing a |
| 49 | + /// call instruction to the implicit offset intrinsic or another function |
| 50 | + /// (which eventually calls the intrinsic). If the call instruction is to |
| 51 | + /// the implicit offset intrinsic, then the intrinsic is replaced with the |
| 52 | + /// parameter that was added. |
| 53 | + /// |
| 54 | + /// Once the function, say `F`, containing a call to `Callee` has the |
| 55 | + /// implicit parameter added, callers of `F` are processed by recursively |
| 56 | + /// calling this function, passing `F` to `CalleeWithImplicitParam`. |
| 57 | + /// |
| 58 | + /// Since the cloning of entry points may alter the users of a function, the |
| 59 | + /// cloning must be done as early as possible, so as to ensure that no users are |
| 60 | + /// added to previous callees in the call-tree. |
| 61 | + /// |
| 62 | + /// \param Callee is the function (to which this transformation has already |
| 63 | + /// been applied), or the implicit offset intrinsic. |
| 64 | + /// |
| 65 | + /// \param CalleeWithImplicitParam indicates whether Callee is the |
| 66 | + /// implicit intrinsic (when `nullptr`) or another function (not |
| 67 | + /// `nullptr`) - this is used to know whether calls to it need to have the |
| 68 | + /// implicit parameter added to them or be replaced with the implicit parameter. |
| 69 | + void addImplicitParameterToCallers(Module &M, Value *Callee, |
| 70 | + Function *CalleeWithImplicitParam); |
| 71 | + |
| 72 | + /// For a given function `Func` extend signature to contain an implicit |
| 73 | + /// offset argument. |
| 74 | + /// |
| 75 | + /// \param Func A function to add offset to. |
| 76 | + /// |
| 77 | + /// \param ImplicitArgumentType Architecture dependent type of the implicit |
| 78 | + /// argument holding the global offset. |
| 79 | + /// |
| 80 | + /// \param KeepOriginal If set to true, rather than splicing the old `Func`, |
| 81 | + /// keep it intact and create a clone of it with `_with_offset` appended to |
| 82 | + /// the name. |
| 83 | + /// |
| 84 | + /// \returns A pair of new function with the offset argument added and a |
| 85 | + /// pointer to the implicit argument (either a func argument or a bitcast |
| 86 | + /// turning it to the correct type). |
| 87 | + std::pair<Function *, Value *> |
| 88 | + addOffsetArgumentToFunction(Module &M, Function *Func, |
| 89 | + Type *ImplicitArgumentType = nullptr, |
| 90 | + bool KeepOriginal = false); |
| 91 | + |
| 92 | + /// Create a mapping of kernel entry points to their metadata nodes. While |
| 93 | + /// iterating over kernels make sure that a given kernel entry point has no |
| 94 | + /// llvm uses. |
| 95 | + /// |
| 96 | + /// \param KernelPayloads A collection of kernel functions present in a |
| 97 | + /// module `M`. |
| 98 | + /// |
| 99 | + /// \returns A map of kernel functions to corresponding metadata nodes. |
| 100 | + DenseMap<Function *, MDNode *> |
| 101 | + generateKernelMDNodeMap(Module &M, |
| 102 | + SmallVectorImpl<KernelPayload> &KernelPayloads); |
| 103 | + |
| 104 | +private: |
| 105 | + /// Keep track of which functions have been processed to avoid processing |
| 106 | + /// twice. |
| 107 | + llvm::DenseMap<Function *, Value *> ProcessedFunctions; |
| 108 | + /// Keep a map of all entry point functions with metadata. |
| 109 | + llvm::DenseMap<Function *, MDNode *> EntryPointMetadata; |
| 110 | + /// A type of implicit argument added to the kernel signature. |
| 111 | + llvm::Type *KernelImplicitArgumentType = nullptr; |
| 112 | + /// A type used for the alloca holding the values of global offsets. |
| 113 | + llvm::Type *ImplicitOffsetPtrType = nullptr; |
| 114 | + |
| 115 | + ArchType AT; |
| 116 | + unsigned TargetAS = 0; |
| 117 | +}; |
| 118 | + |
| 119 | +ModulePass *createGlobalOffsetPassLegacy(); |
| 120 | +void initializeGlobalOffsetLegacyPass(PassRegistry &); |
23 | 121 |
|
24 | 122 | } // end namespace llvm
|
25 | 123 |
|
|
0 commit comments