|
| 1 | +The SPIRV-LLVM-Translator will "lower" some LLVM intrinsic calls to another function or implementation |
| 2 | +using one of the following four methods: |
| 3 | + |
| 4 | +Method 1: |
| 5 | + |
| 6 | + Variation A: |
| 7 | + |
| 8 | + In transIntrinsicInst in SPIRVWriter, calls to LLVM intrinsics are replaced with a SPIRV ExtInst. |
| 9 | + For example: |
| 10 | + |
| 11 | + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) |
| 12 | + |
| 13 | + is translated into SPIRV with an OpenCL ExtInst clz: |
| 14 | + |
| 15 | + 6 ExtInst 2 7 1 clz 5 |
| 16 | + |
| 17 | + The code in transIntrinsicInst to do this translation is: |
| 18 | + |
| 19 | + case Intrinsic::ctlz: |
| 20 | + case Intrinsic::cttz: { |
| 21 | + SPIRVWord ExtOp = IID == Intrinsic::ctlz ? OpenCLLIB::Clz : OpenCLLIB::Ctz; |
| 22 | + SPIRVType *Ty = transType(II->getType()); |
| 23 | + std::vector<SPIRVValue *> Ops(1, transValue(II->getArgOperand(0), BB)); |
| 24 | + return BM->addExtInst(Ty, BM->getExtInstSetId(SPIRVEIS_OpenCL), ExtOp, Ops, |
| 25 | + BB); |
| 26 | + } |
| 27 | + |
| 28 | + When these ExtInst are reverse translated with (llvm-spirv -r) they are converted to calls: |
| 29 | + |
| 30 | + %0 = call spir_func i32 @_Z3clzi(i32 %x) #0 |
| 31 | + |
| 32 | + Implementation of the spir_func is in an OpenCL library. |
| 33 | + |
| 34 | + If reverse translation is done with (llvm-spirv -r --spirv-target-env=SPV-IR) the calls are converted to |
| 35 | + SPIRV Friendly IR: |
| 36 | + |
| 37 | + %0 = call spir_func i32 @_Z15__spirv_ocl_clzi(i32 %x) |
| 38 | + |
| 39 | + This is the cleanest method of lowering. If an LLVM intrinsic naturally maps to a SPIRV instruction, and if there is an |
| 40 | + external library that supports the instruction, this method should be chosen. |
| 41 | + |
| 42 | + ----------------------------------------------------------------------------------------------------------------------------------- |
| 43 | + Variation B: |
| 44 | + |
| 45 | + Sometimes an intrinsic can be translated to an instruction that is only available with an extension. For example translating: |
| 46 | + |
| 47 | + %ret = call i8 @llvm.bitreverse.i8(i8 %a) |
| 48 | + |
| 49 | + when llvm-spirv is invoked with: |
| 50 | + |
| 51 | + llvm-spirv --spirv-ext=+SPV_KHR_bit_instructions |
| 52 | + |
| 53 | + is translated into SPIRV: |
| 54 | + |
| 55 | + 4 BitReverse 51 66 58 |
| 56 | + |
| 57 | + The code in transIntrinsicInst to do this translation is: |
| 58 | + |
| 59 | + case Intrinsic::bitreverse: { |
| 60 | + if (!BM->getErrorLog().checkError( |
| 61 | + BM->isAllowedToUseExtension(ExtensionID::SPV_KHR_bit_instructions), |
| 62 | + SPIRVEC_InvalidFunctionCall, II, |
| 63 | + "Translation of llvm.bitreverse intrinsic requires " |
| 64 | + "SPV_KHR_bit_instructions extension.")) { |
| 65 | + return nullptr; |
| 66 | + } |
| 67 | + SPIRVType *Ty = transType(II->getType()); |
| 68 | + SPIRVValue *Op = transValue(II->getArgOperand(0), BB); |
| 69 | + return BM->addUnaryInst(OpBitReverse, Ty, Op, BB); |
| 70 | + } |
| 71 | + |
| 72 | +Method 2: |
| 73 | + |
| 74 | + Some intrinsics are emulated by basic operations in SPIRVWriter. For example: |
| 75 | + |
| 76 | + %0 = call float @llvm.vector.reduce.fadd.v4f32(float %sp, <4 x float> %v) |
| 77 | + |
| 78 | + is emulated in SPIRV with: |
| 79 | + |
| 80 | + 5 VectorExtractDynamic 2 11 7 10 |
| 81 | + 5 VectorExtractDynamic 2 13 7 12 |
| 82 | + 5 VectorExtractDynamic 2 15 7 14 |
| 83 | + 5 VectorExtractDynamic 2 17 7 16 |
| 84 | + 5 FAdd 2 18 6 11 |
| 85 | + 5 FAdd 2 19 18 13 |
| 86 | + 5 FAdd 2 20 19 15 |
| 87 | + 5 FAdd 2 21 20 17 |
| 88 | + |
| 89 | + The code in transIntrinsicInst that performs this kind of emulation (shown here for the integer reduction llvm.vector.reduce.add) is: |
| 90 | + |
| 91 | + case Intrinsic::vector_reduce_add: { |
| 92 | + Op Op; |
| 93 | + if (IID == Intrinsic::vector_reduce_add) { |
| 94 | + Op = OpIAdd; |
| 95 | + } |
| 96 | + VectorType *VecTy = cast<VectorType>(II->getArgOperand(0)->getType()); |
| 97 | + SPIRVValue *VecSVal = transValue(II->getArgOperand(0), BB); |
| 98 | + SPIRVTypeInt *ResultSType = |
| 99 | + BM->addIntegerType(VecTy->getElementType()->getIntegerBitWidth()); |
| 100 | + SPIRVTypeInt *I32STy = BM->addIntegerType(32); |
| 101 | + unsigned VecSize = VecTy->getElementCount().getFixedValue(); |
| 102 | + SmallVector<SPIRVValue *, 16> Extracts(VecSize); |
| 103 | + for (unsigned Idx = 0; Idx < VecSize; ++Idx) { |
| 104 | + Extracts[Idx] = BM->addVectorExtractDynamicInst( |
| 105 | + VecSVal, BM->addIntegerConstant(I32STy, Idx), BB); |
| 106 | + } |
| 107 | + unsigned Counter = VecSize >> 1; |
| 108 | + while (Counter != 0) { |
| 109 | + for (unsigned Idx = 0; Idx < Counter; ++Idx) { |
| 110 | + Extracts[Idx] = BM->addBinaryInst(Op, ResultSType, Extracts[Idx << 1], |
| 111 | + Extracts[(Idx << 1) + 1], BB); |
| 112 | + } |
| 113 | + Counter >>= 1; |
| 114 | + } |
| 115 | + if ((VecSize & 1) != 0) { |
| 116 | + Extracts[0] = BM->addBinaryInst(Op, ResultSType, Extracts[0], |
| 117 | + Extracts[VecSize - 1], BB); |
| 118 | + } |
| 119 | + return Extracts[0]; |
| 120 | + } |
| 121 | + |
| 122 | + |
| 123 | +Method 3: |
| 124 | + |
| 125 | + In SPIRVRegularizeLLVMPass, calls to LLVM intrinsics are replaced with a call to an emulation function. |
| 126 | + The emulation function is created by LLVM API calls. The calls to the emulation |
| 127 | + functions and the emulation functions themselves will be translated to SPIRV. In reverse translation, the calls are reverted |
| 128 | + to the original LLVM intrinsic and the emulation functions are deleted. |
| 129 | + |
| 130 | + For example, calls to llvm.bswap.i16: |
| 131 | + |
| 132 | + %ret = call i16 @llvm.bswap.i16(i16 %0) |
| 133 | + |
| 134 | + will be re-directed to an emulation function: |
| 135 | + |
| 136 | + %ret = call i16 @spirv.llvm_bswap_i16(i16 %0) |
| 137 | + |
| 138 | + The emulation function is constructed by the translator in SPIRVRegularizeLLVM (note that this code |
| 139 | + handles all types): |
| 140 | + |
| 141 | + case Intrinsic::bswap: { |
| 142 | + BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", F); |
| 143 | + IRBuilder<> IRB(EntryBB); |
| 144 | + auto *BSwap = IRB.CreateIntrinsic(Intrinsic::bswap, Intrinsic->getType(), |
| 145 | + F->getArg(0)); |
| 146 | + IRB.CreateRet(BSwap); |
| 147 | + IntrinsicLowering IL(M->getDataLayout()); |
| 148 | + IL.LowerIntrinsicCall(BSwap); |
| 149 | + break; |
| 150 | + } |
| 151 | + |
| 152 | + This will produce a function like: |
| 153 | + |
| 154 | + define i16 @spirv.llvm_bswap_i16(i16 %0) { |
| 155 | + entry: |
| 156 | + %bswap.2 = shl i16 %0, 8 |
| 157 | + %bswap.1 = lshr i16 %0, 8 |
| 158 | + %bswap.i16 = or i16 %bswap.2, %bswap.1 |
| 159 | + ret i16 %bswap.i16 |
| 160 | + } |
| 161 | + |
| 162 | + After forward translation the emulation calls and functions will appear in SPIRV: |
| 163 | + |
| 164 | + 8 Name 24 "spirv.llvm_bswap_i16" |
| 165 | + ... |
| 166 | + 5 FunctionCall 8 26 24 22 |
| 167 | + ... |
| 168 | + 5 Function 8 24 0 23 |
| 169 | + 3 FunctionParameter 8 25 |
| 170 | + 2 Label 42 |
| 171 | + 5 ShiftLeftLogical 8 44 25 43 |
| 172 | + 5 ShiftRightLogical 8 45 25 43 |
| 173 | + 5 BitwiseOr 8 46 44 45 |
| 174 | + 2 ReturnValue 46 |
| 175 | + |
| 176 | + In reverse translation, the lowering is undone. Calls are reverted to the original llvm.bswap.i16 intrinsic: |
| 177 | + |
| 178 | + %ret = call i16 @llvm.bswap.i16(i16 %0) |
| 179 | + |
| 180 | + The emulation functions are deleted. |
| 181 | + |
| 182 | + The functionality of the intrinsic is created by a call to LLVM's CreateIntrinsic, so the complexity within the |
| 183 | + translator is small. However, this is effectively using the translator to insert a library function at translation |
| 184 | + time. |
| 185 | + |
| 186 | +Method 4: |
| 187 | + |
| 188 | + In SPIRVLowerLLVMIntrinsicPass, calls to LLVM intrinsics are replaced with a call to an emulation function. |
| 189 | + The emulation function is represented as a text string of LLVM assembly and is parsed and added to the LLVM IR |
| 190 | + to be translated. The calls to the emulation functions and the emulation functions themselves will be translated |
| 191 | + to SPIRV. After reverse translation, the calls to the emulation functions and the emulation functions themselves will appear |
| 192 | + in the LLVM IR. |
| 193 | + |
| 194 | + For example, if SPV_KHR_bit_instructions is not enabled, then bit instructions are not supported and llvm.bitreverse.i8 |
| 195 | + will be emulated (note that this is the same intrinsic used in the example for Method 1, Variation B). Calls to it: |
| 196 | + |
| 197 | + %ret = call i8 @llvm.bitreverse.i8(i8 %a) |
| 198 | + |
| 199 | + will be re-directed to an emulation function: |
| 200 | + |
| 201 | + %ret = call i8 @llvm_bitreverse_i8(i8 %a) |
| 202 | + |
| 203 | + The emulation function is built into the translator. The source is recorded as a string in LLVMBitreverse.h (note that a separate |
| 204 | + emulation function is needed for each type): |
| 205 | + |
| 206 | + static const char LLVMBitreversei8[]{R"( |
| 207 | + define zeroext i8 @llvm_bitreverse_i8(i8 %A) { |
| 208 | + entry: |
| 209 | + %and = shl i8 %A, 4 |
| 210 | + %shr = lshr i8 %A, 4 |
| 211 | + %or = or disjoint i8 %and, %shr |
| 212 | + %and5 = shl i8 %or, 2 |
| 213 | + %shl6 = and i8 %and5, -52 |
| 214 | + %shr8 = lshr i8 %or, 2 |
| 215 | + %and9 = and i8 %shr8, 51 |
| 216 | + %or10 = or disjoint i8 %shl6, %and9 |
| 217 | + %and13 = shl i8 %or10, 1 |
| 218 | + %shl14 = and i8 %and13, -86 |
| 219 | + %shr16 = lshr i8 %or10, 1 |
| 220 | + %and17 = and i8 %shr16, 85 |
| 221 | + %or18 = or disjoint i8 %shl14, %and17 |
| 222 | + ret i8 %or18 |
| 223 | + } |
| 224 | + )"}; |
| 225 | + |
| 226 | + The supported lowerings are recorded in a table in SPIRVLowerLLVMIntrinsic: |
| 227 | + |
| 228 | + // LLVM Intrinsic Name Required Extension Forbidden Extension Module with |
| 229 | + // emulation function |
| 230 | + ... |
| 231 | + { "llvm.bitreverse.i8", {NO_REQUIRED_EXTENSION, ExtensionID::SPV_KHR_bit_instructions, LLVMBitreversei8}}, |
| 232 | + ... |
| 233 | + |
| 234 | + |
| 235 | + This is the most flexible way of lowering, but it requires a lot of work to create emulation functions for all the necessary types. |
| 236 | + The functionality of a call is provided by LLVM IR supplied as text. The complexity of the intrinsic functionality is inside the translator. |
| 237 | + Each function signature variation for an intrinsic that needs to be lowered must be supplied by the developer. As with Method 3, |
| 238 | + this method is effectively using the translator to insert a library function at translation time. |
0 commit comments