|
| 1 | +//===---- SYCLPropagateAspectsUsage.cpp - SYCLPropagateAspectsUsage Pass --===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// Pass propagates optional kernel features metadata through a module call graph |
| 10 | +// |
| 11 | +// The pass consists of four main steps: |
| 12 | +// |
| 13 | +// I. It builds Type -> set of aspects mapping for usage in step II |
| 14 | +// II. It builds Function -> set of aspects mapping to use in further steps |
| 15 | +// III. FIXME: this step is not yet implemented |
| 16 | +// Analyzes aspects usage and emit warnings if necessary |
| 17 | +// IV. Generates metadata with information about aspects used by each function |
| 18 | +// |
| 19 | +// Note: step I is not strictly necessary, because we can simply check if a |
| 20 | +// function actually uses one or another type to say whether or not it uses any |
| 21 | +// aspects. However, from customers point of view it could be more transparent |
| 22 | +// that if a function is declared accepting an optional type, then it means that |
| 23 | +// it uses an associated aspect, regardless of whether or not compiler was able |
| 24 | +// to optimize out that variable. |
| 25 | +// |
| 26 | +//===----------------------------------------------------------------------===// |
| 27 | + |
| 28 | +#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h" |
| 29 | + |
| 30 | +#include "llvm/ADT/SetVector.h" |
| 31 | +#include "llvm/ADT/SmallPtrSet.h" |
| 32 | +#include "llvm/ADT/SmallSet.h" |
| 33 | +#include "llvm/IR/Constants.h" |
| 34 | +#include "llvm/IR/InstIterator.h" |
| 35 | +#include "llvm/IR/Instructions.h" |
| 36 | +#include "llvm/IR/IntrinsicInst.h" |
| 37 | +#include "llvm/IR/Module.h" |
| 38 | +#include "llvm/Pass.h" |
| 39 | +#include "llvm/Support/Path.h" |
| 40 | + |
| 41 | +#include <queue> |
| 42 | +#include <unordered_map> |
| 43 | +#include <unordered_set> |
| 44 | + |
| 45 | +using namespace llvm; |
| 46 | + |
| 47 | +namespace { |
| 48 | + |
| 49 | +using AspectsSetTy = SmallSet<int, 4>; |
| 50 | +using TypeToAspectsMapTy = std::unordered_map<const Type *, AspectsSetTy>; |
| 51 | + |
| 52 | +/// Retrieves from metadata (intel_types_that_use_aspects) types |
| 53 | +/// and aspects these types depend on. |
| 54 | +TypeToAspectsMapTy getTypesThatUseAspectsFromMetadata(const Module &M) { |
| 55 | + const NamedMDNode *Node = M.getNamedMetadata("intel_types_that_use_aspects"); |
| 56 | + TypeToAspectsMapTy Result; |
| 57 | + if (!Node) |
| 58 | + return Result; |
| 59 | + |
| 60 | + LLVMContext &C = M.getContext(); |
| 61 | + for (const auto OperandIt : Node->operands()) { |
| 62 | + const MDNode &N = *OperandIt; |
| 63 | + assert(N.getNumOperands() > 1 && "intel_types_that_use_aspect metadata " |
| 64 | + "shouldn't contain empty metadata nodes"); |
| 65 | + |
| 66 | + const auto *TypeName = cast<MDString>(N.getOperand(0)); |
| 67 | + const Type *T = StructType::getTypeByName(C, TypeName->getString()); |
| 68 | + assert(T && |
| 69 | + "invalid type referenced by intel_types_that_use_aspect metadata"); |
| 70 | + |
| 71 | + AspectsSetTy &Aspects = Result[T]; |
| 72 | + for (size_t I = 1; I != N.getNumOperands(); ++I) { |
| 73 | + const auto *CAM = cast<ConstantAsMetadata>(N.getOperand(I)); |
| 74 | + const Constant *C = CAM->getValue(); |
| 75 | + Aspects.insert(cast<ConstantInt>(C)->getSExtValue()); |
| 76 | + } |
| 77 | + } |
| 78 | + |
| 79 | + return Result; |
| 80 | +} |
| 81 | + |
| 82 | +using TypesEdgesTy = |
| 83 | + std::unordered_map<const Type *, std::vector<const Type *>>; |
| 84 | + |
| 85 | +/// Propagates aspects from type @Start to all types which |
| 86 | +/// are reachable by edges @Edges by BFS algorithm. |
| 87 | +/// Result is recorded in @Aspects. |
| 88 | +void propagateAspectsThroughTypes(const TypesEdgesTy &Edges, const Type *Start, |
| 89 | + TypeToAspectsMapTy &Aspects) { |
| 90 | + const AspectsSetTy &AspectsToPropagate = Aspects[Start]; |
| 91 | + SmallSetVector<const Type *, 16> TypesToPropagate; |
| 92 | + TypesToPropagate.insert(Start); |
| 93 | + for (size_t I = 0; I < TypesToPropagate.size(); ++I) { |
| 94 | + const Type *T = TypesToPropagate[I]; |
| 95 | + Aspects[T].insert(AspectsToPropagate.begin(), AspectsToPropagate.end()); |
| 96 | + const auto It = Edges.find(T); |
| 97 | + if (It != Edges.end()) |
| 98 | + TypesToPropagate.insert(It->second.begin(), It->second.end()); |
| 99 | + } |
| 100 | +} |
| 101 | + |
| 102 | +/// Propagates given aspects to all types in module @M. Function accepts |
| 103 | +/// aspects in @TypesWithAspects reference and writes a result in this |
| 104 | +/// reference. |
| 105 | +/// Type T in the result uses an aspect A if Type T is a composite |
| 106 | +/// type (array, struct, vector) which contains elements/fields of |
| 107 | +/// another type TT, which in turn uses the aspect A. |
| 108 | +/// @TypesWithAspects argument consist of known types with aspects |
| 109 | +/// from metadata information. |
| 110 | +/// |
| 111 | +/// The algorithm is the following: |
| 112 | +/// 1) Make a list of all structure types from module @M. The list also |
| 113 | +/// contains DoubleTy since it is optional as well. |
| 114 | +/// 2) Make from list a type graph which consists of nodes corresponding to |
| 115 | +/// types and directed edges between nodes. An edge from type A to type B |
| 116 | +/// corresponds to the fact that A is contained within B. |
| 117 | +/// Examples: B is a pointer to A, B is a struct containing field of type A. |
| 118 | +/// 3) For every known type with aspects propagate it's aspects over graph. |
| 119 | +/// Every propagation is a separate run of BFS algorithm. |
| 120 | +/// |
| 121 | +/// Time complexity: O((V + E) * T) where T is the number of input types |
| 122 | +/// containing aspects. |
| 123 | +void propagateAspectsToOtherTypesInModule( |
| 124 | + const Module &M, TypeToAspectsMapTy &TypesWithAspects) { |
| 125 | + std::unordered_set<const Type *> TypesToProcess; |
| 126 | + const Type *DoubleTy = Type::getDoubleTy(M.getContext()); |
| 127 | + |
| 128 | + // 6 is taken from sycl/include/CL/sycl/aspects.hpp |
| 129 | + // Note: that magic number must strictly correspond to the one assigned to |
| 130 | + // 'fp64' value of 'aspect' enum. |
| 131 | + // FIXME: we should develop some kind of mechanism which will allow us to |
| 132 | + // avoid hardcoding this number here and having a build dependency between |
| 133 | + // the compiler and the runtime. See intel/llvm#5892 |
| 134 | + static constexpr int AspectFP64 = 6; |
| 135 | + TypesWithAspects[DoubleTy].insert(AspectFP64); |
| 136 | + |
| 137 | + TypesToProcess.insert(DoubleTy); |
| 138 | + for (const Type *T : M.getIdentifiedStructTypes()) |
| 139 | + TypesToProcess.insert(T); |
| 140 | + |
| 141 | + TypesEdgesTy Edges; |
| 142 | + for (const Type *T : TypesToProcess) { |
| 143 | + for (const Type *TT : T->subtypes()) { |
| 144 | + if (TT->isPointerTy()) |
| 145 | + // We don't know the pointee type in opaque pointers world |
| 146 | + continue; |
| 147 | + |
| 148 | + // If TT = [4 x [4 x [4 x %A]]] then we want to get TT = %A |
| 149 | + // The same with vectors |
| 150 | + while (TT->isArrayTy() || TT->isVectorTy()) { |
| 151 | + TT = TT->getContainedType(0); |
| 152 | + } |
| 153 | + |
| 154 | + // We are not interested in some types. For example, IntTy. |
| 155 | + if (TypesToProcess.count(TT)) |
| 156 | + Edges[TT].push_back(T); |
| 157 | + } |
| 158 | + } |
| 159 | + |
| 160 | + TypeToAspectsMapTy Result; |
| 161 | + for (const Type *T : TypesToProcess) |
| 162 | + propagateAspectsThroughTypes(Edges, T, TypesWithAspects); |
| 163 | +} |
| 164 | + |
| 165 | +/// Returns all aspects which might be reached from type @T. |
| 166 | +/// It encompases composite structures and pointers. |
| 167 | +/// NB! This function inserts new records in @Types map for new discovered |
| 168 | +/// types. For the best perfomance pass this map in the next invocations. |
| 169 | +const AspectsSetTy &getAspectsFromType(const Type *T, |
| 170 | + TypeToAspectsMapTy &Types) { |
| 171 | + const auto It = Types.find(T); |
| 172 | + if (It != Types.end()) |
| 173 | + return It->second; |
| 174 | + |
| 175 | + // Empty value is inserted for absent key T. |
| 176 | + // This is essential to no get into infinite recursive loops. |
| 177 | + AspectsSetTy &Result = Types[T]; |
| 178 | + |
| 179 | + for (const Type *TT : T->subtypes()) { |
| 180 | + const AspectsSetTy &Aspects = getAspectsFromType(TT, Types); |
| 181 | + Result.insert(Aspects.begin(), Aspects.end()); |
| 182 | + } |
| 183 | + |
| 184 | + return Result; |
| 185 | +} |
| 186 | + |
| 187 | +/// Returns aspects which might be used in instruction @I. |
| 188 | +/// Function inspects return type and all operand's types. |
| 189 | +/// NB! This function inserts new records in @Types map for new discovered |
| 190 | +/// types. For the best perfomance pass this map in the next invocations. |
| 191 | +AspectsSetTy getAspectsUsedByInstruction(const Instruction &I, |
| 192 | + TypeToAspectsMapTy &Types) { |
| 193 | + const Type *ReturnType = I.getType(); |
| 194 | + AspectsSetTy Result = getAspectsFromType(ReturnType, Types); |
| 195 | + for (const auto &OperandIt : I.operands()) { |
| 196 | + const AspectsSetTy &Aspects = |
| 197 | + getAspectsFromType(OperandIt->getType(), Types); |
| 198 | + Result.insert(Aspects.begin(), Aspects.end()); |
| 199 | + } |
| 200 | + |
| 201 | + return Result; |
| 202 | +} |
| 203 | + |
| 204 | +using FunctionToAspectsMapTy = DenseMap<Function *, AspectsSetTy>; |
| 205 | +using CallGraphTy = DenseMap<Function *, SmallPtrSet<Function *, 8>>; |
| 206 | + |
| 207 | +void createUsedAspectsMetadataForFunctions(FunctionToAspectsMapTy &Map) { |
| 208 | + for (auto &It : Map) { |
| 209 | + AspectsSetTy &Aspects = It.second; |
| 210 | + if (Aspects.empty()) |
| 211 | + continue; |
| 212 | + |
| 213 | + Function *F = It.first; |
| 214 | + LLVMContext &C = F->getContext(); |
| 215 | + |
| 216 | + SmallVector<Metadata *, 16> AspectsMetadata; |
| 217 | + for (const auto &A : Aspects) |
| 218 | + AspectsMetadata.push_back(ConstantAsMetadata::get( |
| 219 | + ConstantInt::getSigned(Type::getInt32Ty(C), A))); |
| 220 | + |
| 221 | + MDNode *MDN = MDNode::get(C, AspectsMetadata); |
| 222 | + F->setMetadata("intel_used_aspects", MDN); |
| 223 | + } |
| 224 | +} |
| 225 | + |
| 226 | +/// Propagates aspects from leaves up to the top of call graph. |
| 227 | +/// NB! Call graph corresponds to call graph of SYCL code which |
| 228 | +/// can't contain recursive calls. So there can't be loops in |
| 229 | +/// a call graph. But there can be path's intersections. |
| 230 | +void propagateAspectsThroughCG(Function *F, CallGraphTy &CG, |
| 231 | + FunctionToAspectsMapTy &AspectsMap, |
| 232 | + SmallPtrSet<const Function *, 16> &Visited) { |
| 233 | + const auto It = CG.find(F); |
| 234 | + if (It == CG.end()) |
| 235 | + return; |
| 236 | + |
| 237 | + AspectsSetTy LocalAspects; |
| 238 | + for (Function *Callee : It->second) { |
| 239 | + if (Visited.insert(Callee).second) |
| 240 | + propagateAspectsThroughCG(Callee, CG, AspectsMap, Visited); |
| 241 | + |
| 242 | + const auto &CalleeAspects = AspectsMap[Callee]; |
| 243 | + LocalAspects.insert(CalleeAspects.begin(), CalleeAspects.end()); |
| 244 | + } |
| 245 | + |
| 246 | + AspectsMap[F].insert(LocalAspects.begin(), LocalAspects.end()); |
| 247 | +} |
| 248 | + |
| 249 | +/// Processes a function: |
| 250 | +/// - checks if return and argument types are using any aspects |
| 251 | +/// - checks if instructions are using any aspects |
| 252 | +/// - updates call graph information |
| 253 | +/// |
| 254 | +void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToAspects, |
| 255 | + TypeToAspectsMapTy &TypesWithAspects, CallGraphTy &CG) { |
| 256 | + const AspectsSetTy RetTyAspects = |
| 257 | + getAspectsFromType(F.getReturnType(), TypesWithAspects); |
| 258 | + FunctionToAspects[&F].insert(RetTyAspects.begin(), RetTyAspects.end()); |
| 259 | + for (Argument &Arg : F.args()) { |
| 260 | + const AspectsSetTy ArgAspects = |
| 261 | + getAspectsFromType(Arg.getType(), TypesWithAspects); |
| 262 | + FunctionToAspects[&F].insert(ArgAspects.begin(), ArgAspects.end()); |
| 263 | + } |
| 264 | + |
| 265 | + for (Instruction &I : instructions(F)) { |
| 266 | + const AspectsSetTy Aspects = |
| 267 | + getAspectsUsedByInstruction(I, TypesWithAspects); |
| 268 | + FunctionToAspects[&F].insert(Aspects.begin(), Aspects.end()); |
| 269 | + |
| 270 | + if (const auto *CI = dyn_cast<CallInst>(&I)) { |
| 271 | + if (!CI->isIndirectCall() && CI->getCalledFunction()) |
| 272 | + CG[&F].insert(CI->getCalledFunction()); |
| 273 | + } |
| 274 | + } |
| 275 | +} |
| 276 | + |
| 277 | +// Return true if the function is a SPIRV or SYCL builtin, e.g. |
| 278 | +// _Z28__spirv_GlobalInvocationId_xv |
| 279 | +// Note: this function was copied from sycl-post-link/ModuleSplitter.cpp and the |
| 280 | +// definition of entry point (i.e. implementation of the function) should be in |
| 281 | +// sync between those two. |
| 282 | +bool isSpirvSyclBuiltin(StringRef FName) { |
| 283 | + if (!FName.consume_front("_Z")) |
| 284 | + return false; |
| 285 | + // now skip the digits |
| 286 | + FName = FName.drop_while([](char C) { return std::isdigit(C); }); |
| 287 | + |
| 288 | + return FName.startswith("__spirv_") || FName.startswith("__sycl_"); |
| 289 | +} |
| 290 | + |
| 291 | +bool isEntryPoint(const Function &F) { |
| 292 | + // Skip declarations, we can't analyze them |
| 293 | + if (F.isDeclaration()) |
| 294 | + return false; |
| 295 | + |
| 296 | + // Kernels are always considered to be entry points |
| 297 | + if (CallingConv::SPIR_KERNEL == F.getCallingConv()) |
| 298 | + return true; |
| 299 | + |
| 300 | + // FIXME: sycl-post-link allows to disable treating SYCL_EXTERNAL's as entry |
| 301 | + // points - do we need similar flag here? |
| 302 | + // SYCL_EXTERNAL functions with sycl-module-id attribute |
| 303 | + // are also considered as entry points (except __spirv_* and __sycl_* |
| 304 | + // functions) |
| 305 | + return F.hasFnAttribute("sycl-module-id") && !isSpirvSyclBuiltin(F.getName()); |
| 306 | +} |
| 307 | + |
| 308 | +/// Returns a map of functions with corresponding used aspects. |
| 309 | +FunctionToAspectsMapTy |
| 310 | +buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects) { |
| 311 | + FunctionToAspectsMapTy FunctionToAspects; |
| 312 | + CallGraphTy CG; |
| 313 | + std::vector<Function *> EntryPoints; |
| 314 | + for (Function &F : M.functions()) { |
| 315 | + if (F.isDeclaration()) |
| 316 | + continue; |
| 317 | + |
| 318 | + if (isEntryPoint(F)) |
| 319 | + EntryPoints.push_back(&F); |
| 320 | + |
| 321 | + processFunction(F, FunctionToAspects, TypesWithAspects, CG); |
| 322 | + } |
| 323 | + |
| 324 | + SmallPtrSet<const Function *, 16> Visited; |
| 325 | + for (Function *F : EntryPoints) |
| 326 | + propagateAspectsThroughCG(F, CG, FunctionToAspects, Visited); |
| 327 | + |
| 328 | + return FunctionToAspects; |
| 329 | +} |
| 330 | + |
| 331 | +} // anonymous namespace |
| 332 | + |
| 333 | +PreservedAnalyses |
| 334 | +SYCLPropagateAspectsUsagePass::run(Module &M, ModuleAnalysisManager &MAM) { |
| 335 | + TypeToAspectsMapTy TypesWithAspects = getTypesThatUseAspectsFromMetadata(M); |
| 336 | + propagateAspectsToOtherTypesInModule(M, TypesWithAspects); |
| 337 | + |
| 338 | + FunctionToAspectsMapTy FunctionToAspects = |
| 339 | + buildFunctionsToAspectsMap(M, TypesWithAspects); |
| 340 | + |
| 341 | + createUsedAspectsMetadataForFunctions(FunctionToAspects); |
| 342 | + // FIXME: check and diagnose if a function uses an aspect which was not |
| 343 | + // declared through [[sycl::device_has()]] attribute |
| 344 | + |
| 345 | + return PreservedAnalyses::all(); |
| 346 | +} |
0 commit comments