@@ -155,6 +155,7 @@ SPDX-License-Identifier: MIT
155155#include  " llvm/GenXIntrinsics/GenXIntrOpts.h" 
156156#include  " llvm/GenXIntrinsics/GenXIntrinsics.h" 
157157#include  " llvm/GenXIntrinsics/GenXMetadata.h" 
158+ #include  " llvm/InitializePasses.h" 
158159#include  " llvm/IR/Constants.h" 
159160#include  " llvm/IR/DebugInfo.h" 
160161#include  " llvm/IR/DiagnosticInfo.h" 
@@ -260,6 +261,9 @@ class ISPCSimdCFLowering : public ModulePass {
260261
261262//  The CM SIMD CF lowering pass (a function pass)
262263class  CMSimdCFLowering  : public  FunctionPass  {
264+   using  GListType = std::vector<llvm::GlobalVariable*>;
265+   std::map<const  Function *, DominatorTree *> DTs;
266+   GListType VolList;
263267public: 
264268  static  char  ID;
265269
@@ -273,13 +277,17 @@ class CMSimdCFLowering : public FunctionPass {
273277  virtual  bool  doInitialization (Module &M) override ;
274278  virtual  bool  runOnFunction (Function &F) override  { return  false ; }
275279private: 
280+   DominatorTree *getDomTree (Function *F);
281+   bool  isGlobalInterseptVol (GlobalVariable &G, const  GListType& VolList);
282+   void  initializeVolatileGlobals (Module &M);
276283  void  calculateVisitOrder (Module *M, std::vector<Function *> *VisitOrder);
277284};
278285
279286} //  namespace
280287
281288char  CMSimdCFLowering::ID = 0 ;
282289INITIALIZE_PASS_BEGIN (CMSimdCFLowering, " cmsimdcflowering" " Lower CM SIMD control flow" false , false )
290+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
283291INITIALIZE_PASS_END (CMSimdCFLowering, " cmsimdcflowering" " Lower CM SIMD control flow" false , false )
284292
285293Pass *llvm::createCMSimdCFLoweringPass() { return  new  CMSimdCFLowering (); }
@@ -300,29 +308,70 @@ bool ISPCSimdCFLowering::runOnModule(Module &M) {
300308    return  CMSimdCFLowering ().doInitialization (M);
301309}
302310
311+ 
312+ DominatorTree *CMSimdCFLowering::getDomTree (Function *F)
313+ {
314+   if  (!DTs[F]) {
315+     auto  DT = new  DominatorTree;
316+     DT->recalculate (*F);
317+     DTs[F] = DT;
318+   }
319+   return  DTs[F];
320+ }
321+ 
303322/* **********************************************************************
304-  * doInitialization : per-module initialization for CM simd CF lowering 
305-  * 
306-  * Really we want a module pass for CM simd CF lowering. But, without modifying 
307-  * llvm's PassManagerBuilder, the earliest place to insert a pass is 
308-  * EP_EarlyAsPossible, which must be a function pass. So, we do our 
309-  * per-module processing here in doInitialization. 
323+  * isGlobalInterseptVol : Check for intersection between a global variable
324+  * and a list of volatile global variables
310325 */  
311- bool  CMSimdCFLowering::doInitialization (Module &M)
312- {
313- #if  0 
314-   for (auto &F : M.getFunctionList()) {
315-     if (F.hasFnAttribute("CMGenxSIMT")) {
316-       if (F.hasFnAttribute(Attribute::AlwaysInline)) {
317-         F.removeFnAttr(Attribute::AlwaysInline);
318-         F.removeFnAttr(Attribute::InlineHint);
319-         F.addFnAttr("CMGenxInline");
320-         F.addFnAttr(Attribute::NoInline);
326+ bool  CMSimdCFLowering::isGlobalInterseptVol (GlobalVariable &G, const  GListType& VolList) {
327+   for  (auto  UI = G.user_begin (), UE = G.user_end (); UI != UE; UI++) {
328+     llvm::Instruction *U = dyn_cast<Instruction>(*UI);
329+     if  (!U)
330+       continue ;
331+     auto  *F = U->getParent ()->getParent ();
332+     auto  *DT = getDomTree (F);
333+     for (auto  &VG : VolList) {
334+       for (auto  SUI = VG->user_begin (), SUIE = VG->user_end (); SUI != SUIE;SUI++) {
335+         auto  *I = dyn_cast<Instruction>(*SUI);
336+         if  (I && DT->dominates (I,&*U)) {
337+           return  true ;
338+         }
321339      }
322340    }
323341  }
324- #endif 
342+   return  false ;
343+ }
325344
345+ /* **********************************************************************
346+  * initializeVolatileGlobals : Check and modify global variables for vc
347+  *
348+  * Volatile globals are a special case: by agreement they are placed in
349+  * hw registers (this agreement gives a lot of performance).
350+  * That is why their load and store instructions must be preserved until
351+  * the end of the vc pipeline. To achieve that, they are replaced by
352+  * genx.vload/vstore instructions.
353+  * But if a volatile global overlaps another, non-volatile global variable,
354+  * it causes an issue in the register allocator, because both would be put
355+  * in the same register. This is a special case in coalescing, which is why
356+  * we mark such globals as volatile too.
357+  */  
358+ void  CMSimdCFLowering::initializeVolatileGlobals (Module &M) {
359+   //  Analyze intersection between globals
360+   for  (auto  &G : M.getGlobalList ()) {
361+     if  (G.hasAttribute (genx::FunctionMD::GenXVolatile)) {
362+       VolList.push_back (&G);
363+     }
364+   }
365+   //  If a non-volatile global intersects with a volatile global,
366+   //  mark it volatile too
367+   for  (auto  &G : M.getGlobalList ()) {
368+     if  (!G.hasAttribute (genx::FunctionMD::GenXVolatile)) {
369+       if  (isGlobalInterseptVol (G, VolList))
370+         G.addAttribute (genx::FunctionMD::GenXVolatile);
371+     }
372+   }
373+ 
374+   //  Replace instructions to save them until the end of vc
326375  for  (auto  &G : M.getGlobalList ()) {
327376    if  (!G.hasAttribute (genx::FunctionMD::GenXVolatile))
328377      continue ;
@@ -390,6 +439,35 @@ bool CMSimdCFLowering::doInitialization(Module &M)
390439      }
391440    }
392441  }
442+ }
443+ 
444+ 
445+ /* **********************************************************************
446+  * doInitialization : per-module initialization for CM simd CF lowering 
447+  * 
448+  * Really we want a module pass for CM simd CF lowering. But, without modifying 
449+  * llvm's PassManagerBuilder, the earliest place to insert a pass is 
450+  * EP_EarlyAsPossible, which must be a function pass. So, we do our 
451+  * per-module processing here in doInitialization. 
452+  */  
453+ bool  CMSimdCFLowering::doInitialization (Module &M)
454+ {
455+   VolList.clear ();
456+   DTs.clear ();
457+ #if  0 
458+   for (auto &F : M.getFunctionList()) {
459+     if (F.hasFnAttribute("CMGenxSIMT")) {
460+       if (F.hasFnAttribute(Attribute::AlwaysInline)) {
461+         F.removeFnAttr(Attribute::AlwaysInline);
462+         F.removeFnAttr(Attribute::InlineHint);
463+         F.addFnAttr("CMGenxInline");
464+         F.addFnAttr(Attribute::NoInline);
465+       }
466+     }
467+   }
468+ #endif 
469+ 
470+   initializeVolatileGlobals (M);
393471
394472  //  See if simd CF is used anywhere in this module.
395473  //  We have to try each overload of llvm.genx.simdcf.any separately.
@@ -612,7 +690,7 @@ void CMSimdCFLower::determinePredicatedBlocks()
612690      //  Get BlockL, the closest common postdominator.
613691      auto  BlockL = PDT.findNearestCommonDominator (BlockM, BlockN);
614692      if  (BlockL == BlockM) {
615-         //  need to include BlockM into the chain  
693+         //  need to include BlockM into the chain
616694        //  if the branch is the do-while back-edge
617695        if  (auto  ParentNode = PDT.getNode (BlockM))
618696          if  (auto  IDom = ParentNode->getIDom ())
@@ -1075,7 +1153,7 @@ void CMSimdCFLower::predicateInst(Instruction *Inst, unsigned SimdWidth) {
10751153    //  An IntrNoMem intrinsic is an ALU intrinsic and can be ignored.
10761154    if  (Callee->doesNotAccessMemory () || CI->arg_size () == 0 )
10771155      return ;
1078-     //  no predication for intrinsic marked as ISPC uniform,  
1156+     //  no predication for intrinsic marked as ISPC uniform,
10791157	//  for example, atomic and oword_store used in printf
10801158    if  (CI->getMetadata (" ISPC-Uniform" nullptr )
10811159      return ;
@@ -1246,11 +1324,11 @@ void CMSimdCFLower::predicateStore(Instruction *SI, unsigned SimdWidth)
12461324  auto  StoreVT = dyn_cast<VectorType>(V->getType ());
12471325  //  Scalar store not predicated
12481326  if  (!StoreVT || VCINTR::VectorType::getNumElements (StoreVT) == 1 )
1249-     return ;  
1327+     return ;
12501328  //  no predication for ISPC uniform store
12511329  if  (SI->getMetadata (" ISPC-Uniform" nullptr )
12521330    return ;
1253-   //  local-variable store that is only used within the same basic block  
1331+   //  local-variable store that is only used within the same basic block
12541332  //  do not need predicate
12551333  if  (isSingleBlockLocalStore (SI))
12561334    return ;
@@ -1749,7 +1827,7 @@ void CMSimdCFLower::lowerUnmaskOps() {
17491827          auto  Savemask = CallInst::Create (SavemaskFunc, Args, " savemask" 
17501828          Savemask->setDebugLoc (DL);
17511829          //  the use should be the store for savemask
1752-           CIB->replaceAllUsesWith (Savemask);  
1830+           CIB->replaceAllUsesWith (Savemask);
17531831          Type *Ty1s[] = {OldEM->getType ()};
17541832          auto  UnmaskFunc = GenXIntrinsic::getGenXDeclaration (
17551833              BB->getParent ()->getParent (), GenXIntrinsic::genx_simdcf_unmask,
0 commit comments