3535#include " llvm/Support/Debug.h"
3636#include " llvm/Support/ErrorHandling.h"
3737#include " llvm/Support/raw_ostream.h"
38+ #include < algorithm>
3839#include < cassert>
3940#include < cstdint>
4041
@@ -80,8 +81,10 @@ class HexagonOptAddrMode : public MachineFunctionPass {
8081private:
8182 using MISetType = DenseSet<MachineInstr *>;
8283 using InstrEvalMap = DenseMap<MachineInstr *, bool >;
84+ DenseSet<MachineInstr *> ProcessedAddiInsts;
8385
8486 MachineRegisterInfo *MRI = nullptr ;
87+ const TargetRegisterInfo *TRI = nullptr ;
8588 const HexagonInstrInfo *HII = nullptr ;
8689 const HexagonRegisterInfo *HRI = nullptr ;
8790 MachineDominatorTree *MDT = nullptr ;
@@ -93,6 +96,15 @@ class HexagonOptAddrMode : public MachineFunctionPass {
9396 bool processBlock (NodeAddr<BlockNode *> BA);
9497 bool xformUseMI (MachineInstr *TfrMI, MachineInstr *UseMI,
9598 NodeAddr<UseNode *> UseN, unsigned UseMOnum);
99+ bool processAddBases (NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI);
100+ bool usedInLoadStore (NodeAddr<StmtNode *> CurrentInstSN, int64_t NewOffset);
101+ bool findFirstReachedInst (
102+ MachineInstr *AddMI,
103+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>>
104+ &AddiList,
105+ NodeAddr<StmtNode *> &UseSN);
106+ bool updateAddBases (MachineInstr *CurrentMI, MachineInstr *FirstReachedMI,
107+ int64_t NewOffset);
96108 bool processAddUses (NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI,
97109 const NodeList &UNodeList);
98110 bool updateAddUses (MachineInstr *AddMI, MachineInstr *UseMI);
@@ -207,8 +219,17 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
207219 return false ;
208220
209221 for (auto &Mo : UseMI.operands ())
222+ // Is it a frame index?
210223 if (Mo.isFI ())
211224 return false ;
225+ // Does the OffsetReg definition actually reach UseMI?
226+ if (!UseMI.getParent ()->isLiveIn (OffsetReg) &&
227+ MI.getParent () != UseMI.getParent ()) {
228+ LLVM_DEBUG (dbgs () << " The offset reg " << printReg (OffsetReg, TRI)
229+ << " is NOT live in to MBB "
230+ << UseMI.getParent ()->getName () << " \n " );
231+ return false ;
232+ }
212233 }
213234 return true ;
214235}
@@ -327,6 +348,14 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
327348 if ((LRExtRegDN.Addr ->getFlags () & NodeAttrs::PhiRef) &&
328349 MI->getParent () != UseMI->getParent ())
329350 return false ;
351+ // Does the LRExtReg definition actually reach UseMI?
352+ if (!UseMI->getParent ()->isLiveIn (LRExtReg) &&
353+ MI->getParent () != UseMI->getParent ()) {
354+ LLVM_DEBUG (dbgs () << " The LRExtReg reg " << printReg (LRExtReg, TRI)
355+ << " is NOT live in to MBB "
356+ << UseMI->getParent ()->getName () << " \n " );
357+ return false ;
358+ }
330359 }
331360 return true ;
332361}
@@ -344,6 +373,12 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
344373 case Hexagon::V6_vgathermhwq_pseudo:
345374 return HII->isValidOffset (MI->getOpcode (), Offset, HRI, false );
346375 default :
376+ if (HII->getAddrMode (*MI) == HexagonII::BaseImmOffset) {
377+ // The immediates are mentioned in multiples of vector counts
378+ unsigned AlignMask = HII->getMemAccessSize (*MI) - 1 ;
379+ if ((AlignMask & Offset) == 0 )
380+ return HII->isValidOffset (MI->getOpcode (), Offset, HRI, false );
381+ }
347382 return false ;
348383 }
349384 }
@@ -414,6 +449,264 @@ unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) {
414449 }
415450}
416451
452+ bool HexagonOptAddrMode::usedInLoadStore (NodeAddr<StmtNode *> CurrentInstSN,
453+ int64_t NewOffset) {
454+ NodeList LoadStoreUseList;
455+
456+ getAllRealUses (CurrentInstSN, LoadStoreUseList);
457+ bool FoundLoadStoreUse = false ;
458+ for (auto I = LoadStoreUseList.begin (), E = LoadStoreUseList.end (); I != E;
459+ ++I) {
460+ NodeAddr<UseNode *> UN = *I;
461+ NodeAddr<StmtNode *> SN = UN.Addr ->getOwner (*DFG);
462+ MachineInstr *LoadStoreMI = SN.Addr ->getCode ();
463+ const MCInstrDesc &MID = LoadStoreMI->getDesc ();
464+ if ((MID.mayLoad () || MID.mayStore ()) &&
465+ isValidOffset (LoadStoreMI, NewOffset)) {
466+ FoundLoadStoreUse = true ;
467+ break ;
468+ }
469+ }
470+ return FoundLoadStoreUse;
471+ }
472+
473+ bool HexagonOptAddrMode::findFirstReachedInst (
474+ MachineInstr *AddMI,
475+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> &AddiList,
476+ NodeAddr<StmtNode *> &UseSN) {
477+ // Find the very first Addi instruction in the current basic block among the
478+ // AddiList This is the Addi that should be preserved so that we do not need
479+ // to handle the complexity of moving instructions
480+ //
481+ // TODO: find Addi instructions across basic blocks
482+ //
483+ // TODO: Try to remove this and add a solution that optimizes the number of
484+ // Addi instructions that can be modified.
485+ // This change requires choosing the Addi with the median offset value, but
486+ // would also require moving that instruction above the others. Since this
487+ // pass runs after register allocation, there might be multiple cases that
488+ // need to be handled if we move instructions around
489+ MachineBasicBlock *CurrentMBB = AddMI->getParent ();
490+ for (auto &InstIter : *CurrentMBB) {
491+ // If the instruction is an Addi and is in the AddiList
492+ if (InstIter.getOpcode () == Hexagon::A2_addi) {
493+ auto Iter = std::find_if (
494+ AddiList.begin (), AddiList.end (), [&InstIter](const auto &SUPair) {
495+ return SUPair.first .Addr ->getCode () == &InstIter;
496+ });
497+ if (Iter != AddiList.end ()) {
498+ UseSN = Iter->first ;
499+ return true ;
500+ }
501+ }
502+ }
503+ return false ;
504+ }
505+
506+ // This function tries to modify the immediate value in Hexagon::Addi
507+ // instructions, so that the immediates could then be moved into a load/store
508+ // instruction with offset and the add removed completely when we call
509+ // processAddUses
510+ //
511+ // For Example, If we have the below sequence of instructions:
512+ //
513+ // r1 = add(r2,#1024)
514+ // ...
515+ // r3 = add(r2,#1152)
516+ // ...
517+ // r4 = add(r2,#1280)
518+ //
519+ // Where the register r2 has the same reaching definition, They get modified to
520+ // the below sequence:
521+ //
522+ // r1 = add(r2,#1024)
523+ // ...
524+ // r3 = add(r1,#128)
525+ // ...
526+ // r4 = add(r1,#256)
527+ //
528+ // The below change helps the processAddUses method to later move the
529+ // immediates #128 and #256 into a load/store instruction that can take an
530+ // offset, like the Vd = mem(Rt+#s4)
531+ bool HexagonOptAddrMode::processAddBases (NodeAddr<StmtNode *> AddSN,
532+ MachineInstr *AddMI) {
533+
534+ bool Changed = false ;
535+
536+ LLVM_DEBUG (dbgs () << " \n\t\t [Processing Addi]: " << *AddMI << " \n " );
537+
538+ auto Processed =
539+ [](const MachineInstr *MI,
540+ const DenseSet<MachineInstr *> &ProcessedAddiInsts) -> bool {
541+ // If we've already processed this Addi, just return
542+ if (ProcessedAddiInsts.find (MI) != ProcessedAddiInsts.end ()) {
543+ LLVM_DEBUG (dbgs () << " \t\t\t Addi already found in ProcessedAddiInsts: "
544+ << *MI << " \n\t\t\t Skipping..." );
545+ return true ;
546+ }
547+ return false ;
548+ };
549+
550+ if (Processed (AddMI, ProcessedAddiInsts))
551+ return Changed;
552+ ProcessedAddiInsts.insert (AddMI);
553+
554+ // Get the base register that would be shared by other Addi Intructions
555+ Register BaseReg = AddMI->getOperand (1 ).getReg ();
556+
557+ // Store a list of all Addi instructions that share the above common base
558+ // register
559+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> AddiList;
560+
561+ NodeId UAReachingDefID;
562+ // Find the UseNode that contains the base register and it's reachingDef
563+ for (NodeAddr<UseNode *> UA : AddSN.Addr ->members_if (DFG->IsUse , *DFG)) {
564+ RegisterRef URR = UA.Addr ->getRegRef (*DFG);
565+ if (BaseReg != URR.Reg )
566+ continue ;
567+
568+ UAReachingDefID = UA.Addr ->getReachingDef ();
569+ NodeAddr<DefNode *> UADef = DFG->addr <DefNode *>(UAReachingDefID);
570+ if (!UAReachingDefID || UADef.Addr ->getFlags () & NodeAttrs::PhiRef) {
571+ LLVM_DEBUG (dbgs () << " \t\t\t Could not find reachingDef. Skipping...\n " );
572+ return false ;
573+ }
574+ }
575+
576+ NodeAddr<DefNode *> UAReachingDef = DFG->addr <DefNode *>(UAReachingDefID);
577+ NodeAddr<StmtNode *> ReachingDefStmt = UAReachingDef.Addr ->getOwner (*DFG);
578+
579+ // If the reaching definition is a predicated instruction, this might not be
580+ // the only definition of our base register, so return immediately.
581+ MachineInstr *ReachingDefInstr = ReachingDefStmt.Addr ->getCode ();
582+ if (HII->isPredicated (*ReachingDefInstr))
583+ return false ;
584+
585+ NodeList AddiUseList;
586+
587+ // Find all Addi instructions that share the same base register and add them
588+ // to the AddiList
589+ getAllRealUses (ReachingDefStmt, AddiUseList);
590+ for (auto I = AddiUseList.begin (), E = AddiUseList.end (); I != E; ++I) {
591+ NodeAddr<UseNode *> UN = *I;
592+ NodeAddr<StmtNode *> SN = UN.Addr ->getOwner (*DFG);
593+ MachineInstr *MI = SN.Addr ->getCode ();
594+
595+ // Only add instructions if it's an Addi and it's not already processed.
596+ if (MI->getOpcode () == Hexagon::A2_addi &&
597+ !(MI != AddMI && Processed (MI, ProcessedAddiInsts))) {
598+ AddiList.push_back ({SN, UN});
599+
600+ // This ensures that we process each instruction only once
601+ ProcessedAddiInsts.insert (MI);
602+ }
603+ }
604+
605+ // If there's only one Addi instruction, nothing to do here
606+ if (AddiList.size () <= 1 )
607+ return Changed;
608+
609+ NodeAddr<StmtNode *> FirstReachedUseSN;
610+ // Find the first reached use of Addi instruction from the list
611+ if (!findFirstReachedInst (AddMI, AddiList, FirstReachedUseSN))
612+ return Changed;
613+
614+ // If we reach this point we know that the StmtNode FirstReachedUseSN is for
615+ // an Addi instruction. So, we're guaranteed to have just one DefNode, and
616+ // hence we can access the front() directly without checks
617+ NodeAddr<DefNode *> FirstReachedUseDN =
618+ FirstReachedUseSN.Addr ->members_if (DFG->IsDef , *DFG).front ();
619+
620+ MachineInstr *FirstReachedMI = FirstReachedUseSN.Addr ->getCode ();
621+ const MachineOperand FirstReachedMIImmOp = FirstReachedMI->getOperand (2 );
622+ if (!FirstReachedMIImmOp.isImm ())
623+ return false ;
624+
625+ for (auto &I : AddiList) {
626+ NodeAddr<StmtNode *> CurrentInstSN = I.first ;
627+ NodeAddr<UseNode *> CurrentInstUN = I.second ;
628+
629+ MachineInstr *CurrentMI = CurrentInstSN.Addr ->getCode ();
630+ MachineOperand &CurrentMIImmOp = CurrentMI->getOperand (2 );
631+
632+ int64_t NewOffset;
633+
634+ // Even though we know it's an Addi instruction, the second operand could be
635+ // a global value and not an immediate
636+ if (!CurrentMIImmOp.isImm ())
637+ continue ;
638+
639+ NewOffset = CurrentMIImmOp.getImm () - FirstReachedMIImmOp.getImm ();
640+
641+ // This is the first occuring Addi, so skip modifying this
642+ if (CurrentMI == FirstReachedMI) {
643+ continue ;
644+ }
645+
646+ if (CurrentMI->getParent () != FirstReachedMI->getParent ())
647+ continue ;
648+
649+ // Modify the Addi instruction only if it could be used to modify a
650+ // future load/store instruction and get removed
651+ //
652+ // This check is needed because, if we modify the current Addi instruction
653+ // we create RAW dependence between the FirstReached Addi and the current
654+ // one, which could result in extra packets. So we only do this change if
655+ // we know the current Addi would get removed later
656+ if (!usedInLoadStore (CurrentInstSN, NewOffset)) {
657+ return false ;
658+ }
659+
660+ // Verify whether the First Addi's definition register is still live when
661+ // we reach the current Addi
662+ RegisterRef FirstReachedDefRR = FirstReachedUseDN.Addr ->getRegRef (*DFG);
663+ NodeAddr<InstrNode *> CurrentAddiIN = CurrentInstUN.Addr ->getOwner (*DFG);
664+ NodeAddr<RefNode *> NearestAA =
665+ LV->getNearestAliasedRef (FirstReachedDefRR, CurrentAddiIN);
666+ if ((DFG->IsDef (NearestAA) && NearestAA.Id != FirstReachedUseDN.Id ) ||
667+ (!DFG->IsDef (NearestAA) &&
668+ NearestAA.Addr ->getReachingDef () != FirstReachedUseDN.Id )) {
669+ // Found another definition of FirstReachedDef
670+ LLVM_DEBUG (dbgs () << " \t\t\t Could not modify below Addi since the first "
671+ " defined Addi register was redefined\n " );
672+ continue ;
673+ }
674+
675+ MachineOperand CurrentMIBaseOp = CurrentMI->getOperand (1 );
676+ if (CurrentMIBaseOp.getReg () != FirstReachedMI->getOperand (1 ).getReg ()) {
677+ continue ;
678+ }
679+
680+ // If we reached this point, then we can modify MI to use the result of
681+ // FirstReachedMI
682+ Changed |= updateAddBases (CurrentMI, FirstReachedMI, NewOffset);
683+
684+ // Update the reachingDef of the Current AddI use after change
685+ CurrentInstUN.Addr ->linkToDef (CurrentInstUN.Id , FirstReachedUseDN);
686+ }
687+
688+ return Changed;
689+ }
690+
691+ bool HexagonOptAddrMode::updateAddBases (MachineInstr *CurrentMI,
692+ MachineInstr *FirstReachedMI,
693+ int64_t NewOffset) {
694+ LLVM_DEBUG (dbgs () << " [About to modify the Addi]: " << *CurrentMI << " \n " );
695+ const MachineOperand FirstReachedDef = FirstReachedMI->getOperand (0 );
696+ Register FirstDefRegister = FirstReachedDef.getReg ();
697+
698+ MachineOperand &CurrentMIBaseOp = CurrentMI->getOperand (1 );
699+ MachineOperand &CurrentMIImmOp = CurrentMI->getOperand (2 );
700+
701+ CurrentMIBaseOp.setReg (FirstDefRegister);
702+ CurrentMIBaseOp.setIsUndef (FirstReachedDef.isUndef ());
703+ CurrentMIBaseOp.setImplicit (FirstReachedDef.isImplicit ());
704+ CurrentMIImmOp.setImm (NewOffset);
705+ ProcessedAddiInsts.insert (CurrentMI);
706+ MRI->clearKillFlags (FirstDefRegister);
707+ return true ;
708+ }
709+
417710bool HexagonOptAddrMode::processAddUses (NodeAddr<StmtNode *> AddSN,
418711 MachineInstr *AddMI,
419712 const NodeList &UNodeList) {
@@ -737,7 +1030,6 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
7371030
7381031 for (unsigned i = OpStart; i < OpEnd; ++i)
7391032 MIB.add (UseMI->getOperand (i));
740-
7411033 Deleted.insert (UseMI);
7421034 }
7431035
@@ -782,6 +1074,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
7821074 << " ]: " << *MI << " \n\t [InstrNode]: "
7831075 << Print<NodeAddr<InstrNode *>>(IA, *DFG) << ' \n ' );
7841076
1077+ if (MI->getOpcode () == Hexagon::A2_addi)
1078+ Changed |= processAddBases (SA, MI);
7851079 NodeList UNodeList;
7861080 getAllRealUses (SA, UNodeList);
7871081
@@ -869,6 +1163,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
8691163 bool Changed = false ;
8701164 auto &HST = MF.getSubtarget <HexagonSubtarget>();
8711165 MRI = &MF.getRegInfo ();
1166+ TRI = MF.getSubtarget ().getRegisterInfo ();
8721167 HII = HST.getInstrInfo ();
8731168 HRI = HST.getRegisterInfo ();
8741169 const auto &MDF = getAnalysis<MachineDominanceFrontier>();
@@ -885,6 +1180,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
8851180 LV = &L;
8861181
8871182 Deleted.clear ();
1183+ ProcessedAddiInsts.clear ();
8881184 NodeAddr<FuncNode *> FA = DFG->getFunc ();
8891185 LLVM_DEBUG (dbgs () << " ==== [RefMap#]=====:\n "
8901186 << Print<NodeAddr<FuncNode *>>(FA, *DFG) << " \n " );
0 commit comments