ROCm
diff --git a/‎lib/Target/AArch64/AArch64RegisterBankInfo.cpp
Lines changed: 44 additions & 26 deletions b/‎lib/Target/AArch64/AArch64RegisterBankInfo.cpp
Lines changed: 44 additions & 26 deletions
diff --git a/‎lib/Target/AArch64/AArch64RegisterBankInfo.h
Lines changed: 10 additions & 0 deletions b/‎lib/Target/AArch64/AArch64RegisterBankInfo.h
Lines changed: 10 additions & 0 deletions
diff --git a/‎lib/Target/AArch64/AArch64Subtarget.cpp
Lines changed: 4 additions & 3 deletions b/‎lib/Target/AArch64/AArch64Subtarget.cpp
Lines changed: 4 additions & 3 deletions
diff --git a/‎lib/Target/AArch64/AArch64Subtarget.h
Lines changed: 2 additions & 4 deletions b/‎lib/Target/AArch64/AArch64Subtarget.h
Lines changed: 2 additions & 4 deletions
diff --git a/‎lib/Target/AMDGPU/AMDGPU.h
Lines changed: 3 additions & 1 deletion b/‎lib/Target/AMDGPU/AMDGPU.h
Lines changed: 3 additions & 1 deletion
diff --git a/‎lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Lines changed: 2 additions & 2 deletions b/‎lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Lines changed: 6 additions & 6 deletions b/‎lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Lines changed: 6 additions & 6 deletions
diff --git a/‎lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Lines changed: 1 addition & 1 deletion b/‎lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎lib/Target/Hexagon/RDFGraph.cpp
Lines changed: 14 additions & 11 deletions b/‎lib/Target/Hexagon/RDFGraph.cpp
Lines changed: 14 additions & 11 deletions
diff --git a/‎lib/Target/Hexagon/RDFLiveness.cpp
Lines changed: 7 additions & 29 deletions b/‎lib/Target/Hexagon/RDFLiveness.cpp
Lines changed: 7 additions & 29 deletions
@@ -291,6 +291,47 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   return false;
 }
 
+RegisterBankInfo::InstructionMapping
+AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
+  const unsigned Opc = MI.getOpcode();
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  unsigned NumOperands = MI.getNumOperands();
+  assert(NumOperands <= 3 &&
+         "This code is for instructions with 3 or less operands");
+
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  unsigned Size = Ty.getSizeInBits();
+  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+
+#ifndef NDEBUG
+  // Debug-only check that all the operands have a similar size and kind.
+  // This should probably be done by the machine verifier instead.
+  // Note that this won't catch cases where the number of lanes
+  // differs between the operands.
+  // If we want to go to that level of detail, it is probably
+  // best to check that the types are the same, period.
+  // Currently, we just check that the register banks are the same
+  // for each type.
+  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
+    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
+    assert(AArch64::getRegBankBaseIdxOffset(OpTy.getSizeInBits()) ==
+               AArch64::getRegBankBaseIdxOffset(Size) &&
+           "Operand has incompatible size");
+    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+    (void)OpIsFPR;
+    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
+  }
+#endif // End NDEBUG.
+
+  AArch64::PartialMappingIdx RBIdx =
+      IsFPR ? AArch64::FirstFPR : AArch64::FirstGPR;
+
+  return InstructionMapping{DefaultMappingID, 1,
+                            AArch64::getValueMapping(RBIdx, Size), NumOperands};
+}
+
 RegisterBankInfo::InstructionMapping
 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
@@ -305,7 +346,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       return Mapping;
   }
 
-  unsigned NumOperands = MI.getNumOperands();
   switch (Opc) {
     // G_{F|S|U}REM are not listed because they are not legal.
     // Arithmetic ops.
@@ -327,35 +367,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
-  case TargetOpcode::G_FDIV:{
-    assert(NumOperands == 3 && "This code is for 3-operands instructions");
-
-    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-    unsigned Size = Ty.getSizeInBits();
-    // Make sure all the operands are using similar size.
-    // Should probably be checked by the machine verifier.
-    assert(AArch64::getRegBankBaseIdxOffset(
-               MRI.getType(MI.getOperand(1).getReg()).getSizeInBits()) ==
-               AArch64::getRegBankBaseIdxOffset(Size) &&
-           "Operand 1 has incompatible size");
-    assert(AArch64::getRegBankBaseIdxOffset(
-               MRI.getType(MI.getOperand(2).getReg()).getSizeInBits()) ==
-               AArch64::getRegBankBaseIdxOffset(Size) &&
-           "Operand 2 has incompatible size");
-
-    bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
-
-    AArch64::PartialMappingIdx RBIdx =
-        IsFPR ? AArch64::FirstFPR : AArch64::FirstGPR;
-
-    return InstructionMapping{DefaultMappingID, 1,
-                              AArch64::getValueMapping(RBIdx, Size),
-                              NumOperands};
-  }
+  case TargetOpcode::G_FDIV:
+    return getSameKindOfOperandsMapping(MI);
   default:
     break;
   }
 
+  unsigned NumOperands = MI.getNumOperands();
   RegisterBankInfo::InstructionMapping Mapping =
       InstructionMapping{DefaultMappingID, 1, nullptr, NumOperands};
 
 
@@ -38,6 +38,16 @@ class AArch64RegisterBankInfo final : public RegisterBankInfo {
   /// See RegisterBankInfo::applyMapping.
   void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
 
+  /// Get an instruction mapping where all the operands map to
+  /// the same register bank and have similar size.
+  ///
+  /// \pre MI.getNumOperands() <= 3
+  ///
+  /// \return An InstructionMapping with a statically allocated
+  /// OperandsMapping.
+  static InstructionMapping
+  getSameKindOfOperandsMapping(const MachineInstr &MI);
+
 public:
   AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
   /// Get the cost of a copy from \p B to \p A, or put differently,
 
@@ -36,7 +36,8 @@ UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                          "an address is ignored"), cl::init(false), cl::Hidden);
 
 AArch64Subtarget &
-AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
+                                                  StringRef CPUString) {
   // Determine default and user-specified characteristics
 
   if (CPUString.empty())
@@ -90,8 +91,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                    const std::string &FS,
                                    const TargetMachine &TM, bool LittleEndian)
     : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()),
-      IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
-      InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+      IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
+      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
       TLInfo(TM, *this), GISel() {}
 
 const CallLowering *AArch64Subtarget::getCallLowering() const {
 
@@ -97,9 +97,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool IsLittle;
 
-  /// CPUString - String name of used CPU.
-  std::string CPUString;
-
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -116,7 +113,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// initializeSubtargetDependencies - Initializes using CPUString and the
   /// passed in feature string so that we can use initializer lists for
   /// subtarget initialization.
-  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
+  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
+                                                    StringRef CPUString);
 
   /// Initialize properties based on the selected processor family.
   void initializeProperties();
 
@@ -12,6 +12,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
 
 #include "llvm/IR/Instructions.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
@@ -89,7 +90,8 @@ void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
 extern char &AMDGPUPromoteAllocaID;
 
 Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
+                                  CodeGenOpt::Level OptLevel);
 ModulePass *createAMDGPUAlwaysInlinePass();
 ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
 FunctionPass *createAMDGPUAnnotateUniformValues();
 
@@ -172,8 +172,8 @@ bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
 }
 
 bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
-  return I.getOpcode() == Instruction::SDiv ||
-      I.getOpcode() == Instruction::SRem;
+  return I.getOpcode() == Instruction::AShr ||
+      I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
 }
 
 bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
 
@@ -46,7 +46,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   const AMDGPUSubtarget *Subtarget;
 
 public:
-  AMDGPUDAGToDAGISel(TargetMachine &TM);
+  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(TM, OptLevel) {}
+
   virtual ~AMDGPUDAGToDAGISel();
   bool runOnMachineFunction(MachineFunction &MF) override;
   void Select(SDNode *N) override;
@@ -149,13 +151,11 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
 
 /// \brief This pass converts a legalized DAG into an AMDGPU-specific
 /// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
-  return new AMDGPUDAGToDAGISel(TM);
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
+                                        CodeGenOpt::Level OptLevel) {
+  return new AMDGPUDAGToDAGISel(TM, OptLevel);
 }
 
-AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
-    : SelectionDAGISel(TM) {}
-
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
 
@@ -441,7 +441,7 @@ bool AMDGPUPassConfig::addPreISel() {
 }
 
 bool AMDGPUPassConfig::addInstSelector() {
-  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
   return false;
 }
 
 
@@ -213,18 +213,21 @@ raw_ostream &operator<< (raw_ostream &OS,
   const MachineInstr &MI = *P.Obj.Addr->getCode();
   unsigned Opc = MI.getOpcode();
   OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
-  // Print the target for calls (for readability).
-  if (MI.getDesc().isCall()) {
-    MachineInstr::const_mop_iterator Fn =
+  // Print the target for calls and branches (for readability).
+  if (MI.isCall() || MI.isBranch()) {
+    MachineInstr::const_mop_iterator T =
           find_if(MI.operands(),
                   [] (const MachineOperand &Op) -> bool {
-                    return Op.isGlobal() || Op.isSymbol();
+                    return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
                   });
-    if (Fn != MI.operands_end()) {
-      if (Fn->isGlobal())
-        OS << ' ' << Fn->getGlobal()->getName();
-      else if (Fn->isSymbol())
-        OS << ' ' << Fn->getSymbolName();
+    if (T != MI.operands_end()) {
+      OS << ' ';
+      if (T->isMBB())
+        OS << "BB#" << T->getMBB()->getNumber();
+      else if (T->isGlobal())
+        OS << T->getGlobal()->getName();
+      else if (T->isSymbol())
+        OS << T->getSymbolName();
     }
   }
   OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
@@ -263,8 +266,8 @@ raw_ostream &operator<< (raw_ostream &OS,
     }
   };
 
-  OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber()
-     << " === preds(" << NP << "): ";
+  OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- BB#" << BB->getNumber()
+     << " --- preds(" << NP << "): ";
   for (auto I : BB->predecessors())
     Ns.push_back(I->getNumber());
   PrintBBs(Ns);
 
@@ -530,7 +530,7 @@ void Liveness::computePhiInfo() {
         RegisterSet UpReached;
         for (const std::pair<RegisterRef,NodeSet> &T : RUM) {
           RegisterRef R = T.first;
-          if (!isRestrictedToRef(PA, UA, R))
+          if (UA.Addr->getFlags() & NodeAttrs::Shadow)
             R = getRestrictedRegRef(UA);
           if (!MidDefs.hasCoverOf(R))
             UpReached.insert(R);
@@ -648,7 +648,7 @@ void Liveness::computeLiveIns() {
         auto &LOX = PhiLOX[PrA.Addr->getCode()];
         for (auto R : RUs) {
           RegisterRef RR = R.first;
-          if (!isRestrictedToRef(PA, UA, RR))
+          if (UA.Addr->getFlags() & NodeAttrs::Shadow)
             RR = getRestrictedRegRef(UA);
           // The restricted ref may be different from the ref that was
           // accessed in the "real use". This means that this phi use
@@ -770,29 +770,6 @@ void Liveness::resetKills(MachineBasicBlock *B) {
 }
 
 
-// For shadows, determine if RR is aliased to a reaching def of any other
-// shadow associated with RA. The register ref on RA will be "larger" than
-// each individual reaching def, and to determine the data-flow between defs
-// and uses of RR it may be necessary to visit all shadows. If RR is not
-// aliased to the reaching def of any other shadow, then visiting only RA
-// is sufficient. In that sense, the data flow of RR would be restricted to
-// the reference RA.
-// For non-shadows, this function returns "true".
-bool Liveness::isRestrictedToRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
-      RegisterRef RR) const {
-  NodeId Start = RA.Id;
-  for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA);
-       TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) {
-    NodeId RD = TA.Addr->getReachingDef();
-    if (RD == 0)
-      continue;
-    if (DFG.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef()))
-      return false;
-  }
-  return true;
-}
-
-
 RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
   assert(DFG.IsRef<NodeAttrs::Use>(RA));
   if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
@@ -850,12 +827,13 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
   }
 
   if (Trace) {
-    dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber()
-           << " after recursion into";
+    dbgs() << "\n-- BB#" << B->getNumber() << ": " << LLVM_FUNCTION_NAME
+           << " after recursion into: {";
     for (auto I : *N)
       dbgs() << ' ' << I->getBlock()->getNumber();
-    dbgs() << "\n  LiveIn: " << Print<RefMap>(LiveIn, DFG);
-    dbgs() << "\n  Local:  " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+    dbgs() << " }\n";
+    dbgs() << "  LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+    dbgs() << "  Local:  " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
   }
 
   // Add phi uses that are live on exit from this block.
Original file line number	Diff line number	Diff line change
`@@ -172,8 +172,8 @@ bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {`
`172`	`172`	`}`
`173`	`173`
`174`	`174`	`bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {`
`175`		`- return I.getOpcode() == Instruction::SDiv \|\|`
`176`		`- I.getOpcode() == Instruction::SRem;`
	`175`	`+ return I.getOpcode() == Instruction::AShr \|\|`
	`176`	`+ I.getOpcode() == Instruction::SDiv \|\| I.getOpcode() == Instruction::SRem;`
`177`	`177`	`}`
`178`	`178`
`179`	`179`	`bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {`
Original file line number	Diff line number	Diff line change
`@@ -441,7 +441,7 @@ bool AMDGPUPassConfig::addPreISel() {`
`441`	`441`	`}`
`442`	`442`
`443`	`443`	`bool AMDGPUPassConfig::addInstSelector() {`
`444`		`- addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));`
	`444`	`+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));`
`445`	`445`	`return false;`
`446`	`446`	`}`
`447`	`447`