llvm
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Lines changed: 29 additions & 4 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
Lines changed: 2 additions & 3 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
Lines changed: 25 additions & 0 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
Lines changed: 6 additions & 0 deletions
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
Lines changed: 42 additions & 108 deletions
@@ -125,12 +125,37 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
     CurDAG->RemoveDeadNodes();
 }
 
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
-                         const RISCVSubtarget &Subtarget) {
+static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
+                                         const MVT VT, int64_t Imm,
+                                         const RISCVSubtarget &Subtarget) {
+  assert(VT == MVT::i64 && "Expecting MVT::i64"); // The load below is RISCV::LD.
+  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
+      ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
+  SDValue Addr = TLI->getAddr(CP, *CurDAG); // Address of the pool entry.
+  SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); // Zero load offset.
+  // Materialize Imm with one LD; no data race, so chain on the entry node.
+  SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
+                                        CurDAG->getEntryNode());
+  MachineFunction &MF = CurDAG->getMachineFunction();
+  MachineMemOperand *MemOp = MF.getMachineMemOperand(
+      MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+      LLT(VT), CP->getAlign());
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
+  return Load; // LD node carrying the memory operand set above.
+}
+
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+                         int64_t Imm, const RISCVSubtarget &Subtarget) {
   MVT XLenVT = Subtarget.getXLenVT();
   RISCVMatInt::InstSeq Seq =
       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
+  // If Imm is expensive to build, then we put it into constant pool.
+  if (Subtarget.useConstantPoolForLargeInts() &&
+      Seq.size() > Subtarget.getMaxBuildIntsCost())
+    return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+
   SDNode *Result = nullptr;
   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
   for (RISCVMatInt::Inst &Inst : Seq) {
@@ -498,7 +523,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
       Imm = SignExtend64(Imm, 32);
 
-    ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
+    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
     return;
   }
   case ISD::FrameIndex: {
@@ -774,7 +799,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       ShiftedC1 = SignExtend64(ShiftedC1, 32);
 
     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
-    SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget);
+    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
     SDNode *SLLI =
         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
 
@@ -462,6 +462,8 @@ class RISCVTargetLowering : public TargetLowering {
                       SelectionDAG &DAG) const override;
   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
+  template <class NodeTy> // Moved up; called from selectImmWithConstantPool.
+  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
 
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override {
@@ -544,9 +546,6 @@ class RISCVTargetLowering : public TargetLowering {
                          bool IsRet, CallLoweringInfo *CLI,
                          RISCVCCAssignFn Fn) const;
 
-  template <class NodeTy>
-  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
-
   SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                            bool UseGOT) const;
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
 
@@ -50,6 +50,16 @@ static cl::opt<unsigned> RVVVectorELENMax(
     cl::desc("The maximum ELEN value to use for fixed length vectors."),
     cl::init(64), cl::Hidden);
 
+static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
+    "riscv-disable-using-constant-pool-for-large-ints",
+    cl::desc("Disable using constant pool for large integers."),
+    cl::init(false), cl::Hidden); // Constant pool use is on by default.
+
+static cl::opt<unsigned> RISCVMaxBuildIntsCost(
+    "riscv-max-build-ints-cost",
+    cl::desc("The maximum cost used for building integers."), cl::init(0),
+    cl::Hidden); // 0 = use LoadLatency + 1; other values clamp to >= 2.
+
 void RISCVSubtarget::anchor() {}
 
 RISCVSubtarget &
@@ -110,6 +120,21 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
   return RegBankInfo.get();
 }
 
+bool RISCVSubtarget::useConstantPoolForLargeInts() const {
+  return !RISCVDisableUsingConstantPoolForLargeInts; // Inverted cl::opt flag.
+}
+
+unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
+  // Loading an integer from the constant pool takes two instructions, an
+  // address calculation and a load, which is why a user-provided threshold is
+  // clamped to at least 2. Address calculation and the instructions that build
+  // integers (addi, slli, etc.) can usually be done in one cycle, so when no
+  // threshold is given (option == 0) the default is LoadLatency + 1.
+  return RISCVMaxBuildIntsCost == 0
+             ? getSchedModel().LoadLatency + 1 // Default; not clamped to 2.
+             : std::max<unsigned>(2, RISCVMaxBuildIntsCost); // User value.
+}
+
 unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
   assert(hasVInstructions() &&
          "Tried to get vector length without Zve or V extension support!");
 
@@ -157,6 +157,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   const LegalizerInfo *getLegalizerInfo() const override;
   const RegisterBankInfo *getRegBankInfo() const override;
 
+  bool useConstantPoolForLargeInts() const; // False only if disabled by option.
+
+  // Maximum instruction count allowed for building an integer; integers that
+  // need a longer sequence are put into the constant pool instead.
+  unsigned getMaxBuildIntsCost() const;
+
   // Return the known range for the bit length of RVV data registers. A value
   // of 0 means nothing is known about that particular limit beyond what's
   // implied by the architecture.
 
@@ -513,46 +513,24 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a2, 21845
-; RV64I-NEXT:    addiw a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI7_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI7_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI7_1)(a2)
+; RV64I-NEXT:    srli a3, a0, 1
+; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
-; RV64I-NEXT:    lui a1, 13107
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI7_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI7_2)(a2)
+; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    lui a1, 3855
-; RV64I-NEXT:    addiw a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI7_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI7_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -811,46 +789,24 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a2, 21845
-; RV64I-NEXT:    addiw a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI11_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI11_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI11_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI11_1)(a2)
+; RV64I-NEXT:    srli a3, a0, 1
+; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
-; RV64I-NEXT:    lui a1, 13107
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI11_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI11_2)(a2)
+; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    lui a1, 3855
-; RV64I-NEXT:    addiw a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI11_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI11_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -982,46 +938,24 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a2, 21845
-; RV64I-NEXT:    addiw a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    slli a2, a2, 12
-; RV64I-NEXT:    addi a2, a2, 1365
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI13_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI13_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI13_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI13_1)(a2)
+; RV64I-NEXT:    srli a3, a0, 1
+; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
-; RV64I-NEXT:    lui a1, 13107
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 819
-; RV64I-NEXT:    and a2, a0, a1
+; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    add a0, a2, a0
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI13_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI13_2)(a2)
+; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    lui a1, 3855
-; RV64I-NEXT:    addiw a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, 241
-; RV64I-NEXT:    slli a1, a1, 12
-; RV64I-NEXT:    addi a1, a1, -241
-; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
-; RV64I-NEXT:    slli a1, a1, 16
-; RV64I-NEXT:    addi a1, a1, 257
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a1, %hi(.LCPI13_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI13_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload