Skip to content

Commit 41454ab

Browse files
committed
[RISCV] Use constant pool for large integers
For large integers (for example, magic numbers generated by TargetLowering::BuildSDIV when dividing by a constant), we may need about 4~8 instructions to build them. At the same time, it takes just two instructions to load a constant (with extra cycles to access memory), so it may be profitable to put these integers into the constant pool. Reviewed By: asb, craig.topper Differential Revision: https://reviews.llvm.org/D114950
1 parent 05f82dc commit 41454ab

31 files changed

+1662
-3713
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,37 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
125125
CurDAG->RemoveDeadNodes();
126126
}
127127

128-
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
129-
const RISCVSubtarget &Subtarget) {
128+
// Materialize the immediate Imm by loading it from the constant pool rather
// than building it with an instruction sequence. A constant-pool entry holding
// Imm is created, its address is formed via RISCVTargetLowering::getAddr, and
// an LD machine node (offset 0, chained to the DAG entry node) is returned
// with a constant-pool load memory operand attached. Only MVT::i64 is
// accepted; any other VT trips the assertion below.
static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
129+
const MVT VT, int64_t Imm,
130+
const RISCVSubtarget &Subtarget) {
131+
assert(VT == MVT::i64 && "Expecting MVT::i64");
132+
const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
133+
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
134+
ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
135+
SDValue Addr = TLI->getAddr(CP, *CurDAG);
136+
SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
137+
// Since there is no data race, the chain can be the entry node.
138+
SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
139+
CurDAG->getEntryNode());
140+
// Attach a memory operand describing this access as a load from the constant
// pool, using the alignment of the pool entry created above.
MachineFunction &MF = CurDAG->getMachineFunction();
141+
MachineMemOperand *MemOp = MF.getMachineMemOperand(
142+
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
143+
LLT(VT), CP->getAlign());
144+
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
145+
return Load;
146+
}
147+
148+
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
149+
int64_t Imm, const RISCVSubtarget &Subtarget) {
130150
MVT XLenVT = Subtarget.getXLenVT();
131151
RISCVMatInt::InstSeq Seq =
132152
RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
133153

154+
// If Imm is expensive to build, then we put it into constant pool.
155+
if (Subtarget.useConstantPoolForLargeInts() &&
156+
Seq.size() > Subtarget.getMaxBuildIntsCost())
157+
return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
158+
134159
SDNode *Result = nullptr;
135160
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
136161
for (RISCVMatInt::Inst &Inst : Seq) {
@@ -498,7 +523,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
498523
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
499524
Imm = SignExtend64(Imm, 32);
500525

501-
ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
526+
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
502527
return;
503528
}
504529
case ISD::FrameIndex: {
@@ -774,7 +799,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
774799
ShiftedC1 = SignExtend64(ShiftedC1, 32);
775800

776801
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
777-
SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget);
802+
SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
778803
SDNode *SLLI =
779804
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
780805
CurDAG->getTargetConstant(LeadingZeros, DL, VT));

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@ class RISCVTargetLowering : public TargetLowering {
462462
SelectionDAG &DAG) const override;
463463
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
464464
SmallVectorImpl<SDValue> &InVals) const override;
465+
template <class NodeTy>
466+
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
465467

466468
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
467469
Type *Ty) const override {
@@ -544,9 +546,6 @@ class RISCVTargetLowering : public TargetLowering {
544546
bool IsRet, CallLoweringInfo *CLI,
545547
RISCVCCAssignFn Fn) const;
546548

547-
template <class NodeTy>
548-
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
549-
550549
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
551550
bool UseGOT) const;
552551
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

llvm/lib/Target/RISCV/RISCVSubtarget.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@ static cl::opt<unsigned> RVVVectorELENMax(
5050
cl::desc("The maximum ELEN value to use for fixed length vectors."),
5151
cl::init(64), cl::Hidden);
5252

53+
static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
54+
"riscv-disable-using-constant-pool-for-large-ints",
55+
cl::desc("Disable using constant pool for large integers."),
56+
cl::init(false), cl::Hidden);
57+
58+
static cl::opt<unsigned> RISCVMaxBuildIntsCost(
59+
"riscv-max-build-ints-cost",
60+
cl::desc("The maximum cost used for building integers."), cl::init(0),
61+
cl::Hidden);
62+
5363
void RISCVSubtarget::anchor() {}
5464

5565
RISCVSubtarget &
@@ -110,6 +120,21 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
110120
return RegBankInfo.get();
111121
}
112122

123+
// Returns true unless the hidden command-line option
// -riscv-disable-using-constant-pool-for-large-ints has been set.
bool RISCVSubtarget::useConstantPoolForLargeInts() const {
124+
return !RISCVDisableUsingConstantPoolForLargeInts;
125+
}
126+
127+
// Returns the cost threshold for building an integer. When the
// -riscv-max-build-ints-cost option is 0 (its initial value), the threshold is
// the scheduling model's LoadLatency + 1; otherwise it is the option value,
// clamped to a minimum of 2.
unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
128+
// Loading integer from constant pool needs two instructions (the reason why
129+
// the minimum cost is 2): an address calculation instruction and a load
130+
// instruction. Usually, address calculation and instructions used for
131+
// building integers (addi, slli, etc.) can be done in one cycle, so here we
132+
// set the default cost to (LoadLatency + 1) if no threshold is provided.
133+
return RISCVMaxBuildIntsCost == 0
134+
? getSchedModel().LoadLatency + 1
135+
: std::max<unsigned>(2, RISCVMaxBuildIntsCost);
136+
}
137+
113138
unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
114139
assert(hasVInstructions() &&
115140
"Tried to get vector length without Zve or V extension support!");

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
157157
const LegalizerInfo *getLegalizerInfo() const override;
158158
const RegisterBankInfo *getRegBankInfo() const override;
159159

160+
bool useConstantPoolForLargeInts() const;
161+
162+
// Maximum cost used for building integers; integers will be put into the
163+
// constant pool if this cost is exceeded.
164+
unsigned getMaxBuildIntsCost() const;
165+
160166
// Return the known range for the bit length of RVV data registers. A value
161167
// of 0 means nothing is known about that particular limit beyond what's
162168
// implied by the architecture.

llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll

Lines changed: 42 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -513,46 +513,24 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
513513
; RV64I-NEXT: addi a1, a0, -1
514514
; RV64I-NEXT: not a0, a0
515515
; RV64I-NEXT: and a0, a0, a1
516-
; RV64I-NEXT: srli a1, a0, 1
517-
; RV64I-NEXT: lui a2, 21845
518-
; RV64I-NEXT: addiw a2, a2, 1365
519-
; RV64I-NEXT: slli a2, a2, 12
520-
; RV64I-NEXT: addi a2, a2, 1365
521-
; RV64I-NEXT: slli a2, a2, 12
522-
; RV64I-NEXT: addi a2, a2, 1365
523-
; RV64I-NEXT: slli a2, a2, 12
524-
; RV64I-NEXT: addi a2, a2, 1365
525-
; RV64I-NEXT: and a1, a1, a2
516+
; RV64I-NEXT: lui a1, %hi(.LCPI7_0)
517+
; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1)
518+
; RV64I-NEXT: lui a2, %hi(.LCPI7_1)
519+
; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2)
520+
; RV64I-NEXT: srli a3, a0, 1
521+
; RV64I-NEXT: and a1, a3, a1
526522
; RV64I-NEXT: sub a0, a0, a1
527-
; RV64I-NEXT: lui a1, 13107
528-
; RV64I-NEXT: addiw a1, a1, 819
529-
; RV64I-NEXT: slli a1, a1, 12
530-
; RV64I-NEXT: addi a1, a1, 819
531-
; RV64I-NEXT: slli a1, a1, 12
532-
; RV64I-NEXT: addi a1, a1, 819
533-
; RV64I-NEXT: slli a1, a1, 12
534-
; RV64I-NEXT: addi a1, a1, 819
535-
; RV64I-NEXT: and a2, a0, a1
523+
; RV64I-NEXT: and a1, a0, a2
536524
; RV64I-NEXT: srli a0, a0, 2
537-
; RV64I-NEXT: and a0, a0, a1
538-
; RV64I-NEXT: add a0, a2, a0
525+
; RV64I-NEXT: and a0, a0, a2
526+
; RV64I-NEXT: lui a2, %hi(.LCPI7_2)
527+
; RV64I-NEXT: ld a2, %lo(.LCPI7_2)(a2)
528+
; RV64I-NEXT: add a0, a1, a0
539529
; RV64I-NEXT: srli a1, a0, 4
540530
; RV64I-NEXT: add a0, a0, a1
541-
; RV64I-NEXT: lui a1, 3855
542-
; RV64I-NEXT: addiw a1, a1, 241
543-
; RV64I-NEXT: slli a1, a1, 12
544-
; RV64I-NEXT: addi a1, a1, -241
545-
; RV64I-NEXT: slli a1, a1, 12
546-
; RV64I-NEXT: addi a1, a1, 241
547-
; RV64I-NEXT: slli a1, a1, 12
548-
; RV64I-NEXT: addi a1, a1, -241
549-
; RV64I-NEXT: and a0, a0, a1
550-
; RV64I-NEXT: lui a1, 4112
551-
; RV64I-NEXT: addiw a1, a1, 257
552-
; RV64I-NEXT: slli a1, a1, 16
553-
; RV64I-NEXT: addi a1, a1, 257
554-
; RV64I-NEXT: slli a1, a1, 16
555-
; RV64I-NEXT: addi a1, a1, 257
531+
; RV64I-NEXT: and a0, a0, a2
532+
; RV64I-NEXT: lui a1, %hi(.LCPI7_3)
533+
; RV64I-NEXT: ld a1, %lo(.LCPI7_3)(a1)
556534
; RV64I-NEXT: call __muldi3@plt
557535
; RV64I-NEXT: srli a0, a0, 56
558536
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -811,46 +789,24 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
811789
; RV64I-NEXT: addi a1, a0, -1
812790
; RV64I-NEXT: not a0, a0
813791
; RV64I-NEXT: and a0, a0, a1
814-
; RV64I-NEXT: srli a1, a0, 1
815-
; RV64I-NEXT: lui a2, 21845
816-
; RV64I-NEXT: addiw a2, a2, 1365
817-
; RV64I-NEXT: slli a2, a2, 12
818-
; RV64I-NEXT: addi a2, a2, 1365
819-
; RV64I-NEXT: slli a2, a2, 12
820-
; RV64I-NEXT: addi a2, a2, 1365
821-
; RV64I-NEXT: slli a2, a2, 12
822-
; RV64I-NEXT: addi a2, a2, 1365
823-
; RV64I-NEXT: and a1, a1, a2
792+
; RV64I-NEXT: lui a1, %hi(.LCPI11_0)
793+
; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1)
794+
; RV64I-NEXT: lui a2, %hi(.LCPI11_1)
795+
; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2)
796+
; RV64I-NEXT: srli a3, a0, 1
797+
; RV64I-NEXT: and a1, a3, a1
824798
; RV64I-NEXT: sub a0, a0, a1
825-
; RV64I-NEXT: lui a1, 13107
826-
; RV64I-NEXT: addiw a1, a1, 819
827-
; RV64I-NEXT: slli a1, a1, 12
828-
; RV64I-NEXT: addi a1, a1, 819
829-
; RV64I-NEXT: slli a1, a1, 12
830-
; RV64I-NEXT: addi a1, a1, 819
831-
; RV64I-NEXT: slli a1, a1, 12
832-
; RV64I-NEXT: addi a1, a1, 819
833-
; RV64I-NEXT: and a2, a0, a1
799+
; RV64I-NEXT: and a1, a0, a2
834800
; RV64I-NEXT: srli a0, a0, 2
835-
; RV64I-NEXT: and a0, a0, a1
836-
; RV64I-NEXT: add a0, a2, a0
801+
; RV64I-NEXT: and a0, a0, a2
802+
; RV64I-NEXT: lui a2, %hi(.LCPI11_2)
803+
; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2)
804+
; RV64I-NEXT: add a0, a1, a0
837805
; RV64I-NEXT: srli a1, a0, 4
838806
; RV64I-NEXT: add a0, a0, a1
839-
; RV64I-NEXT: lui a1, 3855
840-
; RV64I-NEXT: addiw a1, a1, 241
841-
; RV64I-NEXT: slli a1, a1, 12
842-
; RV64I-NEXT: addi a1, a1, -241
843-
; RV64I-NEXT: slli a1, a1, 12
844-
; RV64I-NEXT: addi a1, a1, 241
845-
; RV64I-NEXT: slli a1, a1, 12
846-
; RV64I-NEXT: addi a1, a1, -241
847-
; RV64I-NEXT: and a0, a0, a1
848-
; RV64I-NEXT: lui a1, 4112
849-
; RV64I-NEXT: addiw a1, a1, 257
850-
; RV64I-NEXT: slli a1, a1, 16
851-
; RV64I-NEXT: addi a1, a1, 257
852-
; RV64I-NEXT: slli a1, a1, 16
853-
; RV64I-NEXT: addi a1, a1, 257
807+
; RV64I-NEXT: and a0, a0, a2
808+
; RV64I-NEXT: lui a1, %hi(.LCPI11_3)
809+
; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1)
854810
; RV64I-NEXT: call __muldi3@plt
855811
; RV64I-NEXT: srli a0, a0, 56
856812
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -982,46 +938,24 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
982938
; RV64I: # %bb.0:
983939
; RV64I-NEXT: addi sp, sp, -16
984940
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
985-
; RV64I-NEXT: srli a1, a0, 1
986-
; RV64I-NEXT: lui a2, 21845
987-
; RV64I-NEXT: addiw a2, a2, 1365
988-
; RV64I-NEXT: slli a2, a2, 12
989-
; RV64I-NEXT: addi a2, a2, 1365
990-
; RV64I-NEXT: slli a2, a2, 12
991-
; RV64I-NEXT: addi a2, a2, 1365
992-
; RV64I-NEXT: slli a2, a2, 12
993-
; RV64I-NEXT: addi a2, a2, 1365
994-
; RV64I-NEXT: and a1, a1, a2
941+
; RV64I-NEXT: lui a1, %hi(.LCPI13_0)
942+
; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1)
943+
; RV64I-NEXT: lui a2, %hi(.LCPI13_1)
944+
; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2)
945+
; RV64I-NEXT: srli a3, a0, 1
946+
; RV64I-NEXT: and a1, a3, a1
995947
; RV64I-NEXT: sub a0, a0, a1
996-
; RV64I-NEXT: lui a1, 13107
997-
; RV64I-NEXT: addiw a1, a1, 819
998-
; RV64I-NEXT: slli a1, a1, 12
999-
; RV64I-NEXT: addi a1, a1, 819
1000-
; RV64I-NEXT: slli a1, a1, 12
1001-
; RV64I-NEXT: addi a1, a1, 819
1002-
; RV64I-NEXT: slli a1, a1, 12
1003-
; RV64I-NEXT: addi a1, a1, 819
1004-
; RV64I-NEXT: and a2, a0, a1
948+
; RV64I-NEXT: and a1, a0, a2
1005949
; RV64I-NEXT: srli a0, a0, 2
1006-
; RV64I-NEXT: and a0, a0, a1
1007-
; RV64I-NEXT: add a0, a2, a0
950+
; RV64I-NEXT: and a0, a0, a2
951+
; RV64I-NEXT: lui a2, %hi(.LCPI13_2)
952+
; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2)
953+
; RV64I-NEXT: add a0, a1, a0
1008954
; RV64I-NEXT: srli a1, a0, 4
1009955
; RV64I-NEXT: add a0, a0, a1
1010-
; RV64I-NEXT: lui a1, 3855
1011-
; RV64I-NEXT: addiw a1, a1, 241
1012-
; RV64I-NEXT: slli a1, a1, 12
1013-
; RV64I-NEXT: addi a1, a1, -241
1014-
; RV64I-NEXT: slli a1, a1, 12
1015-
; RV64I-NEXT: addi a1, a1, 241
1016-
; RV64I-NEXT: slli a1, a1, 12
1017-
; RV64I-NEXT: addi a1, a1, -241
1018-
; RV64I-NEXT: and a0, a0, a1
1019-
; RV64I-NEXT: lui a1, 4112
1020-
; RV64I-NEXT: addiw a1, a1, 257
1021-
; RV64I-NEXT: slli a1, a1, 16
1022-
; RV64I-NEXT: addi a1, a1, 257
1023-
; RV64I-NEXT: slli a1, a1, 16
1024-
; RV64I-NEXT: addi a1, a1, 257
956+
; RV64I-NEXT: and a0, a0, a2
957+
; RV64I-NEXT: lui a1, %hi(.LCPI13_3)
958+
; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1)
1025959
; RV64I-NEXT: call __muldi3@plt
1026960
; RV64I-NEXT: srli a0, a0, 56
1027961
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload

0 commit comments

Comments
 (0)