Skip to content

Commit de423cf

Browse files
authored
[RISCV] Prefer vsetivli for VLMAX when VLEN is exactly known (#75509)
If VLEN is exactly known, we may be able to use the vsetivli encoding instead of the vsetvli a0, zero, <vtype> encoding. This slightly reduces register pressure. This builds on 632f1c5, but reverses course a bit. It turns out to be quite complicated to canonicalize from VLMAX to immediate early because the sentinel value is widely used in tablegen patterns without knowledge of LMUL. Instead, we canonicalize towards the VLMAX representation, and then pick the immediate form during insertion since we have the LMUL information there. Within InsertVSETVLI, this could reasonable fit in a couple places. If reviewers want me to e.g. move it to emission, let me know. Doing so may require a bit of extra code to e.g. handle comparisons of the two forms, but shouldn't be too complicated.
1 parent b1ae461 commit de423cf

File tree

4 files changed

+72
-33
lines changed

4 files changed

+72
-33
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2592,11 +2592,12 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
25922592

25932593
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
25942594
SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2595-
// If we know the exact VLEN, our VL is exactly equal to VLMAX, and
2596-
// we can't encode the AVL as an immediate, use the VLMAX encoding.
2595+
// If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2596+
// canonicalize the representation. InsertVSETVLI will pick the immediate
2597+
// encoding later if profitable.
25972598
const auto [MinVLMAX, MaxVLMAX] =
25982599
RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2599-
if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
2600+
if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
26002601
return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
26012602

26022603
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,18 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
804804
return NewInfo;
805805
}
806806

807+
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
808+
RISCVII::VLMUL VLMul) {
809+
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
810+
if (Fractional)
811+
VLEN = VLEN / LMul;
812+
else
813+
VLEN = VLEN * LMul;
814+
return VLEN/SEW;
815+
}
816+
807817
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
818+
const RISCVSubtarget &ST,
808819
const MachineRegisterInfo *MRI) {
809820
VSETVLIInfo InstrInfo;
810821

@@ -846,8 +857,15 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
846857
if (VLOp.isImm()) {
847858
int64_t Imm = VLOp.getImm();
848859
// Convert the VLMax sentinel to X0 register.
849-
if (Imm == RISCV::VLMaxSentinel)
850-
InstrInfo.setAVLReg(RISCV::X0);
860+
if (Imm == RISCV::VLMaxSentinel) {
861+
// If we know the exact VLEN, see if we can use the constant encoding
862+
// for the VLMAX instead. This reduces register pressure slightly.
863+
const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
864+
if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
865+
InstrInfo.setAVLImm(VLMAX);
866+
else
867+
InstrInfo.setAVLReg(RISCV::X0);
868+
}
851869
else
852870
InstrInfo.setAVLImm(Imm);
853871
} else {
@@ -983,7 +1001,7 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
9831001
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
9841002
const VSETVLIInfo &Require,
9851003
const VSETVLIInfo &CurInfo) const {
986-
assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
1004+
assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
9871005

9881006
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
9891007
return true;
@@ -1071,7 +1089,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
10711089
if (!RISCVII::hasSEWOp(TSFlags))
10721090
return;
10731091

1074-
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1092+
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
10751093
assert(NewInfo.isValid() && !NewInfo.isUnknown());
10761094
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
10771095
return;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -652,25 +652,45 @@ define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, ptr %y) {
652652
}
653653

654654
define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, ptr %y) {
655-
; CHECK-LABEL: extract_v2i1_nxv2i1_2:
656-
; CHECK: # %bb.0:
657-
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
658-
; CHECK-NEXT: vmv.v.i v8, 0
659-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
660-
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
661-
; CHECK-NEXT: vslidedown.vi v8, v8, 2
662-
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
663-
; CHECK-NEXT: vmsne.vi v0, v8, 0
664-
; CHECK-NEXT: vmv.v.i v8, 0
665-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
666-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
667-
; CHECK-NEXT: vmv.v.i v9, 0
668-
; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
669-
; CHECK-NEXT: vmv.v.v v9, v8
670-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
671-
; CHECK-NEXT: vmsne.vi v8, v9, 0
672-
; CHECK-NEXT: vsm.v v8, (a0)
673-
; CHECK-NEXT: ret
655+
; CHECK-V-LABEL: extract_v2i1_nxv2i1_2:
656+
; CHECK-V: # %bb.0:
657+
; CHECK-V-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
658+
; CHECK-V-NEXT: vmv.v.i v8, 0
659+
; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
660+
; CHECK-V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
661+
; CHECK-V-NEXT: vslidedown.vi v8, v8, 2
662+
; CHECK-V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
663+
; CHECK-V-NEXT: vmsne.vi v0, v8, 0
664+
; CHECK-V-NEXT: vmv.v.i v8, 0
665+
; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
666+
; CHECK-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
667+
; CHECK-V-NEXT: vmv.v.i v9, 0
668+
; CHECK-V-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
669+
; CHECK-V-NEXT: vmv.v.v v9, v8
670+
; CHECK-V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
671+
; CHECK-V-NEXT: vmsne.vi v8, v9, 0
672+
; CHECK-V-NEXT: vsm.v v8, (a0)
673+
; CHECK-V-NEXT: ret
674+
;
675+
; CHECK-KNOWNVLEN128-LABEL: extract_v2i1_nxv2i1_2:
676+
; CHECK-KNOWNVLEN128: # %bb.0:
677+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
678+
; CHECK-KNOWNVLEN128-NEXT: vmv.v.i v8, 0
679+
; CHECK-KNOWNVLEN128-NEXT: vmerge.vim v8, v8, 1, v0
680+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
681+
; CHECK-KNOWNVLEN128-NEXT: vslidedown.vi v8, v8, 2
682+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
683+
; CHECK-KNOWNVLEN128-NEXT: vmsne.vi v0, v8, 0
684+
; CHECK-KNOWNVLEN128-NEXT: vmv.v.i v8, 0
685+
; CHECK-KNOWNVLEN128-NEXT: vmerge.vim v8, v8, 1, v0
686+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
687+
; CHECK-KNOWNVLEN128-NEXT: vmv.v.i v9, 0
688+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
689+
; CHECK-KNOWNVLEN128-NEXT: vmv.v.v v9, v8
690+
; CHECK-KNOWNVLEN128-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
691+
; CHECK-KNOWNVLEN128-NEXT: vmsne.vi v8, v9, 0
692+
; CHECK-KNOWNVLEN128-NEXT: vsm.v v8, (a0)
693+
; CHECK-KNOWNVLEN128-NEXT: ret
674694
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
675695
store <2 x i1> %c, ptr %y
676696
ret void

llvm/test/CodeGen/RISCV/rvv/load-add-store.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ define void @exact_vlen_vadd_vint8m1(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_
362362
; CHECK: # %bb.0:
363363
; CHECK-NEXT: vl1r.v v8, (a1)
364364
; CHECK-NEXT: vl1r.v v9, (a2)
365-
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
365+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
366366
; CHECK-NEXT: vadd.vv v8, v8, v9
367367
; CHECK-NEXT: vs1r.v v8, (a0)
368368
; CHECK-NEXT: ret
@@ -392,7 +392,7 @@ define void @exact_vlen_vadd_vint8m2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_
392392
define void @exact_vlen_vadd_vint8mf2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
393393
; CHECK-LABEL: exact_vlen_vadd_vint8mf2:
394394
; CHECK: # %bb.0:
395-
; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
395+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
396396
; CHECK-NEXT: vle8.v v8, (a1)
397397
; CHECK-NEXT: vle8.v v9, (a2)
398398
; CHECK-NEXT: vadd.vv v8, v8, v9
@@ -408,7 +408,7 @@ define void @exact_vlen_vadd_vint8mf2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
408408
define void @exact_vlen_vadd_vint8mf4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
409409
; CHECK-LABEL: exact_vlen_vadd_vint8mf4:
410410
; CHECK: # %bb.0:
411-
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
411+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
412412
; CHECK-NEXT: vle8.v v8, (a1)
413413
; CHECK-NEXT: vle8.v v9, (a2)
414414
; CHECK-NEXT: vadd.vv v8, v8, v9
@@ -424,7 +424,7 @@ define void @exact_vlen_vadd_vint8mf4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
424424
define void @exact_vlen_vadd_vint8mf8(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
425425
; CHECK-LABEL: exact_vlen_vadd_vint8mf8:
426426
; CHECK: # %bb.0:
427-
; CHECK-NEXT: vsetvli a3, zero, e8, mf8, ta, ma
427+
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
428428
; CHECK-NEXT: vle8.v v8, (a1)
429429
; CHECK-NEXT: vle8.v v9, (a2)
430430
; CHECK-NEXT: vadd.vv v8, v8, v9
@@ -442,7 +442,7 @@ define void @exact_vlen_vadd_vint32m1(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
442442
; CHECK: # %bb.0:
443443
; CHECK-NEXT: vl1re32.v v8, (a1)
444444
; CHECK-NEXT: vl1re32.v v9, (a2)
445-
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
445+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
446446
; CHECK-NEXT: vadd.vv v8, v8, v9
447447
; CHECK-NEXT: vs1r.v v8, (a0)
448448
; CHECK-NEXT: ret
@@ -458,7 +458,7 @@ define void @exact_vlen_vadd_vint32m2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
458458
; CHECK: # %bb.0:
459459
; CHECK-NEXT: vl2re32.v v8, (a1)
460460
; CHECK-NEXT: vl2re32.v v10, (a2)
461-
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
461+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
462462
; CHECK-NEXT: vadd.vv v8, v8, v10
463463
; CHECK-NEXT: vs2r.v v8, (a0)
464464
; CHECK-NEXT: ret
@@ -474,7 +474,7 @@ define void @exact_vlen_vadd_vint32m4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
474474
; CHECK: # %bb.0:
475475
; CHECK-NEXT: vl4re32.v v8, (a1)
476476
; CHECK-NEXT: vl4re32.v v12, (a2)
477-
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
477+
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
478478
; CHECK-NEXT: vadd.vv v8, v8, v12
479479
; CHECK-NEXT: vs4r.v v8, (a0)
480480
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)