Skip to content

Commit 65dc53b

Browse files
authored
[RISCV] Rematerialize vmv.v.i (#107550)
This continues the line of work started in #97520, and gives a 2.5% reduction in the number of spills on SPEC CPU 2017.

Program            regalloc.NumSpills
                   lhs        rhs        diff
605.mcf_s            141.00     141.00    0.0%
505.mcf_r            141.00     141.00    0.0%
519.lbm_r             73.00      73.00    0.0%
619.lbm_s             68.00      68.00    0.0%
631.deepsjeng_s      354.00     353.00   -0.3%
531.deepsjeng_r      354.00     353.00   -0.3%
625.x264_s          1896.00    1886.00   -0.5%
525.x264_r          1896.00    1886.00   -0.5%
508.namd_r          6665.00    6598.00   -1.0%
644.nab_s            761.00     753.00   -1.1%
544.nab_r            761.00     753.00   -1.1%
638.imagick_s       4287.00    4181.00   -2.5%
538.imagick_r       4287.00    4181.00   -2.5%
602.gcc_s          12771.00   12450.00   -2.5%
502.gcc_r          12771.00   12450.00   -2.5%
510.parest_r       43876.00   42740.00   -2.6%
500.perlbench_r     4297.00    4179.00   -2.7%
600.perlbench_s     4297.00    4179.00   -2.7%
526.blender_r      13503.00   13103.00   -3.0%
511.povray_r        2006.00    1937.00   -3.4%
620.omnetpp_s        984.00     946.00   -3.9%
520.omnetpp_r        984.00     946.00   -3.9%
657.xz_s             302.00     289.00   -4.3%
557.xz_r             302.00     289.00   -4.3%
541.leela_r          378.00     356.00   -5.8%
641.leela_s          378.00     356.00   -5.8%
623.xalancbmk_s     1646.00    1548.00   -6.0%
523.xalancbmk_r     1646.00    1548.00   -6.0%
Geomean difference                       -2.5%

I initially held off submitting this patch because it surprisingly introduced a lot of spills in the test diffs, but after #107290 the `vmv.v.i` instructions that caused them are now gone. The gist is that marking vmv.v.i as rematerializable decreased its spill weight, which actually resulted in more m8 registers getting evicted and spilled during register allocation. The SPEC results show this isn't an issue in practice though, and I plan on posting a separate patch to explain this in more detail.
1 parent b5ee463 commit 65dc53b

File tree

4 files changed

+77
-7
lines changed

4 files changed

+77
-7
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,19 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
168168

169169
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
170170
const MachineInstr &MI) const {
171-
if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
172-
MI.getOperand(1).isUndef() &&
173-
/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
174-
vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
175-
i.e. -riscv-vsetvl-after-rvv-regalloc=true */
176-
!MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
177-
return true;
171+
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
172+
case RISCV::VMV_V_I:
173+
case RISCV::VID_V:
174+
if (MI.getOperand(1).isUndef() &&
175+
/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
176+
and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
177+
i.e. -riscv-vsetvl-after-rvv-regalloc=true */
178+
!MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
179+
return true;
180+
break;
181+
default:
182+
break;
183+
}
178184
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
179185
}
180186

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2478,6 +2478,7 @@ multiclass VPseudoUnaryVMV_V_X_I {
24782478
def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
24792479
SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
24802480
forcePassthruRead=true>;
2481+
let isReMaterializable = 1 in
24812482
def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
24822483
SchedNullary<"WriteVIMovI", mx,
24832484
forcePassthruRead=true>;

llvm/test/CodeGen/RISCV/rvv/remat.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,65 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
109109
store volatile <vscale x 8 x i64> %vid, ptr %p
110110
ret void
111111
}
112+
113+
define void @vmv.v.i(ptr %p) {
114+
; POSTRA-LABEL: vmv.v.i:
115+
; POSTRA: # %bb.0:
116+
; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
117+
; POSTRA-NEXT: vmv.v.i v8, 1
118+
; POSTRA-NEXT: vs8r.v v8, (a0)
119+
; POSTRA-NEXT: vl8re64.v v16, (a0)
120+
; POSTRA-NEXT: vl8re64.v v24, (a0)
121+
; POSTRA-NEXT: vl8re64.v v0, (a0)
122+
; POSTRA-NEXT: vl8re64.v v8, (a0)
123+
; POSTRA-NEXT: vs8r.v v8, (a0)
124+
; POSTRA-NEXT: vs8r.v v0, (a0)
125+
; POSTRA-NEXT: vs8r.v v24, (a0)
126+
; POSTRA-NEXT: vs8r.v v16, (a0)
127+
; POSTRA-NEXT: vmv.v.i v8, 1
128+
; POSTRA-NEXT: vs8r.v v8, (a0)
129+
; POSTRA-NEXT: ret
130+
;
131+
; PRERA-LABEL: vmv.v.i:
132+
; PRERA: # %bb.0:
133+
; PRERA-NEXT: addi sp, sp, -16
134+
; PRERA-NEXT: .cfi_def_cfa_offset 16
135+
; PRERA-NEXT: csrr a1, vlenb
136+
; PRERA-NEXT: slli a1, a1, 3
137+
; PRERA-NEXT: sub sp, sp, a1
138+
; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
139+
; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
140+
; PRERA-NEXT: vmv.v.i v8, 1
141+
; PRERA-NEXT: vs8r.v v8, (a0)
142+
; PRERA-NEXT: vl8re64.v v16, (a0)
143+
; PRERA-NEXT: addi a1, sp, 16
144+
; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
145+
; PRERA-NEXT: vl8re64.v v24, (a0)
146+
; PRERA-NEXT: vl8re64.v v0, (a0)
147+
; PRERA-NEXT: vl8re64.v v16, (a0)
148+
; PRERA-NEXT: vs8r.v v16, (a0)
149+
; PRERA-NEXT: vs8r.v v0, (a0)
150+
; PRERA-NEXT: vs8r.v v24, (a0)
151+
; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
152+
; PRERA-NEXT: vs8r.v v16, (a0)
153+
; PRERA-NEXT: vs8r.v v8, (a0)
154+
; PRERA-NEXT: csrr a0, vlenb
155+
; PRERA-NEXT: slli a0, a0, 3
156+
; PRERA-NEXT: add sp, sp, a0
157+
; PRERA-NEXT: addi sp, sp, 16
158+
; PRERA-NEXT: ret
159+
%vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
160+
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
161+
162+
%a = load volatile <vscale x 8 x i64>, ptr %p
163+
%b = load volatile <vscale x 8 x i64>, ptr %p
164+
%c = load volatile <vscale x 8 x i64>, ptr %p
165+
%d = load volatile <vscale x 8 x i64>, ptr %p
166+
store volatile <vscale x 8 x i64> %d, ptr %p
167+
store volatile <vscale x 8 x i64> %c, ptr %p
168+
store volatile <vscale x 8 x i64> %b, ptr %p
169+
store volatile <vscale x 8 x i64> %a, ptr %p
170+
171+
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
172+
ret void
173+
}

llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
519519
; CHECK-NEXT: vmv.v.i v24, 0
520520
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
521521
; CHECK-NEXT: vmv1r.v v0, v7
522+
; CHECK-NEXT: vmv.v.i v24, 0
522523
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
523524
; CHECK-NEXT: vs8r.v v8, (a1)
524525
; CHECK-NEXT: slli a0, a0, 3

0 commit comments

Comments
 (0)