
[RISCV] Rematerialize vmv.v.i #107550


Merged
2 commits merged on Sep 9, 2024
20 changes: 13 additions & 7 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -168,13 +168,19 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 
 bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
     const MachineInstr &MI) const {
-  if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
-      MI.getOperand(1).isUndef() &&
-      /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
-         vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
-         i.e. -riscv-vsetvl-after-rvv-regalloc=true */
-      !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
-    return true;
+  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+  case RISCV::VMV_V_I:
+  case RISCV::VID_V:
+    if (MI.getOperand(1).isUndef() &&
+        /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
+           and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
+           i.e. -riscv-vsetvl-after-rvv-regalloc=true */
+        !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
+      return true;
+    break;
+  default:
+    break;
+  }
   return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
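
Aside (not part of the diff): the comment in the hunk above boils down to the fact that vmv.v.i's result depends on the vl/vtype configuration in effect where it executes, so a rematerialized copy is only valid at points with the same configuration. A minimal illustration in RISC-V assembly, with registers chosen arbitrarily:

vsetvli a1, zero, e64, m8, ta, ma   # establishes the vl/vtype the splat executes under
vmv.v.i v8, 1                       # re-emitting this at another point is only safe if the
                                    # same vl/vtype configuration is in effect there

Once RISCVInsertVSETVLI has run, the pseudos carry implicit vl/vtype uses to model exactly this dependency, which is why the hook declines to rematerialize as soon as RISCV::VTYPE shows up as an implicit use.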

1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2478,6 +2478,7 @@ multiclass VPseudoUnaryVMV_V_X_I {
       def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
                        SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
                                   forcePassthruRead=true>;
+      let isReMaterializable = 1 in
       def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
                        SchedNullary<"WriteVIMovI", mx,
                                     forcePassthruRead=true>;
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -109,3 +109,65 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
store volatile <vscale x 8 x i64> %vid, ptr %p
ret void
}

define void @vmv.v.i(ptr %p) {
; POSTRA-LABEL: vmv.v.i:
; POSTRA: # %bb.0:
; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; POSTRA-NEXT: vmv.v.i v8, 1
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: vl8re64.v v16, (a0)
; POSTRA-NEXT: vl8re64.v v24, (a0)
; POSTRA-NEXT: vl8re64.v v0, (a0)
; POSTRA-NEXT: vl8re64.v v8, (a0)
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: vs8r.v v0, (a0)
; POSTRA-NEXT: vs8r.v v24, (a0)
; POSTRA-NEXT: vs8r.v v16, (a0)
; POSTRA-NEXT: vmv.v.i v8, 1
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: ret
;
; PRERA-LABEL: vmv.v.i:
; PRERA: # %bb.0:
; PRERA-NEXT: addi sp, sp, -16
; PRERA-NEXT: .cfi_def_cfa_offset 16
; PRERA-NEXT: csrr a1, vlenb
; PRERA-NEXT: slli a1, a1, 3
; PRERA-NEXT: sub sp, sp, a1
; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; PRERA-NEXT: vmv.v.i v8, 1
; PRERA-NEXT: vs8r.v v8, (a0)
; PRERA-NEXT: vl8re64.v v16, (a0)
; PRERA-NEXT: addi a1, sp, 16
; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; PRERA-NEXT: vl8re64.v v24, (a0)
; PRERA-NEXT: vl8re64.v v0, (a0)
; PRERA-NEXT: vl8re64.v v16, (a0)
; PRERA-NEXT: vs8r.v v16, (a0)
; PRERA-NEXT: vs8r.v v0, (a0)
; PRERA-NEXT: vs8r.v v24, (a0)
; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; PRERA-NEXT: vs8r.v v16, (a0)
; PRERA-NEXT: vs8r.v v8, (a0)
; PRERA-NEXT: csrr a0, vlenb
; PRERA-NEXT: slli a0, a0, 3
; PRERA-NEXT: add sp, sp, a0
; PRERA-NEXT: addi sp, sp, 16
; PRERA-NEXT: ret
%vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p

%a = load volatile <vscale x 8 x i64>, ptr %p
%b = load volatile <vscale x 8 x i64>, ptr %p
%c = load volatile <vscale x 8 x i64>, ptr %p
%d = load volatile <vscale x 8 x i64>, ptr %p
store volatile <vscale x 8 x i64> %d, ptr %p
store volatile <vscale x 8 x i64> %c, ptr %p
store volatile <vscale x 8 x i64> %b, ptr %p
store volatile <vscale x 8 x i64> %a, ptr %p

store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
ret void
}
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -519,6 +519,7 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
Collaborator:
Any idea why we're not turning this into a vmerge.vim with a negated mask? That would radically reduce register pressure and work, and be net equal in terms of instructions.
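
A rough sketch of the form being suggested, purely illustrative and assuming the mask in v0 can be negated in place (or into a spare mask register):

vmnot.m    v0, v0            # negate the mask (alias for vmnand.mm v0, v0, v0)
vmerge.vim v16, v16, 0, v0   # write 0 where the negated mask is set, keep v16 elsewhere

This would avoid materializing the zero splat into a whole m8 register (v24 above), at the cost of a single m1 mask negation.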

; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmv.v.i v24, 0
Contributor Author:
This is an example of an m8 register now being evicted due to the change in spill weights, even though nothing here actually needs to be spilled or rematerialized.

; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: slli a0, a0, 3