
[RISCV] Rematerialize vid.v #97520

Merged · 2 commits · Jul 4, 2024
12 changes: 12 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -166,6 +166,18 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}

bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
MI.getOperand(1).isUndef() &&
/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
i.e. -riscv-vsetvl-after-rvv-regalloc=true */
!MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
return true;
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
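
For context, this override feeds the generic query in TargetInstrInfo.h, which only consults the target hook after checking the instruction's isReMaterializable flag (set in the TableGen change below). A lightly abridged sketch of that upstream entry point, based on the current in-tree shape rather than quoting it verbatim:

// Sketch, not part of this patch: the public query that passes such as
// MachineLICM and the register allocator call, abridged from
// llvm/include/llvm/CodeGen/TargetInstrInfo.h.
bool isTriviallyReMaterializable(const MachineInstr &MI) const {
  // IMPLICIT_DEF is always trivially rematerializable. Everything else must
  // both carry the isReMaterializable flag from TableGen and pass the
  // target's isReallyTriviallyReMaterializable hook, overridden above.
  return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
         (MI.getDesc().isRematerializable() &&
          isReallyTriviallyReMaterializable(MI));
}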
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -76,6 +76,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
unsigned &MemBytes) const override;

bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

void copyPhysRegVector(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6629,6 +6629,7 @@ defm PseudoVIOTA_M: VPseudoVIOTA_M;
//===----------------------------------------------------------------------===//
// 15.9. Vector Element Index Instruction
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in
defm PseudoVID : VPseudoVID_V;
} // Predicates = [HasVInstructions]
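
Setting isReMaterializable = 1 here is what flips the MCID::Rematerializable bit on the pseudo's MCInstrDesc, which the generic query above tests before ever reaching the target hook. Roughly, assuming the accessor still matches MCInstrDesc.h:

// Sketch, not part of this patch: the accessor behind
// MI.getDesc().isRematerializable(), abridged from
// llvm/include/llvm/MC/MCInstrDesc.h.
bool isRematerializable() const {
  return Flags & (1ULL << MCID::Rematerializable);
}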

111 changes: 111 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -0,0 +1,111 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
[Contributor] Rematerializable instructions can be hoisted by MachineLICM; should we add some loop tests here?

[Collaborator] LICM should be able to hoist vid.v even if it weren't rematerializable, right?

[Contributor] Yeah, you're right!

; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA

define void @vid(ptr %p) {
; POSTRA-LABEL: vid:
; POSTRA: # %bb.0:
; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; POSTRA-NEXT: vid.v v8
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: vl8re64.v v16, (a0)
; POSTRA-NEXT: vl8re64.v v24, (a0)
; POSTRA-NEXT: vl8re64.v v0, (a0)
; POSTRA-NEXT: vl8re64.v v8, (a0)
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: vs8r.v v0, (a0)
; POSTRA-NEXT: vs8r.v v24, (a0)
; POSTRA-NEXT: vs8r.v v16, (a0)
; POSTRA-NEXT: vid.v v8
; POSTRA-NEXT: vs8r.v v8, (a0)
; POSTRA-NEXT: ret
;
; PRERA-LABEL: vid:
; PRERA: # %bb.0:
; PRERA-NEXT: addi sp, sp, -16
; PRERA-NEXT: .cfi_def_cfa_offset 16
; PRERA-NEXT: csrr a1, vlenb
; PRERA-NEXT: slli a1, a1, 3
; PRERA-NEXT: sub sp, sp, a1
; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; PRERA-NEXT: vid.v v8
; PRERA-NEXT: vs8r.v v8, (a0)
; PRERA-NEXT: vl8re64.v v16, (a0)
; PRERA-NEXT: addi a1, sp, 16
; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; PRERA-NEXT: vl8re64.v v24, (a0)
; PRERA-NEXT: vl8re64.v v0, (a0)
; PRERA-NEXT: vl8re64.v v16, (a0)
; PRERA-NEXT: vs8r.v v16, (a0)
; PRERA-NEXT: vs8r.v v0, (a0)
; PRERA-NEXT: vs8r.v v24, (a0)
; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; PRERA-NEXT: vs8r.v v16, (a0)
; PRERA-NEXT: vs8r.v v8, (a0)
; PRERA-NEXT: csrr a0, vlenb
; PRERA-NEXT: slli a0, a0, 3
; PRERA-NEXT: add sp, sp, a0
; PRERA-NEXT: addi sp, sp, 16
; PRERA-NEXT: ret
%vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> poison, i64 -1)
store volatile <vscale x 8 x i64> %vid, ptr %p

%a = load volatile <vscale x 8 x i64>, ptr %p
%b = load volatile <vscale x 8 x i64>, ptr %p
%c = load volatile <vscale x 8 x i64>, ptr %p
%d = load volatile <vscale x 8 x i64>, ptr %p
store volatile <vscale x 8 x i64> %d, ptr %p
store volatile <vscale x 8 x i64> %c, ptr %p
store volatile <vscale x 8 x i64> %b, ptr %p
store volatile <vscale x 8 x i64> %a, ptr %p

store volatile <vscale x 8 x i64> %vid, ptr %p
ret void
}


define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
; CHECK-LABEL: vid_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetivli zero, 1, e64, m8, tu, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vl8re64.v v0, (a0)
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vs8r.v v16, (a0)
; CHECK-NEXT: vs8r.v v0, (a0)
; CHECK-NEXT: vs8r.v v24, (a0)
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vs8r.v v16, (a0)
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> %v, i64 1)
store volatile <vscale x 8 x i64> %vid, ptr %p

%a = load volatile <vscale x 8 x i64>, ptr %p
%b = load volatile <vscale x 8 x i64>, ptr %p
%c = load volatile <vscale x 8 x i64>, ptr %p
%d = load volatile <vscale x 8 x i64>, ptr %p
store volatile <vscale x 8 x i64> %d, ptr %p
store volatile <vscale x 8 x i64> %c, ptr %p
store volatile <vscale x 8 x i64> %b, ptr %p
store volatile <vscale x 8 x i64> %a, ptr %p

store volatile <vscale x 8 x i64> %vid, ptr %p
ret void
}