From 4554663bc0da71d61ab488641c95ef98430cb451 Mon Sep 17 00:00:00 2001
From: LiDongjin
Date: Fri, 6 Jan 2023 09:54:19 -0800
Subject: [PATCH] Recommit "[RISCV] Enable the LocalStackSlotAllocation pass support"

This includes a fix for the tramp3d failure from the llvm-testsuite
that caused the last revert. Hopefully the others failures were the
same issue.

Original commit message:
For RISC-V, load/store(exclude vector load/store) instructions only has
a 12 bit immediate operand. If the offset is out-of-range, it must make
use of a temp register to make up this offset. If between these offsets,
they have a small(IsInt<12>) relative offset, LocalStackSlotAllocation
pass can find a value as frame base register's value, and replace the
origin offset with this register's value plus the relative offset.

Co-authored-by: luxufan
Co-authored-by: Craig Topper

Differential Revision: https://reviews.llvm.org/D98101
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.h    |   6 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp   | 121 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVRegisterInfo.h     |  16 +++
 .../RISCV/local-stack-slot-allocation.ll      |  62 +++++++--
 4 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index a6d98d1d1cc35e..bf6c1a6526294b 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -73,6 +73,12 @@ class RISCVFrameLowering : public TargetFrameLowering {
   bool isSupportedStackID(TargetStackID::Value ID) const override;
   TargetStackID::Value getStackIDForScalableVectors() const override;
 
+  bool isStackIdSafeForLocalArea(unsigned StackId) const override {
+    // We don't support putting RISCV Vector objects into the pre-allocated
+    // local frame block at the moment.
+    return StackId != TargetStackID::ScalableVector;
+  }
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 8c893a58f39752..f400628f93d4a5 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -498,6 +498,127 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   return false;
 }
 
+bool RISCVRegisterInfo::requiresVirtualBaseRegisters(
+    const MachineFunction &MF) const {
+  return true;
+}
+
+// Returns true if the instruction's frame index reference would be better
+// served by a base register other than FP or SP.
+// Used by LocalStackSlotAllocation pass to determine which frame index
+// references it should create new base registers for.
+bool RISCVRegisterInfo::needsFrameBaseReg(MachineInstr *MI,
+                                          int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  for (; !MI->getOperand(FIOperandNum).isFI(); FIOperandNum++)
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand");
+
+  // For RISC-V, The machine instructions that include a FrameIndex operand
+  // are load/store, ADDI instructions.
+  unsigned MIFrm = RISCVII::getFormat(MI->getDesc().TSFlags);
+  if (MIFrm != RISCVII::InstFormatI && MIFrm != RISCVII::InstFormatS)
+    return false;
+  // We only generate virtual base registers for loads and stores, so
+  // return false for everything else.
+  if (!MI->mayLoad() && !MI->mayStore())
+    return false;
+
+  const MachineFunction &MF = *MI->getMF();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const RISCVFrameLowering *TFI = getFrameLowering(MF);
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned CalleeSavedSize = 0;
+  Offset += getFrameIndexInstrOffset(MI, FIOperandNum);
+
+  // Estimate the stack size used to store callee saved registers(
+  // excludes reserved registers).
+  BitVector ReservedRegs = getReservedRegs(MF);
+  for (const MCPhysReg *R = MRI.getCalleeSavedRegs(); MCPhysReg Reg = *R; ++R) {
+    if (!ReservedRegs.test(Reg))
+      CalleeSavedSize += getSpillSize(*getMinimalPhysRegClass(Reg));
+  }
+
+  int64_t MaxFPOffset = Offset - CalleeSavedSize;
+  if (TFI->hasFP(MF) && !shouldRealignStack(MF))
+    return !isFrameOffsetLegal(MI, RISCV::X8, MaxFPOffset);
+
+  // Assume 128 bytes spill slots size to estimate the maximum possible
+  // offset relative to the stack pointer.
+  // FIXME: The 128 is copied from ARM. We should run some statistics and pick a
+  // real one for RISC-V.
+  int64_t MaxSPOffset = Offset + 128;
+  MaxSPOffset += MFI.getLocalFrameSize();
+  return !isFrameOffsetLegal(MI, RISCV::X2, MaxSPOffset);
+}
+
+// Determine whether a given base register plus offset immediate is
+// encodable to resolve a frame index.
+bool RISCVRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                           Register BaseReg,
+                                           int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  while (!MI->getOperand(FIOperandNum).isFI()) {
+    FIOperandNum++;
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+
+  Offset += getFrameIndexInstrOffset(MI, FIOperandNum);
+  return isInt<12>(Offset);
+}
+
+// Insert defining instruction(s) for a pointer to FrameIdx before
+// insertion point I.
+// Return materialized frame pointer.
+Register RISCVRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                                         int FrameIdx,
+                                                         int64_t Offset) const {
+  MachineBasicBlock::iterator MBBI = MBB->begin();
+  DebugLoc DL;
+  if (MBBI != MBB->end())
+    DL = MBBI->getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MFI = MF->getRegInfo();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  Register BaseReg = MFI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(*MBB, MBBI, DL, TII->get(RISCV::ADDI), BaseReg)
+      .addFrameIndex(FrameIdx)
+      .addImm(Offset);
+  return BaseReg;
+}
+
+// Resolve a frame index operand of an instruction to reference the
+// indicated base register plus offset instead.
+void RISCVRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
+                                          int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  while (!MI.getOperand(FIOperandNum).isFI()) {
+    FIOperandNum++;
+    assert(FIOperandNum < MI.getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+
+  Offset += getFrameIndexInstrOffset(&MI, FIOperandNum);
+  // FrameIndex Operands are always represented as a
+  // register followed by an immediate.
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+// Get the offset from the referenced frame index in the instruction,
+// if there is one.
+int64_t RISCVRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
+                                                    int Idx) const {
+  assert((RISCVII::getFormat(MI->getDesc().TSFlags) == RISCVII::InstFormatI ||
+          RISCVII::getFormat(MI->getDesc().TSFlags) == RISCVII::InstFormatS) &&
+         "The MI must be I or S format.");
+  assert(MI->getOperand(Idx).isFI() && "The Idx'th operand of MI is not a "
+                                       "FrameIndex operand");
+  return MI->getOperand(Idx + 1).getImm();
+}
+
 Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index fe0d2e1ce6987a..57a7256735238d 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -51,6 +51,22 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
+  bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
+
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+
+  bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
+                          int64_t Offset) const override;
+
+  Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+                                        int64_t Offset) const override;
+
+  void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
+                         int64_t Offset) const override;
+
+  int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
+                                   int Idx) const override;
+
   void lowerVSPILL(MachineBasicBlock::iterator II) const;
   void lowerVRELOAD(MachineBasicBlock::iterator II) const;
 
diff --git a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
index fa32e6b37aba64..186b8287d87c94 100644
--- a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
+++ b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
@@ -5,7 +5,6 @@
 ; This test case test the LocalStackSlotAllocation pass that use a base register
 ; for the frame index that its offset is out-of-range (for RISC-V. the immediate
 ; is 12 bits for the load store instruction (excludes vector load / store))
-; TODO: Enable LocalStackSlotAllocation pass.
 define void @use_frame_base_reg() {
 ; RV32I-LABEL: use_frame_base_reg:
 ; RV32I:       # %bb.0:
@@ -14,11 +13,10 @@ define void @use_frame_base_reg() {
 ; RV32I-NEXT:    sub sp, sp, a0
 ; RV32I-NEXT:    .cfi_def_cfa_offset 100016
 ; RV32I-NEXT:    lui a0, 24
+; RV32I-NEXT:    addi a0, a0, 1704
 ; RV32I-NEXT:    add a0, sp, a0
-; RV32I-NEXT:    lb a0, 1708(a0)
-; RV32I-NEXT:    lui a0, 24
-; RV32I-NEXT:    add a0, sp, a0
-; RV32I-NEXT:    lb a0, 1704(a0)
+; RV32I-NEXT:    lb a1, 4(a0)
+; RV32I-NEXT:    lb a0, 0(a0)
 ; RV32I-NEXT:    lui a0, 24
 ; RV32I-NEXT:    addi a0, a0, 1712
 ; RV32I-NEXT:    add sp, sp, a0
@@ -31,11 +29,10 @@ define void @use_frame_base_reg() {
 ; RV64I-NEXT:    sub sp, sp, a0
 ; RV64I-NEXT:    .cfi_def_cfa_offset 100016
 ; RV64I-NEXT:    lui a0, 24
+; RV64I-NEXT:    addiw a0, a0, 1704
 ; RV64I-NEXT:    add a0, sp, a0
-; RV64I-NEXT:    lb a0, 1708(a0)
-; RV64I-NEXT:    lui a0, 24
-; RV64I-NEXT:    add a0, sp, a0
-; RV64I-NEXT:    lb a0, 1704(a0)
+; RV64I-NEXT:    lb a1, 4(a0)
+; RV64I-NEXT:    lb a0, 0(a0)
 ; RV64I-NEXT:    lui a0, 24
 ; RV64I-NEXT:    addiw a0, a0, 1712
 ; RV64I-NEXT:    add sp, sp, a0
@@ -48,3 +45,50 @@ define void @use_frame_base_reg() {
   %argp.next = load volatile i8, ptr %va1, align 4
   ret void
 }
+
+; Test containing a load with its own local offset. Make sure isFrameOffsetLegal
+; considers it and does not create a virtual base register.
+define void @load_with_offset() {
+; RV32I-LABEL: load_with_offset:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    addi a0, a0, -1792
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_def_cfa_offset 100608
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lb a0, -292(a0)
+; RV32I-NEXT:    lui a0, 24
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lb a0, 1704(a0)
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    addi a0, a0, -1792
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: load_with_offset:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    addiw a0, a0, -1792
+; RV64I-NEXT:    sub sp, sp, a0
+; RV64I-NEXT:    .cfi_def_cfa_offset 100608
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    add a0, sp, a0
+; RV64I-NEXT:    lb a0, -292(a0)
+; RV64I-NEXT:    lui a0, 24
+; RV64I-NEXT:    add a0, sp, a0
+; RV64I-NEXT:    lb a0, 1704(a0)
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    addiw a0, a0, -1792
+; RV64I-NEXT:    add sp, sp, a0
+; RV64I-NEXT:    ret
+
+  %va = alloca [100 x i8], align 4
+  %va1 = alloca [500 x i8], align 4
+  %large = alloca [100000 x i8]
+  %va_gep = getelementptr [100 x i8], ptr %va, i64 16
+  %va1_gep = getelementptr [100 x i8], ptr %va1, i64 0
+  %load = load volatile i8, ptr %va_gep, align 4
+  %load1 = load volatile i8, ptr %va1_gep, align 4
+  ret void
+}