From 4554663bc0da71d61ab488641c95ef98430cb451 Mon Sep 17 00:00:00 2001
From: LiDongjin <jin.mse.sse@gmail.com>
Date: Fri, 6 Jan 2023 09:54:19 -0800
Subject: [PATCH] Recommit "[RISCV] Enable the LocalStackSlotAllocation pass
 support"

This includes a fix for the tramp3d failure from the llvm-testsuite
that caused the last revert. Hopefully the other failures were the
same issue.

Original commit message:
For RISC-V, load/store instructions (excluding vector loads/stores) only have a 12-bit immediate operand. If an offset is out of range, a temporary register must be used to materialize it. If several such offsets lie within a small (isInt<12>) distance of one another, the LocalStackSlotAllocation pass can materialize one value in a frame base register and replace each original offset with that register's value plus a small relative offset.

Co-authored-by: luxufan <luxufan@iscas.ac.cn>
Co-authored-by: Craig Topper <craig.topper@sifive.com>

Differential Revision: https://reviews.llvm.org/D98101
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.h    |   6 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp   | 121 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVRegisterInfo.h     |  16 +++
 .../RISCV/local-stack-slot-allocation.ll      |  62 +++++++--
 4 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index a6d98d1d1cc35..bf6c1a6526294 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -73,6 +73,12 @@ class RISCVFrameLowering : public TargetFrameLowering {
   bool isSupportedStackID(TargetStackID::Value ID) const override;
   TargetStackID::Value getStackIDForScalableVectors() const override;
 
+  bool isStackIdSafeForLocalArea(unsigned StackId) const override {
+    // Scalable vector objects can't go in the pre-allocated local block yet.
+    const bool IsScalableVector = StackId == TargetStackID::ScalableVector;
+    return !IsScalableVector;
+  }
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 8c893a58f3975..f400628f93d4a 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -498,6 +498,127 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   return false;
 }
 
+bool RISCVRegisterInfo::requiresVirtualBaseRegisters(
+    const MachineFunction & /*MF*/) const {
+  return true; // Unconditional: the answer does not depend on the function.
+}
+
+// Returns true if the instruction's frame index reference would be better
+// served by a base register other than FP or SP.
+// Used by LocalStackSlotAllocation pass to determine which frame index
+// references it should create new base registers for.
+// MI's own immediate is not added here; isFrameOffsetLegal folds it in.
+bool RISCVRegisterInfo::needsFrameBaseReg(MachineInstr *MI,
+                                          int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  for (; !MI->getOperand(FIOperandNum).isFI(); FIOperandNum++)
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand");
+
+  // For RISC-V, The machine instructions that include a FrameIndex operand
+  // are load/store, ADDI instructions.
+  unsigned MIFrm = RISCVII::getFormat(MI->getDesc().TSFlags);
+  if (MIFrm != RISCVII::InstFormatI && MIFrm != RISCVII::InstFormatS)
+    return false;
+  // We only generate virtual base registers for loads and stores, so
+  // return false for everything else.
+  if (!MI->mayLoad() && !MI->mayStore())
+    return false;
+
+  const MachineFunction &MF = *MI->getMF();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const RISCVFrameLowering *TFI = getFrameLowering(MF);
+
+  if (TFI->hasFP(MF) && !shouldRealignStack(MF)) {
+    // Estimate the stack size used to store callee saved registers(
+    // excludes reserved registers). Only the FP-relative estimate needs it.
+    unsigned CalleeSavedSize = 0;
+    const BitVector ReservedRegs = getReservedRegs(MF);
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    for (const MCPhysReg *R = MRI.getCalleeSavedRegs(); MCPhysReg Reg = *R;
+         ++R) {
+      if (!ReservedRegs.test(Reg))
+        CalleeSavedSize += getSpillSize(*getMinimalPhysRegClass(Reg));
+    }
+    return !isFrameOffsetLegal(MI, RISCV::X8, Offset - CalleeSavedSize);
+  }
+
+  // Assume 128 bytes spill slots size to estimate the maximum possible
+  // offset relative to the stack pointer.
+  // FIXME: The 128 is copied from ARM. We should run some statistics and pick a
+  // real one for RISC-V.
+  int64_t MaxSPOffset = Offset + 128;
+  MaxSPOffset += MFI.getLocalFrameSize();
+  return !isFrameOffsetLegal(MI, RISCV::X2, MaxSPOffset);
+}
+
+// Returns true if Offset, combined with the immediate MI already carries
+// next to its frame index operand, fits in a signed 12-bit immediate.
+bool RISCVRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                           Register BaseReg,
+                                           int64_t Offset) const {
+  // Scan for the frame index operand; BaseReg does not affect the answer.
+  unsigned FIOperandNum = 0;
+  while (!MI->getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+  const int64_t FullOffset = Offset + getFrameIndexInstrOffset(MI, FIOperandNum);
+  return isInt<12>(FullOffset);
+}
+
+// Emit an ADDI of FrameIdx and Offset at the start of MBB, defining a new
+// GPR virtual register, and return that register.
+Register RISCVRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                                         int FrameIdx,
+                                                         int64_t Offset) const {
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+  // Reuse the debug location of the block's first instruction, if any.
+  MachineBasicBlock::iterator InsertPt = MBB->begin();
+  DebugLoc DL;
+  if (InsertPt != MBB->end())
+    DL = InsertPt->getDebugLoc();
+  Register BaseReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(*MBB, InsertPt, DL, TII.get(RISCV::ADDI), BaseReg)
+      .addFrameIndex(FrameIdx)
+      .addImm(Offset);
+  return BaseReg;
+}
+
+// Rewrite MI's frame index operand to use BaseReg, and fold Offset into the
+// immediate operand that follows it.
+void RISCVRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
+                                          int64_t Offset) const {
+  unsigned FIOperandNum = 0;
+  while (!MI.getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI.getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+
+  // The frame index is always followed by its immediate. Read the combined
+  // offset before the frame index operand is overwritten.
+  const int64_t NewImm = Offset + getFrameIndexInstrOffset(&MI, FIOperandNum);
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, /*isDef=*/false);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(NewImm);
+}
+
+// Return the immediate of the operand directly following the frame index
+// operand at position Idx of MI. Asserts MI is I/S format and Idx is an FI.
+int64_t RISCVRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
+                                                    int Idx) const {
+  assert((RISCVII::getFormat(MI->getDesc().TSFlags) == RISCVII::InstFormatI ||
+          RISCVII::getFormat(MI->getDesc().TSFlags) == RISCVII::InstFormatS) &&
+         "The MI must be I or S format.");
+  assert(MI->getOperand(Idx).isFI() && "The Idx'th operand of MI is not a "
+                                       "FrameIndex operand");
+  return MI->getOperand(Idx + 1).getImm();
+}
+
 Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index fe0d2e1ce6987..57a7256735238 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -51,6 +51,22 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
+  bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
+
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+
+  bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
+                          int64_t Offset) const override;
+
+  Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+                                        int64_t Offset) const override;
+
+  void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
+                         int64_t Offset) const override;
+
+  int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
+                                   int Idx) const override;
+
   void lowerVSPILL(MachineBasicBlock::iterator II) const;
   void lowerVRELOAD(MachineBasicBlock::iterator II) const;
 
diff --git a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
index fa32e6b37aba6..186b8287d87c9 100644
--- a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
+++ b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll
@@ -5,7 +5,6 @@
 ; This test case test the LocalStackSlotAllocation pass that use a base register
 ; for the frame index that its offset is out-of-range (for RISC-V. the immediate
 ; is 12 bits for the load store instruction (excludes vector load / store))
-; TODO: Enable LocalStackSlotAllocation pass.
 define void @use_frame_base_reg() {
 ; RV32I-LABEL: use_frame_base_reg:
 ; RV32I:       # %bb.0:
@@ -14,11 +13,10 @@ define void @use_frame_base_reg() {
 ; RV32I-NEXT:    sub sp, sp, a0
 ; RV32I-NEXT:    .cfi_def_cfa_offset 100016
 ; RV32I-NEXT:    lui a0, 24
+; RV32I-NEXT:    addi a0, a0, 1704
 ; RV32I-NEXT:    add a0, sp, a0
-; RV32I-NEXT:    lb a0, 1708(a0)
-; RV32I-NEXT:    lui a0, 24
-; RV32I-NEXT:    add a0, sp, a0
-; RV32I-NEXT:    lb a0, 1704(a0)
+; RV32I-NEXT:    lb a1, 4(a0)
+; RV32I-NEXT:    lb a0, 0(a0)
 ; RV32I-NEXT:    lui a0, 24
 ; RV32I-NEXT:    addi a0, a0, 1712
 ; RV32I-NEXT:    add sp, sp, a0
@@ -31,11 +29,10 @@ define void @use_frame_base_reg() {
 ; RV64I-NEXT:    sub sp, sp, a0
 ; RV64I-NEXT:    .cfi_def_cfa_offset 100016
 ; RV64I-NEXT:    lui a0, 24
+; RV64I-NEXT:    addiw a0, a0, 1704
 ; RV64I-NEXT:    add a0, sp, a0
-; RV64I-NEXT:    lb a0, 1708(a0)
-; RV64I-NEXT:    lui a0, 24
-; RV64I-NEXT:    add a0, sp, a0
-; RV64I-NEXT:    lb a0, 1704(a0)
+; RV64I-NEXT:    lb a1, 4(a0)
+; RV64I-NEXT:    lb a0, 0(a0)
 ; RV64I-NEXT:    lui a0, 24
 ; RV64I-NEXT:    addiw a0, a0, 1712
 ; RV64I-NEXT:    add sp, sp, a0
@@ -48,3 +45,50 @@ define void @use_frame_base_reg() {
   %argp.next = load volatile i8, ptr %va1, align 4
   ret void
 }
+
+; Test containing a load with its own local offset. Make sure isFrameOffsetLegal
+; considers it and does not create a virtual base register.
+define void @load_with_offset() {
+; RV32I-LABEL: load_with_offset:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    addi a0, a0, -1792
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_def_cfa_offset 100608
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lb a0, -292(a0)
+; RV32I-NEXT:    lui a0, 24
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lb a0, 1704(a0)
+; RV32I-NEXT:    lui a0, 25
+; RV32I-NEXT:    addi a0, a0, -1792
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: load_with_offset:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    addiw a0, a0, -1792
+; RV64I-NEXT:    sub sp, sp, a0
+; RV64I-NEXT:    .cfi_def_cfa_offset 100608
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    add a0, sp, a0
+; RV64I-NEXT:    lb a0, -292(a0)
+; RV64I-NEXT:    lui a0, 24
+; RV64I-NEXT:    add a0, sp, a0
+; RV64I-NEXT:    lb a0, 1704(a0)
+; RV64I-NEXT:    lui a0, 25
+; RV64I-NEXT:    addiw a0, a0, -1792
+; RV64I-NEXT:    add sp, sp, a0
+; RV64I-NEXT:    ret
+
+  %va = alloca [100 x i8], align 4
+  %va1 = alloca [500 x i8], align 4
+  %large = alloca [100000 x i8]
+  %va_gep = getelementptr [100 x i8], ptr %va, i64 16
+  %va1_gep = getelementptr [100 x i8], ptr %va1, i64 0
+  %load = load volatile i8, ptr %va_gep, align 4
+  %load1 = load volatile i8, ptr %va1_gep, align 4
+  ret void
+}