[AArch64] Prepare for split ZPR and PPR area allocation (NFCI) #142391
base: users/MacDue/pred_stack_id
Conversation
This is a stacked PR. Please see the final PR for context.
@llvm/pr-subscribers-backend-aarch64

Author: Benjamin Maxwell (MacDue)

Changes

This patch refactors AArch64FrameLowering to allow the sizes of the ZPR and PPR areas to be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch should be NFC and is split out to make later functional changes easier to spot.

Patch is 34.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142391.diff

4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 643778c742497..e5592a921e192 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -326,7 +326,10 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getZPRStackSize(const MachineFunction &MF);
+static StackOffset getPPRStackSize(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
+static bool hasSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
@@ -345,7 +348,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
if (needsWinCFI(MF))
return false;
// TODO: SVE is not supported yet.
- if (getSVEStackSize(MF))
+ if (hasSVEStackSize(MF))
return false;
// Bail on stack adjustment needed on return for simplicity.
@@ -445,10 +448,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+static StackOffset getZPRStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+/// Returns the size of the entire PPR stackframe (calleesaves + spills).
+static StackOffset getPPRStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
+/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+}
+
+static bool hasSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
+ return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0;
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -476,7 +505,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ hasSVEStackSize(MF) || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -1144,7 +1173,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (getSVEStackSize(MF))
+ if (hasSVEStackSize(MF))
return false;
return true;
@@ -1570,30 +1599,40 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
// Convenience function to determine whether I is an SVE callee save.
-static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is an SVE predicate callee save.
+static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
}
+static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+ return IsZPRCalleeSave(I) || IsPPRCalleeSave(I);
+}
+
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -1825,8 +1864,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
else
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
-
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
@@ -1837,7 +1874,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
- assert(!SVEStackSize &&
+ assert(!hasSVEStackSize(MF) &&
"unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
@@ -1879,7 +1916,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
- assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+ assert(!hasSVEStackSize(MF) && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
@@ -2105,34 +2142,63 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = getPPRStackSize(MF);
+ StackOffset ZPRLocalsSize = getZPRStackSize(MF);
+
+ MachineBasicBlock::iterator ZPRCalleeSavesBegin = MBBI,
+ ZPRCalleeSavesEnd = MBBI;
+ MachineBasicBlock::iterator PPRCalleeSavesBegin = MBBI,
+ PPRCalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+
+ if (int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " << PPRCalleeSavedSize
+ << "\n");
+
+ PPRCalleeSavesBegin = MBBI;
+ assert(IsPPRCalleeSave(PPRCalleeSavesBegin) && "Unexpected instruction");
+ while (IsPPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ++MBBI;
+ PPRCalleeSavesEnd = MBBI;
+
+ PPRLocalsSize -= StackOffset::getScalable(PPRCalleeSavedSize);
+ }
+
+ if (int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " << ZPRCalleeSavedSize
<< "\n");
// Find callee save instructions in frame.
- CalleeSavesBegin = MBBI;
- assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
+ ZPRCalleeSavesBegin = MBBI;
+ assert(IsZPRCalleeSave(ZPRCalleeSavesBegin) && "Unexpected instruction");
+ while (IsZPRCalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
- CalleeSavesEnd = MBBI;
+ ZPRCalleeSavesEnd = MBBI;
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+ ZPRLocalsSize -= StackOffset::getScalable(ZPRCalleeSavedSize);
}
// Allocate space for the callee saves (if any).
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ MachineBasicBlock::iterator CalleeSavesBegin =
+ AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
+ : ZPRCalleeSavesBegin;
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || LocalsSize);
CFAOffset += SVECalleeSavesSize;
+ MachineBasicBlock::iterator CalleeSavesEnd =
+ AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd : PPRCalleeSavesEnd;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
@@ -2144,6 +2210,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
SVELocalsSize + StackOffset::getFixed(NumBytes),
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
@@ -2193,7 +2260,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -2388,7 +2456,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ StackOffset SVEStackSize = getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -2413,7 +2481,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
IsSVECalleeSave(std::prev(RestoreBegin)))
@@ -2423,7 +2495,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
+ StackOffset::getScalable(SVECalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@@ -2434,16 +2506,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// restore the stack pointer from the frame pointer prior to SVE CSR
// restoration.
if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ if (SVECalleeSavedSize) {
// Set SP to start of SVE callee-save area from which they can
// be reloaded. The code below will deallocate the stack space
// by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
- StackOffset::getScalable(-CalleeSavedSize), TII,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
MachineInstr::FrameDestroy);
}
} else {
- if (AFI->getSVECalleeSavedStackSize()) {
+ if (SVECalleeSavedSize) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(
@@ -2572,7 +2644,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -2663,7 +2737,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ const StackOffset SVEStackSize = getSVEStackSize(MF);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -2800,7 +2874,9 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
- !requiresSaveVG(MF) && AFI->getSVECalleeSavedStackSize() == 0;
+ !requiresSaveVG(MF) &&
+ (AFI->getZPRCalleeSavedStackSize() +
+ AFI->getPPRCalleeSavedStackSize()) == 0;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
@@ -2932,9 +3008,13 @@ static void computeCalleeSaveRegisterPairs(
RegInc = -1;
FirstReg = Count - 1;
}
- int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
+
+ int ScalableByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -2964,7 +3044,7 @@ static void computeCalleeSaveRegisterPairs(
}
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -2972,7 +3052,7 @@ static void computeCalleeSaveRegisterPairs(
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -3541,8 +3621,9 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
void AArch64FrameLowering::determineStackHazardSlot(
MachineFunction &MF, BitVector &SavedRegs) const {
unsigned StackHazardSize = getStackHazardSize(MF);
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
- MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
+ AFI->hasStackHazardSlotIndex())
return;
// Stack hazards are only needed in streaming functions.
@@ -3594,10 +3675,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -3718,19 +3800,29 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
+ // Determine if a Hazard slot should be used, and increase the CSStackSize by
+ // StackHazardSize if so.
+ determineStackHazardSlot(MF, SavedRegs);
+ if (AFI->hasStackHazardSlotIndex())
+ CSStackSize += getStackHazardSize(MF);
+
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
// For locally streaming functions, we spill both the streaming and
@@ -3744,12 +3836,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
CSStackSize += 8;
}
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
- CSStackSize += getStackHazardSize(MF);
-
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@@ -3769,8 +3855,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ estimateSVEStackObjectOffsets(MF);
+ int64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
...
[truncated]
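To summarize the accounting change for readers skimming the thread: the single SVE stack-size value is split into two scalable components that are tracked and queried independently, with the combined size recovered as their sum. Below is a minimal standalone sketch of that shape in plain C++, not the LLVM API; ScalableBytes, FuncInfo, and its StackSizeZPR/StackSizePPR fields are hypothetical stand-ins for StackOffset and AArch64FunctionInfo's new per-area accessors seen in the diff.

// Minimal sketch (assumed names; not the LLVM API) of the ZPR/PPR size split.
#include <cassert>
#include <cstdint>
#include <iostream>

// Stand-in for a scalable StackOffset: bytes scaled by the runtime
// vector-length multiple.
struct ScalableBytes { int64_t Value = 0; };

// Stand-in for AArch64FunctionInfo with the split per-area sizes.
struct FuncInfo {
  uint64_t StackSizeZPR = 0; // ZPR (vector) callee-saves + spills
  uint64_t StackSizePPR = 0; // PPR (predicate) callee-saves + spills
};

ScalableBytes getZPRStackSize(const FuncInfo &AFI) {
  return {static_cast<int64_t>(AFI.StackSizeZPR)};
}
ScalableBytes getPPRStackSize(const FuncInfo &AFI) {
  return {static_cast<int64_t>(AFI.StackSizePPR)};
}
// The combined SVE area is now just the sum of the two parts.
ScalableBytes getSVEStackSize(const FuncInfo &AFI) {
  return {getZPRStackSize(AFI).Value + getPPRStackSize(AFI).Value};
}
// Boolean query for call sites that only care whether any SVE area exists.
bool hasSVEStackSize(const FuncInfo &AFI) {
  return AFI.StackSizeZPR > 0 || AFI.StackSizePPR > 0;
}

int main() {
  FuncInfo AFI;
  AFI.StackSizeZPR = 32; // e.g. two Z-register spill slots
  AFI.StackSizePPR = 4;  // e.g. two predicate spill slots
  assert(hasSVEStackSize(AFI));
  std::cout << "SVE area = " << getSVEStackSize(AFI).Value
            << " scalable bytes\n";
  return 0;
}

The reason the diff introduces hasSVEStackSize alongside getSVEStackSize is visible in callers like canUseRedZone and shouldCombineCSRLocalStackBump: they only need the boolean, so they can avoid materializing the summed StackOffset.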