240240#include " llvm/Support/CommandLine.h"
241241#include " llvm/Support/Debug.h"
242242#include " llvm/Support/ErrorHandling.h"
243+ #include " llvm/Support/FormatVariadic.h"
243244#include " llvm/Support/MathExtras.h"
244245#include " llvm/Support/raw_ostream.h"
245246#include " llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
275276// Stack hazard padding size. 0 = disabled.
276277static cl::opt<unsigned > StackHazardSize (" aarch64-stack-hazard-size" ,
277278 cl::init (0 ), cl::Hidden);
279+ // Stack hazard size for analysis remarks. StackHazardSize takes precedence.
280+ static cl::opt<unsigned >
281+ StackHazardRemarkSize (" aarch64-stack-hazard-remark-size" , cl::init(0 ),
282+ cl::Hidden);
278283// Whether to insert padding into non-streaming functions (for testing).
279284static cl::opt<bool >
280285 StackHazardInNonStreaming (" aarch64-stack-hazard-in-non-streaming" ,
@@ -2616,9 +2621,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
26162621 const auto &MFI = MF.getFrameInfo ();
26172622
26182623 int64_t ObjectOffset = MFI.getObjectOffset (FI);
2624+ StackOffset SVEStackSize = getSVEStackSize (MF);
2625+
2626+ // For VLA-area objects, just emit an offset at the end of the stack frame.
2627+ // Whilst not quite correct, these objects do live at the end of the frame and
2628+ // so it is more useful for analysis for the offset to reflect this.
2629+ if (MFI.isVariableSizedObjectIndex (FI)) {
2630+ return StackOffset::getFixed (-((int64_t )MFI.getStackSize ())) - SVEStackSize;
2631+ }
26192632
26202633 // This is correct in the absence of any SVE stack objects.
2621- StackOffset SVEStackSize = getSVEStackSize (MF);
26222634 if (!SVEStackSize)
26232635 return StackOffset::getFixed (ObjectOffset - getOffsetOfLocalArea ());
26242636
@@ -3529,13 +3541,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
35293541 return true ;
35303542}
35313543
3532- // Return the FrameID for a Load/Store instruction by looking at the MMO.
3533- static std::optional<int > getLdStFrameID (const MachineInstr &MI,
3534- const MachineFrameInfo &MFI) {
3535- if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
3536- return std::nullopt ;
3537-
3538- MachineMemOperand *MMO = *MI.memoperands_begin ();
3544+ // Return the FrameID for a MMO.
3545+ static std::optional<int > getMMOFrameID (MachineMemOperand *MMO,
3546+ const MachineFrameInfo &MFI) {
35393547 auto *PSV =
35403548 dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue ());
35413549 if (PSV)
@@ -3553,6 +3561,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
35533561 return std::nullopt ;
35543562}
35553563
3564+ // Return the FrameID for a Load/Store instruction by looking at the first MMO.
3565+ static std::optional<int > getLdStFrameID (const MachineInstr &MI,
3566+ const MachineFrameInfo &MFI) {
3567+ if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
3568+ return std::nullopt ;
3569+
3570+ return getMMOFrameID (*MI.memoperands_begin (), MFI);
3571+ }
3572+
35563573// Check if a Hazard slot is needed for the current function, and if so create
35573574// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
35583575// which can be used to determine if any hazard padding is needed.
@@ -5030,3 +5047,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
50305047 MI->eraseFromParent ();
50315048 }
50325049}
5050+
5051+ struct StackAccess {
5052+ enum AccessType {
5053+ NotAccessed = 0 , // Stack object not accessed by load/store instructions.
5054+ GPR = 1 << 0 , // A general purpose register.
5055+ PPR = 1 << 1 , // A predicate register.
5056+ FPR = 1 << 2 , // A floating point/Neon/SVE register.
5057+ };
5058+
5059+ int Idx;
5060+ StackOffset Offset;
5061+ int64_t Size;
5062+ unsigned AccessTypes;
5063+
5064+ StackAccess () : Idx(0 ), Offset(), Size(0 ), AccessTypes(NotAccessed) {}
5065+
5066+ bool operator <(const StackAccess &Rhs) const {
5067+ return std::make_tuple (start (), Idx) <
5068+ std::make_tuple (Rhs.start (), Rhs.Idx );
5069+ }
5070+
5071+ bool isCPU () const {
5072+ // Predicate register load and store instructions execute on the CPU.
5073+ return AccessTypes & (AccessType::GPR | AccessType::PPR);
5074+ }
5075+ bool isSME () const { return AccessTypes & AccessType::FPR; }
5076+ bool isMixed () const { return isCPU () && isSME (); }
5077+
5078+ int64_t start () const { return Offset.getFixed () + Offset.getScalable (); }
5079+ int64_t end () const { return start () + Size; }
5080+
5081+ std::string getTypeString () const {
5082+ switch (AccessTypes) {
5083+ case AccessType::FPR:
5084+ return " FPR" ;
5085+ case AccessType::PPR:
5086+ return " PPR" ;
5087+ case AccessType::GPR:
5088+ return " GPR" ;
5089+ case AccessType::NotAccessed:
5090+ return " NA" ;
5091+ default :
5092+ return " Mixed" ;
5093+ }
5094+ }
5095+
5096+ void print (raw_ostream &OS) const {
5097+ OS << getTypeString () << " stack object at [SP"
5098+ << (Offset.getFixed () < 0 ? " " : " +" ) << Offset.getFixed ();
5099+ if (Offset.getScalable ())
5100+ OS << (Offset.getScalable () < 0 ? " " : " +" ) << Offset.getScalable ()
5101+ << " * vscale" ;
5102+ OS << " ]" ;
5103+ }
5104+ };
5105+
5106+ static inline raw_ostream &operator <<(raw_ostream &OS, const StackAccess &SA) {
5107+ SA.print (OS);
5108+ return OS;
5109+ }
5110+
5111+ void AArch64FrameLowering::emitRemarks (
5112+ const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
5113+
5114+ SMEAttrs Attrs (MF.getFunction ());
5115+ if (Attrs.hasNonStreamingInterfaceAndBody ())
5116+ return ;
5117+
5118+ const uint64_t HazardSize =
5119+ (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
5120+
5121+ if (HazardSize == 0 )
5122+ return ;
5123+
5124+ const MachineFrameInfo &MFI = MF.getFrameInfo ();
5125+ // Bail if function has no stack objects.
5126+ if (!MFI.hasStackObjects ())
5127+ return ;
5128+
5129+ std::vector<StackAccess> StackAccesses (MFI.getNumObjects ());
5130+
5131+ size_t NumFPLdSt = 0 ;
5132+ size_t NumNonFPLdSt = 0 ;
5133+
5134+ // Collect stack accesses via Load/Store instructions.
5135+ for (const MachineBasicBlock &MBB : MF) {
5136+ for (const MachineInstr &MI : MBB) {
5137+ if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
5138+ continue ;
5139+ for (MachineMemOperand *MMO : MI.memoperands ()) {
5140+ std::optional<int > FI = getMMOFrameID (MMO, MFI);
5141+ if (FI && !MFI.isDeadObjectIndex (*FI)) {
5142+ int FrameIdx = *FI;
5143+
5144+ size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects ();
5145+ if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
5146+ StackAccesses[ArrIdx].Idx = FrameIdx;
5147+ StackAccesses[ArrIdx].Offset =
5148+ getFrameIndexReferenceFromSP (MF, FrameIdx);
5149+ StackAccesses[ArrIdx].Size = MFI.getObjectSize (FrameIdx);
5150+ }
5151+
5152+ unsigned RegTy = StackAccess::AccessType::GPR;
5153+ if (MFI.getStackID (FrameIdx) == TargetStackID::ScalableVector) {
5154+ if (AArch64::PPRRegClass.contains (MI.getOperand (0 ).getReg ()))
5155+ RegTy = StackAccess::PPR;
5156+ else
5157+ RegTy = StackAccess::FPR;
5158+ } else if (AArch64InstrInfo::isFpOrNEON (MI)) {
5159+ RegTy = StackAccess::FPR;
5160+ }
5161+
5162+ StackAccesses[ArrIdx].AccessTypes |= RegTy;
5163+
5164+ if (RegTy == StackAccess::FPR)
5165+ ++NumFPLdSt;
5166+ else
5167+ ++NumNonFPLdSt;
5168+ }
5169+ }
5170+ }
5171+ }
5172+
5173+ if (NumFPLdSt == 0 || NumNonFPLdSt == 0 )
5174+ return ;
5175+
5176+ llvm::sort (StackAccesses);
5177+ StackAccesses.erase (llvm::remove_if (StackAccesses,
5178+ [](const StackAccess &S) {
5179+ return S.AccessTypes ==
5180+ StackAccess::NotAccessed;
5181+ }),
5182+ StackAccesses.end ());
5183+
5184+ SmallVector<const StackAccess *> MixedObjects;
5185+ SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
5186+
5187+ if (StackAccesses.front ().isMixed ())
5188+ MixedObjects.push_back (&StackAccesses.front ());
5189+
5190+ for (auto It = StackAccesses.begin (), End = std::prev (StackAccesses.end ());
5191+ It != End; ++It) {
5192+ const auto &First = *It;
5193+ const auto &Second = *(It + 1 );
5194+
5195+ if (Second.isMixed ())
5196+ MixedObjects.push_back (&Second);
5197+
5198+ if ((First.isSME () && Second.isCPU ()) ||
5199+ (First.isCPU () && Second.isSME ())) {
5200+ uint64_t Distance = static_cast <uint64_t >(Second.start () - First.end ());
5201+ if (Distance < HazardSize)
5202+ HazardPairs.emplace_back (&First, &Second);
5203+ }
5204+ }
5205+
5206+ auto EmitRemark = [&](llvm::StringRef Str) {
5207+ ORE->emit ([&]() {
5208+ auto R = MachineOptimizationRemarkAnalysis (
5209+ " sme" , " StackHazard" , MF.getFunction ().getSubprogram (), &MF.front ());
5210+ return R << formatv (" stack hazard in '{0}': " , MF.getName ()).str () << Str;
5211+ });
5212+ };
5213+
5214+ for (const auto &P : HazardPairs)
5215+ EmitRemark (formatv (" {0} is too close to {1}" , *P.first , *P.second ).str ());
5216+
5217+ for (const auto *Obj : MixedObjects)
5218+ EmitRemark (
5219+ formatv (" {0} accessed by both GP and FP instructions" , *Obj).str ());
5220+ }
0 commit comments