Skip to content

Commit bd86d4a

Browse files
committed
[AArch64][SME] Support split ZPR and PPR area allocation
For a while we have supported the `-aarch64-stack-hazard-size=<size>` option, which adds "hazard padding" between GPRs and FPR/ZPRs. However, there is currently a hole in this mitigation, as PPR and FPR/ZPR accesses to the same area also cause streaming memory hazards (this is noted by `-pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=<val>`), and the current stack layout places PPRs and ZPRs within the same area.

The current layout looks like this:

------------------------------------  Higher address
| callee-saved gpr registers        |
|-----------------------------------|
| lr,fp  (a.k.a. "frame record")    |
|-----------------------------------| <- fp(=x29)
|   <hazard padding>                |
|-----------------------------------|
| callee-saved fp/simd/SVE regs     |
|-----------------------------------|
|        SVE stack objects          |
|-----------------------------------|
| local variables of fixed size     |
|   <FPR>                           |
|   <hazard padding>                |
|   <GPR>                           |
------------------------------------| <- sp
                                    | Lower address

With this patch the stack (and hazard padding) is rearranged so that hazard padding is placed between the PPRs and ZPRs rather than within the (fixed size) callee-save region. The new layout looks something like this:

------------------------------------  Higher address
| callee-saved gpr registers        |
|-----------------------------------|
| lr,fp  (a.k.a. "frame record")    |
|-----------------------------------| <- fp(=x29)
|        callee-saved PPRs          |
|        PPR stack objects          | (These are SVE predicates)
|-----------------------------------|
|   <hazard padding>                |
|-----------------------------------|
|       callee-saved ZPR regs       | (These are SVE vectors)
|        ZPR stack objects          | Note: FPRs are promoted to ZPRs
|-----------------------------------|
| local variables of fixed size     |
|   <FPR>                           |
|   <hazard padding>                |
|   <GPR>                           |
------------------------------------| <- sp
                                    | Lower address

This layout is only enabled if:

* SplitSVEObjects are enabled (`-aarch64-split-sve-objects`)
  - (This may be enabled by default in a later patch)
* Streaming memory hazards are present
  - (`-aarch64-stack-hazard-size=<val>` != 0)
* PPRs and FPRs/ZPRs are on the stack
* There's no stack realignment or variable-sized objects
  - This is left as a TODO for now

Additionally, any FPR callee-saves that are present will be promoted to ZPRs. This is to prevent stack hazards between FPRs and GPRs in the fixed size callee-save area (which would otherwise require more hazard padding, or moving the FPR callee-saves).

This layout should resolve the hole in the hazard padding mitigation, and is not intended to change codegen for non-SME code.
1 parent 42af819 commit bd86d4a

File tree

8 files changed

+2264
-516
lines changed

8 files changed

+2264
-516
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 429 additions & 177 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
5757
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
5858
int64_t ObjectOffset, bool isFixed,
5959
bool isSVE, Register &FrameReg,
60-
bool PreferFP, bool ForSimm) const;
60+
bool PreferFP, bool ForSimm,
61+
int64_t FI = -1) const;
6162
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
6263
MachineBasicBlock::iterator MI,
6364
ArrayRef<CalleeSavedInfo> CSI,

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
136136
uint64_t StackSizeZPR = 0;
137137
uint64_t StackSizePPR = 0;
138138

139+
/// Are SVE objects (vectors and predicates) split into separate regions on
140+
/// the stack.
141+
bool SplitSVEObjects = false;
142+
139143
/// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
140144
bool HasCalculatedStackSizeSVE = false;
141145

@@ -310,6 +314,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
310314
uint64_t getStackSizeZPR() const { return StackSizeZPR; }
311315
uint64_t getStackSizePPR() const { return StackSizePPR; }
312316

317+
bool hasSplitSVEObjects() const { return SplitSVEObjects; }
318+
void setSplitSVEObjects(bool s) { SplitSVEObjects = s; }
319+
313320
bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
314321

315322
bool hasStackFrame() const { return HasStackFrame; }

llvm/test/CodeGen/AArch64/framelayout-split-sve.mir

Lines changed: 526 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -run-pass=greedy %s -o - | FileCheck %s
33
# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
4+
45
--- |
56
source_filename = "<stdin>"
67
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -983,26 +984,22 @@ body: |
983984
; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
984985
; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
985986
; EXPAND-NEXT: {{ $}}
986-
; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
987-
; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.3)
987+
; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
988988
; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
989989
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
990-
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.2)
990+
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1)
991991
; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
992-
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.1)
993-
; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
992+
; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
994993
;
995994
; EXPAND-NEXT: $p8 = IMPLICIT_DEF
996995
;
997-
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
998-
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.2)
996+
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1)
999997
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
1000998
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
1001-
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
999+
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
10021000
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
10031001
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
1004-
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.3)
1005-
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
1002+
; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
10061003
; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
10071004
10081005
; If we spill a register above p8, p4 must also be saved, so we can guarantee

llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll

Lines changed: 751 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 542 additions & 326 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,5 +595,3 @@ entry:
595595
ret i32 %x
596596
}
597597
declare void @other()
598-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
599-
; CHECK-FRAMELAYOUT: {{.*}}

0 commit comments

Comments
 (0)