Skip to content

[win][aarch64] Place catch objects in the fixed object area #147421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 53 additions & 15 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,19 +435,40 @@ AArch64FrameLowering::getStackIDForScalableVectors() const {
static unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) {
assert(AFI->getTailCallReservedStack() % 16 == 0 &&
"Tail call reserved stack must be aligned to 16 bytes");
if (!IsWin64 || IsFunclet) {
return AFI->getTailCallReservedStack();
} else {
if (AFI->getTailCallReservedStack() != 0 &&
!MF.getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftAsync))
report_fatal_error("cannot generate ABI-changing tail call for Win64");
unsigned FixedObjectSize = AFI->getTailCallReservedStack();

// Var args are stored here in the primary function.
const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
// To support EH funclets we allocate an UnwindHelp object
const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
return AFI->getTailCallReservedStack() +
alignTo(VarArgsArea + UnwindHelpObject, 16);
FixedObjectSize += AFI->getVarArgsGPRSize();

if (MF.hasEHFunclets()) {
// Catch objects are stored here in the primary function.
const MachineFrameInfo &MFI = MF.getFrameInfo();
const WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
SmallSetVector<int, 8> CatchObjFrameIndices;
for (const WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (const WinEHHandlerType &H : TBME.HandlerArray) {
int FrameIndex = H.CatchObj.FrameIndex;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it can allocate space for the same FrameIndex multiple times? I think a given FrameIndex can show up multple times in the TryBlockMap. (See FunctionLoweringInfo::set)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch: yes, if the catchpads reuse the same alloca then we'd allocate too many fixed objects. I've fixed it for AArch64 and added a test, I'll create a followup PR to fix x64.

I couldn't figure out a way to make Clang generate this IR, so that could be a potential optimization (i.e., only emit one alloca for all by-reference and by-pointer catch blocks).

if ((FrameIndex != INT_MAX) &&
CatchObjFrameIndices.insert(FrameIndex)) {
FixedObjectSize = alignTo(FixedObjectSize,
MFI.getObjectAlign(FrameIndex).value()) +
MFI.getObjectSize(FrameIndex);
}
}
}
// To support EH funclets we allocate an UnwindHelp object
FixedObjectSize += 8;
}
return alignTo(FixedObjectSize, 16);
}
}

Expand Down Expand Up @@ -4641,30 +4662,47 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// anything.
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();

MachineBasicBlock &MBB = MF.front();
auto MBBI = MBB.begin();
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
// Win64 C++ EH needs to allocate space for the catch objects in the fixed
// object area right next to the UnwindHelp object.
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
int64_t CurrentOffset =
AFI->getVarArgsGPRSize() + AFI->getTailCallReservedStack();
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
int FrameIndex = H.CatchObj.FrameIndex;
if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
CurrentOffset =
alignTo(CurrentOffset, MFI.getObjectAlign(FrameIndex).value());
CurrentOffset += MFI.getObjectSize(FrameIndex);
MFI.setObjectOffset(FrameIndex, -CurrentOffset);
}
}
}

// Create an UnwindHelp object.
// The UnwindHelp object is allocated at the start of the fixed object area
int64_t FixedObject =
getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
/*SPOffset*/ -FixedObject,
int64_t UnwindHelpOffset = alignTo(CurrentOffset + 8, Align(16));
assert(UnwindHelpOffset == getFixedObjectSize(MF, AFI, /*IsWin64*/ true,
/*IsFunclet*/ false) &&
"UnwindHelpOffset must be at the start of the fixed object area");
int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, -UnwindHelpOffset,
/*IsImmutable=*/false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

MachineBasicBlock &MBB = MF.front();
auto MBBI = MBB.begin();
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;

// We need to store -2 into the UnwindHelp object at the start of the
// function.
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
RS->backward(MBBI);
Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
.addReg(DstReg, getKillRegState(true))
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28513,9 +28513,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
}

// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::needsFixedCatchObjects() const { return true; }

bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
Expand Down
145 changes: 145 additions & 0 deletions llvm/test/CodeGen/AArch64/wineh-aligned-stack-obj.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
; RUN: llc %s --mtriple=aarch64-pc-windows-msvc -o - | FileCheck %s

; Regression test for handling MSVC C++ exceptions when there's an aligned
; object on the stack.

; Generated from this C++ code:
; https://godbolt.org/z/cGzGfqq34
; > clang --target=aarch64-pc-windows-msvc test.cpp
; ```
; // Large object: alignment seems to be important?
; struct alignas(128) BigObj {
; int value;
; // Destructor so it's kept alive.
; ~BigObj() { }
; };
;
; // Exception type need to be large enough to not fit in a register.
; struct Error {
; int value;
; int padding[3];
; };
;
; int main() {
; BigObj bo{};
;
; try {
; throw Error { 42, {0, 0, 0} };
; } catch (const Error& e) {
; return e.value;
; }
; return 0;
; }
; ```

; CHECK-LABEL: main:
; CHECK: sub x[[SPTMP:[0-9]+]], sp, #336
; CHECK: and sp, x[[SPTMP]], #0xffffffffffffff80
; CHECK: mov x[[FP:[0-9]+]], sp
; CHECK: str wzr, [x[[FP]], #332]

; CHECK-LABEL: "?catch$3@?0?main@4HA":
; CHECK: str w8, [x[[FP]], #332]
; CHECK-NEXT: .seh_startepilogue
; CHECK: ret

; CHECK-LABEL: $cppxdata$main:
; CHECK: .word -16 // UnwindHelp
; CHECK-LABEL: $handlerMap$0$main:
; CHECK-NEXT: .word 8 // Adjectives
; CHECK-NEXT: .word "??_R0?AUError@@@8"@IMGREL // Type
; CHECK-NEXT: .word -8 // CatchObjOffset
; CHECK-NEXT: .word "?catch$3@?0?main@4HA"@IMGREL // Handler

%rtti.TypeDescriptor11 = type { ptr, ptr, [12 x i8] }
%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
%eh.ThrowInfo = type { i32, i32, i32, i32 }
%struct.BigObj = type { i32, [124 x i8] }
%struct.Error = type { i32, [3 x i32] }

$"??1BigObj@@QEAA@XZ" = comdat any

$"??_R0?AUError@@@8" = comdat any

$"_CT??_R0?AUError@@@816" = comdat any

$"_CTA1?AUError@@" = comdat any

$"_TI1?AUError@@" = comdat any

@"??_7type_info@@6B@" = external constant ptr
@"??_R0?AUError@@@8" = linkonce_odr global %rtti.TypeDescriptor11 { ptr @"??_7type_info@@6B@", ptr null, [12 x i8] c".?AUError@@\00" }, comdat
@__ImageBase = external dso_local constant i8
@"_CT??_R0?AUError@@@816" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (ptr @"??_R0?AUError@@@8" to i64), i64 ptrtoint (ptr @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 16, i32 0 }, section ".xdata", comdat
@"_CTA1?AUError@@" = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (ptr @"_CT??_R0?AUError@@@816" to i64), i64 ptrtoint (ptr @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
@"_TI1?AUError@@" = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (ptr @"_CTA1?AUError@@" to i64), i64 ptrtoint (ptr @__ImageBase to i64)) to i32) }, section ".xdata", comdat

define dso_local noundef i32 @main() personality ptr @__CxxFrameHandler3 {
entry:
%retval = alloca i32, align 4
%bo = alloca %struct.BigObj, align 128
%tmp = alloca %struct.Error, align 4
%e = alloca ptr, align 8
%cleanup.dest.slot = alloca i32, align 4
store i32 0, ptr %retval, align 4
call void @llvm.memset.p0.i64(ptr align 128 %bo, i8 0, i64 128, i1 false)
%value = getelementptr inbounds nuw %struct.BigObj, ptr %bo, i32 0, i32 0
%value1 = getelementptr inbounds nuw %struct.Error, ptr %tmp, i32 0, i32 0
store i32 42, ptr %value1, align 4
%padding = getelementptr inbounds nuw %struct.Error, ptr %tmp, i32 0, i32 1
store i32 0, ptr %padding, align 4
%arrayinit.element = getelementptr inbounds i32, ptr %padding, i64 1
store i32 0, ptr %arrayinit.element, align 4
%arrayinit.element2 = getelementptr inbounds i32, ptr %padding, i64 2
store i32 0, ptr %arrayinit.element2, align 4
invoke void @_CxxThrowException(ptr %tmp, ptr @"_TI1?AUError@@") #3
to label %unreachable unwind label %catch.dispatch

catch.dispatch:
%0 = catchswitch within none [label %catch] unwind label %ehcleanup

catch:
%1 = catchpad within %0 [ptr @"??_R0?AUError@@@8", i32 8, ptr %e]
%2 = load ptr, ptr %e, align 8
%value3 = getelementptr inbounds nuw %struct.Error, ptr %2, i32 0, i32 0
%3 = load i32, ptr %value3, align 4
store i32 %3, ptr %retval, align 4
store i32 1, ptr %cleanup.dest.slot, align 4
catchret from %1 to label %catchret.dest

catchret.dest:
br label %cleanup

try.cont:
store i32 0, ptr %retval, align 4
store i32 1, ptr %cleanup.dest.slot, align 4
br label %cleanup

cleanup:
call void @"??1BigObj@@QEAA@XZ"(ptr noundef nonnull align 128 dereferenceable(4) %bo) #4
%4 = load i32, ptr %retval, align 4
ret i32 %4

ehcleanup:
%5 = cleanuppad within none []
call void @"??1BigObj@@QEAA@XZ"(ptr noundef nonnull align 128 dereferenceable(4) %bo) [ "funclet"(token %5) ]
cleanupret from %5 unwind to caller

unreachable:
unreachable
}

declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #1

declare dso_local void @_CxxThrowException(ptr, ptr)

declare dso_local i32 @__CxxFrameHandler3(...)

define linkonce_odr dso_local void @"??1BigObj@@QEAA@XZ"(ptr noundef nonnull align 128 dereferenceable(4) %this) unnamed_addr comdat {
entry:
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr, align 8
ret void
}
100 changes: 100 additions & 0 deletions llvm/test/CodeGen/AArch64/wineh-reuse-catch-alloca.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
; RUN: llc %s --mtriple=aarch64-pc-windows-msvc -o - | FileCheck %s

; Tests the fixed object layouts when two catchpads re-use the same stack
; allocation for this catch objects.

; Generated from this C++ code, with modifications to the IR (see comments in
; IR):
; https://godbolt.org/z/9qv5Yn68j
; > clang --target=aarch64-pc-windows-msvc test.cpp
; ```
; extern "C" void boom();
; extern "C" int calls_boom();
; {
; try { boom(); }
; catch (int& i) { return i; }
; catch (long& l) { return l; }
; return 0;
; }
; ```

; Only need 48 bytes on the stack, not 64.
; CHECK-LABEL: calls_boom:
; CHECK: sub sp, sp, #48
; CHECK: .seh_stackalloc 48

; Both the catch blocks load from the same address.
; CHECK-LABEL: "?catch$3@?0?calls_boom@4HA":
; CHECK: ldr x8, [x29, #24]
; CHECK-LABEL: "?catch$4@?0?calls_boom@4HA":
; CHECK: ldr x8, [x29, #24]

; There's enough space for the UnwindHelp to be at -16 instead of -32
; CHECK-LABEL: $cppxdata$calls_boom:
; CHECK: .word -16 // UnwindHelp

; Both catches have the same object offset.
; CHECK-LABEL: $handlerMap$0$calls_boom:
; CHECK: .word -8 // CatchObjOffset
; CHECK-NEXT: .word "?catch$3@?0?calls_boom@4HA"@IMGREL // Handler
; CHECK: .word -8 // CatchObjOffset
; CHECK-NEXT: .word "?catch$4@?0?calls_boom@4HA"@IMGREL // Handler

%rtti.TypeDescriptor2 = type { ptr, ptr, [3 x i8] }

$"??_R0H@8" = comdat any

$"??_R0J@8" = comdat any

@"??_7type_info@@6B@" = external constant ptr
@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { ptr @"??_7type_info@@6B@", ptr null, [3 x i8] c".H\00" }, comdat
@"??_R0J@8" = linkonce_odr global %rtti.TypeDescriptor2 { ptr @"??_7type_info@@6B@", ptr null, [3 x i8] c".J\00" }, comdat

define dso_local i32 @calls_boom() personality ptr @__CxxFrameHandler3 {
entry:
%retval = alloca i32, align 4
; MODIFICATION: Remove unusued alloca
; %l = alloca ptr, align 8
%i = alloca ptr, align 8
invoke void @boom()
to label %invoke.cont unwind label %catch.dispatch

catch.dispatch:
%0 = catchswitch within none [label %catch1, label %catch] unwind to caller

catch1:
%1 = catchpad within %0 [ptr @"??_R0H@8", i32 8, ptr %i]
%2 = load ptr, ptr %i, align 8
%3 = load i32, ptr %2, align 4
store i32 %3, ptr %retval, align 4
catchret from %1 to label %catchret.dest2

catch:
; MODIFICATION: Use %i instead of %l
%4 = catchpad within %0 [ptr @"??_R0J@8", i32 8, ptr %i]
%5 = load ptr, ptr %i, align 8
%6 = load i32, ptr %5, align 4
store i32 %6, ptr %retval, align 4
catchret from %4 to label %catchret.dest

invoke.cont:
br label %try.cont

catchret.dest:
br label %return

catchret.dest2:
br label %return

try.cont:
store i32 0, ptr %retval, align 4
br label %return

return:
%7 = load i32, ptr %retval, align 4
ret i32 %7
}

declare dso_local void @boom() #1

declare dso_local i32 @__CxxFrameHandler3(...)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/wineh-try-catch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
; CHECK: str x28, [sp, #24]
; CHECK: stp x29, x30, [sp, #32]
; CHECK: add x29, sp, #32
; CHECK: sub sp, sp, #624
; CHECK: sub sp, sp, #608
; CHECK: mov x19, sp
; CHECK: mov x0, #-2
; CHECK: stur x0, [x29, #16]
Expand Down Expand Up @@ -51,7 +51,7 @@
; CHECK: str x21, [sp, #16]
; CHECK: str x28, [sp, #24]
; CHECK: stp x29, x30, [sp, #32]
; CHECK: add x20, x19, #12
; CHECK: add x20, x19, #0

; Check that there are no further stack updates.
; CHECK-NOT: sub sp, sp
Expand Down