From f314e12494ed73290983a7db8e15c21578f437b3 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 15 Oct 2024 11:56:40 +0100 Subject: [PATCH] [AArch64][SME] Fix iterator to fixupCalleeSaveRestoreStackOffset (#110855) The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be incorrect when it tries to skip over the instructions that get the current value of 'vg', when there is a 'rdsvl' instruction straight after the prologue. That's because it doesn't check that the instruction is still a 'frame-setup' instruction. --- .../Target/AArch64/AArch64FrameLowering.cpp | 9 ++--- llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 38 +++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index be33331be4e8ff..1b8eac7fac21f7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1953,12 +1953,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // pointer bump above. while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { - // Move past instructions generated to calculate VG - if (requiresSaveVG(MF)) - while (isVGInstruction(MBBI)) - ++MBBI; - - if (CombineSPBump) + if (CombineSPBump && + // Only fix-up frame-setup load/store instructions. + (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); ++MBBI; diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index 8724e7c1c368d9..5adeef3ab74552 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -1066,6 +1066,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ret void } +; The algorithm that fixes up the offsets of the callee-save/restore +; instructions must jump over the instructions that instantiate the current +; 'VG' value. We must make sure that it doesn't consider any RDSVL in +; user-code as if it is part of the frame-setup when doing so. +define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind { +; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue: +; NO-SVE-CHECK: // %bb.0: +; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x9, x0 +; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg +; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x0, x9 +; NO-SVE-CHECK-NEXT: rdsvl x8, #1 +; NO-SVE-CHECK-NEXT: add x29, sp, #64 +; NO-SVE-CHECK-NEXT: lsr x8, x8, #3 +; NO-SVE-CHECK-NEXT: mov x1, x0 +; NO-SVE-CHECK-NEXT: smstart sm +; NO-SVE-CHECK-NEXT: mov x0, x8 +; NO-SVE-CHECK-NEXT: bl bar +; NO-SVE-CHECK-NEXT: smstop sm +; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ret + %some_alloc = alloca i64, align 8 + %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd() + call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled" + ret void +} + +declare void @bar(i64, i64) + ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables ; if the function contains a streaming-mode change.