Skip to content

Commit 2a92ffd

Browse files
committed
[AArch64] Fix invalid use of ld1/st1 in stack alloc
This patch fixes invalid use of the scalar+immediate variant of the ld1/st1 instructions during stack allocation, a regression introduced by c4bac7f. That commit used ld1/st1 even when the stack offset was outside the immediate range of these instructions, producing invalid assembly.
1 parent 7efa068 commit 2a92ffd

File tree

4 files changed

+1020
-607
lines changed

4 files changed

+1020
-607
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3019,6 +3019,7 @@ static void computeCalleeSaveRegisterPairs(
30193019
ByteOffset += StackFillDir * StackHazardSize;
30203020
LastReg = RPI.Reg1;
30213021

3022+
int Scale = RPI.getScale();
30223023
// Add the next reg to the pair if it is in the same register class.
30233024
if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
30243025
Register NextReg = CSI[i + RegInc].getReg();
@@ -3044,9 +3045,14 @@ static void computeCalleeSaveRegisterPairs(
30443045
case RegPairInfo::PPR:
30453046
break;
30463047
case RegPairInfo::ZPR:
3047-
if (AFI->getPredicateRegForFillSpill() != 0)
3048-
if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
3048+
if (AFI->getPredicateRegForFillSpill() != 0 &&
3049+
((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
3050+
// Calculate offset of register pair to see if pair instruction can be
3051+
// used.
3052+
int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
3053+
if ((-17 < Offset && Offset < 15) && (Offset % 2 == 0))
30493054
RPI.Reg2 = NextReg;
3055+
}
30503056
break;
30513057
case RegPairInfo::VG:
30523058
break;
@@ -3086,7 +3092,6 @@ static void computeCalleeSaveRegisterPairs(
30863092
if (NeedsWinCFI &&
30873093
RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
30883094
RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
3089-
int Scale = RPI.getScale();
30903095

30913096
int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
30923097
assert(OffsetPre % Scale == 0);
@@ -3355,8 +3360,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
33553360
MachineMemOperand::MOStore, Size, Alignment));
33563361
MIB.addReg(PnReg);
33573362
MIB.addReg(AArch64::SP)
3358-
.addImm(RPI.Offset) // [sp, #offset*scale],
3359-
// where factor*scale is implicit
3363+
.addImm(RPI.Offset / 2) // [sp, #offset*2*scale],
3364+
// where scale is implicit
33603365
.setMIFlag(MachineInstr::FrameSetup);
33613366
MIB.addMemOperand(MF.getMachineMemOperand(
33623367
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
@@ -3522,8 +3527,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
35223527
MachineMemOperand::MOLoad, Size, Alignment));
35233528
MIB.addReg(PnReg);
35243529
MIB.addReg(AArch64::SP)
3525-
.addImm(RPI.Offset) // [sp, #offset*scale]
3526-
// where factor*scale is implicit
3530+
.addImm(RPI.Offset / 2) // [sp, #offset*2*scale]
3531+
// where scale is implicit
35273532
.setMIFlag(MachineInstr::FrameDestroy);
35283533
MIB.addMemOperand(MF.getMachineMemOperand(
35293534
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),

llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -332,24 +332,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
332332
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333333
; CHECK-NEXT: ptrue pn8.b
334334
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335-
; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
336-
; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
335+
; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
336+
; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
337337
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338-
; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
339-
; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
338+
; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
339+
; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
340340
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341-
; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
342-
; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
341+
; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
342+
; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
343343
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344-
; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
344+
; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
345345
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346346
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347347
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
348348
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349349
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350350
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351351
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352-
; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
352+
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
353+
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353354
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354355
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355356
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -372,15 +373,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
372373
; CHECK-NEXT: addvl sp, sp, #1
373374
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374375
; CHECK-NEXT: ptrue pn8.b
376+
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
377+
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
378+
; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
379+
; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
380+
; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
381+
; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
382+
; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
383+
; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
384+
; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
375385
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376-
; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377-
; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378-
; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379-
; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380-
; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381-
; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382-
; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383-
; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384386
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385387
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386388
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -427,24 +429,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
427429
; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428430
; FP-CHECK-NEXT: ptrue pn8.b
429431
; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430-
; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
431-
; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
432+
; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
433+
; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
432434
; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433-
; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
434-
; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
435+
; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
436+
; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
435437
; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436-
; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
437-
; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
438+
; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
439+
; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
438440
; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439-
; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
441+
; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
440442
; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441443
; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442444
; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
443445
; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444446
; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445447
; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446448
; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447-
; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
449+
; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
450+
; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448451
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449452
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450453
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -465,15 +468,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
465468
; FP-CHECK-NEXT: .cfi_restore vg
466469
; FP-CHECK-NEXT: addvl sp, sp, #1
467470
; FP-CHECK-NEXT: ptrue pn8.b
471+
; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
472+
; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
473+
; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
474+
; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
475+
; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
476+
; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
477+
; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
478+
; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
479+
; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
468480
; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469-
; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470-
; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471-
; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472-
; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473-
; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474-
; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475-
; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476-
; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477481
; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478482
; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479483
; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload

0 commit comments

Comments
 (0)