-
Notifications
You must be signed in to change notification settings - Fork 15.1k
release/21.x: [LoongArch] Strengthen stack size estimation for LSX/LASX extension (#146455) #149777
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@heiher What do you think about merging this PR to the release branch? |
|
@llvm/pr-subscribers-backend-loongarch Author: None (llvmbot) Changes: Backport 64a0478. Requested by: @tangaac. Patch is 83.60 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/149777.diff 16 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index ac5e7f3891c72..1493bf4cba695 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -158,7 +158,12 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
// estimateStackSize has been observed to under-estimate the final stack
// size, so give ourselves wiggle-room by checking for stack size
// representable an 11-bit signed field rather than 12-bits.
- if (!isInt<11>(MFI.estimateStackSize(MF)))
+ // For [x]vstelm.{b/h/w/d} memory instructions with 8-bit imm offset, 7-bit
+ // signed field is fine.
+ unsigned EstimateStackSize = MFI.estimateStackSize(MF);
+ if (!isInt<11>(EstimateStackSize) ||
+ (MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
+ !isInt<7>(EstimateStackSize)))
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
// For CFR spill.
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
index d07e2914c753a..f7653af1fa9ba 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
@@ -122,23 +122,23 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
define i64 @caller_large_scalars() nounwind {
; CHECK-LABEL: caller_large_scalars:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -80
-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $zero, $sp, 24
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $zero, $sp, 40
; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 8
+; CHECK-NEXT: vst $vr0, $sp, 24
; CHECK-NEXT: ori $a0, $zero, 2
-; CHECK-NEXT: st.d $a0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: vst $vr0, $sp, 40
+; CHECK-NEXT: st.d $a0, $sp, 16
+; CHECK-NEXT: st.d $zero, $sp, 72
+; CHECK-NEXT: vst $vr0, $sp, 56
; CHECK-NEXT: ori $a2, $zero, 1
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: addi.d $a1, $sp, 0
-; CHECK-NEXT: st.d $a2, $sp, 32
+; CHECK-NEXT: addi.d $a0, $sp, 48
+; CHECK-NEXT: addi.d $a1, $sp, 16
+; CHECK-NEXT: st.d $a2, $sp, 48
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars(i256 1, i256 2)
ret i64 %1
@@ -177,20 +177,20 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
define i64 @caller_large_scalars_exhausted_regs() nounwind {
; CHECK-LABEL: caller_large_scalars_exhausted_regs:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $a0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $sp, -112
+; CHECK-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: st.d $a0, $sp, 8
; CHECK-NEXT: ori $a0, $zero, 9
; CHECK-NEXT: st.d $a0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 40
+; CHECK-NEXT: st.d $zero, $sp, 56
; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 24
+; CHECK-NEXT: vst $vr0, $sp, 40
; CHECK-NEXT: ori $a0, $zero, 10
-; CHECK-NEXT: st.d $a0, $sp, 16
-; CHECK-NEXT: st.d $zero, $sp, 72
+; CHECK-NEXT: st.d $a0, $sp, 32
+; CHECK-NEXT: st.d $zero, $sp, 88
; CHECK-NEXT: ori $a0, $zero, 8
-; CHECK-NEXT: st.d $a0, $sp, 48
+; CHECK-NEXT: st.d $a0, $sp, 64
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: ori $a2, $zero, 3
@@ -198,12 +198,12 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
; CHECK-NEXT: ori $a4, $zero, 5
; CHECK-NEXT: ori $a5, $zero, 6
; CHECK-NEXT: ori $a6, $zero, 7
-; CHECK-NEXT: addi.d $a7, $sp, 48
-; CHECK-NEXT: vst $vr0, $sp, 56
+; CHECK-NEXT: addi.d $a7, $sp, 64
+; CHECK-NEXT: vst $vr0, $sp, 72
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars_exhausted_regs)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 112
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars_exhausted_regs(
i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9,
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
index c88b67f13d1e7..da8c3e93f6842 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
@@ -1252,8 +1252,8 @@ define i32 @caller_half_on_stack() nounwind {
;
; LA64F-LP64S-LABEL: caller_half_on_stack:
; LA64F-LP64S: # %bb.0:
-; LA64F-LP64S-NEXT: addi.d $sp, $sp, -80
-; LA64F-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, -96
+; LA64F-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; LA64F-LP64S-NEXT: lu12i.w $a0, -12
; LA64F-LP64S-NEXT: ori $a1, $a0, 3200
; LA64F-LP64S-NEXT: lu32i.d $a1, 0
@@ -1292,8 +1292,8 @@ define i32 @caller_half_on_stack() nounwind {
; LA64F-LP64S-NEXT: st.w $t0, $sp, 0
; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
; LA64F-LP64S-NEXT: jirl $ra, $ra, 0
-; LA64F-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-LP64S-NEXT: addi.d $sp, $sp, 80
+; LA64F-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, 96
; LA64F-LP64S-NEXT: ret
;
; LA64F-LP64D-LABEL: caller_half_on_stack:
@@ -1336,8 +1336,8 @@ define i32 @caller_half_on_stack() nounwind {
;
; LA64D-LP64S-LABEL: caller_half_on_stack:
; LA64D-LP64S: # %bb.0:
-; LA64D-LP64S-NEXT: addi.d $sp, $sp, -80
-; LA64D-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, -96
+; LA64D-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; LA64D-LP64S-NEXT: lu12i.w $a0, -12
; LA64D-LP64S-NEXT: ori $a1, $a0, 3200
; LA64D-LP64S-NEXT: lu32i.d $a1, 0
@@ -1376,8 +1376,8 @@ define i32 @caller_half_on_stack() nounwind {
; LA64D-LP64S-NEXT: st.w $t0, $sp, 0
; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
; LA64D-LP64S-NEXT: jirl $ra, $ra, 0
-; LA64D-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64D-LP64S-NEXT: addi.d $sp, $sp, 80
+; LA64D-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, 96
; LA64D-LP64S-NEXT: ret
;
; LA64D-LP64D-LABEL: caller_half_on_stack:
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index 52d8dd05aaa4c..1a9de3b0ef3d1 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -14,41 +14,41 @@
define dso_local noundef signext i32 @main() nounwind {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -272
-; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $sp, $sp, -288
+; CHECK-NEXT: st.d $ra, $sp, 280 # 8-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
; CHECK-NEXT: xvld $xr0, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 112 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr1, $sp, 80 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2)
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_2)
-; CHECK-NEXT: xvst $xr2, $sp, 32 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr2, $sp, 48 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3)
; CHECK-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI0_3)
-; CHECK-NEXT: xvst $xr3, $sp, 0 # 32-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 136
-; CHECK-NEXT: xvst $xr1, $sp, 168
-; CHECK-NEXT: xvst $xr2, $sp, 200
-; CHECK-NEXT: xvst $xr3, $sp, 232
-; CHECK-NEXT: addi.d $a0, $sp, 136
+; CHECK-NEXT: xvst $xr3, $sp, 16 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 152
+; CHECK-NEXT: xvst $xr1, $sp, 184
+; CHECK-NEXT: xvst $xr2, $sp, 216
+; CHECK-NEXT: xvst $xr3, $sp, 248
+; CHECK-NEXT: addi.d $a0, $sp, 152
; CHECK-NEXT: pcaddu18i $ra, %call36(foo)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 136
-; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 168
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 200
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 232
-; CHECK-NEXT: addi.d $a0, $sp, 136
+; CHECK-NEXT: xvld $xr0, $sp, 112 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 152
+; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 184
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 216
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 248
+; CHECK-NEXT: addi.d $a0, $sp, 152
; CHECK-NEXT: pcaddu18i $ra, %call36(bar)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 272
+; CHECK-NEXT: ld.d $ra, $sp, 280 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 288
; CHECK-NEXT: ret
entry:
%s = alloca %struct.S, align 2
diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
index ccc5c703e71ed..15ac95dfc6c55 100644
--- a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
+++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
@@ -28,12 +28,12 @@ define void @func() {
; CHECK-NEXT: ld.w $a3, $a1, 0
; CHECK-NEXT: ld.w $a2, $a1, 0
; CHECK-NEXT: ld.w $a0, $a1, 0
-; CHECK-NEXT: st.d $fp, $sp, 0
+; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; CHECK-NEXT: lu12i.w $fp, 1
; CHECK-NEXT: ori $fp, $fp, 12
; CHECK-NEXT: add.d $fp, $sp, $fp
; CHECK-NEXT: st.w $t8, $fp, 0
-; CHECK-NEXT: ld.d $fp, $sp, 0
+; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; CHECK-NEXT: st.w $t8, $a1, 0
; CHECK-NEXT: st.w $t7, $a1, 0
; CHECK-NEXT: st.w $t6, $a1, 0
diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll
index 048703029d8c6..b29d8634854f3 100644
--- a/llvm/test/CodeGen/LoongArch/frame.ll
+++ b/llvm/test/CodeGen/LoongArch/frame.ll
@@ -1,5 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-lsx < %s | FileCheck %s --check-prefixes=CHECK,NOLSX
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LSX
%struct.key_t = type { i32, [16 x i8] }
@@ -7,20 +8,35 @@ declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
declare void @test1(ptr)
define i32 @test() nounwind {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -32
-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; CHECK-NEXT: st.w $zero, $sp, 16
-; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 4
-; CHECK-NEXT: pcaddu18i $ra, %call36(test1)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 32
-; CHECK-NEXT: ret
+; NOLSX-LABEL: test:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -32
+; NOLSX-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; NOLSX-NEXT: st.w $zero, $sp, 16
+; NOLSX-NEXT: st.d $zero, $sp, 8
+; NOLSX-NEXT: st.d $zero, $sp, 0
+; NOLSX-NEXT: addi.d $a0, $sp, 4
+; NOLSX-NEXT: pcaddu18i $ra, %call36(test1)
+; NOLSX-NEXT: jirl $ra, $ra, 0
+; NOLSX-NEXT: move $a0, $zero
+; NOLSX-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; NOLSX-NEXT: addi.d $sp, $sp, 32
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -32
+; LSX-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LSX-NEXT: st.w $zero, $sp, 16
+; LSX-NEXT: vrepli.b $vr0, 0
+; LSX-NEXT: vst $vr0, $sp, 0
+; LSX-NEXT: addi.d $a0, $sp, 4
+; LSX-NEXT: pcaddu18i $ra, %call36(test1)
+; LSX-NEXT: jirl $ra, $ra, 0
+; LSX-NEXT: move $a0, $zero
+; LSX-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LSX-NEXT: addi.d $sp, $sp, 32
+; LSX-NEXT: ret
%key = alloca %struct.key_t, align 4
call void @llvm.memset.p0.i64(ptr %key, i8 0, i64 20, i1 false)
%1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0
@@ -98,3 +114,62 @@ define void @test_large_frame_size_1234576() "frame-pointer"="all" {
%1 = alloca i8, i32 1234567
ret void
}
+
+;; Note: will create an emergency spill slot, if (!isInt<7>(StackSize)).
+;; Should involve only one SP-adjusting addi per adjustment.
+;; LSX 112 + 16(emergency slot) = 128
+define void @test_frame_size_112() {
+; NOLSX-LABEL: test_frame_size_112:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -112
+; NOLSX-NEXT: .cfi_def_cfa_offset 112
+; NOLSX-NEXT: addi.d $sp, $sp, 112
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_112:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -128
+; LSX-NEXT: .cfi_def_cfa_offset 128
+; LSX-NEXT: addi.d $sp, $sp, 128
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 112
+ ret void
+}
+
+;; LSX 128 + 16(emergency slot) = 144
+define void @test_frame_size_128() {
+; NOLSX-LABEL: test_frame_size_128:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -128
+; NOLSX-NEXT: .cfi_def_cfa_offset 128
+; NOLSX-NEXT: addi.d $sp, $sp, 128
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_128:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -144
+; LSX-NEXT: .cfi_def_cfa_offset 144
+; LSX-NEXT: addi.d $sp, $sp, 144
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 128
+ ret void
+}
+
+;; LSX 144 + 16(emergency slot) = 160
+define void @test_frame_size_144() {
+; NOLSX-LABEL: test_frame_size_144:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -144
+; NOLSX-NEXT: .cfi_def_cfa_offset 144
+; NOLSX-NEXT: addi.d $sp, $sp, 144
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_144:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -160
+; LSX-NEXT: .cfi_def_cfa_offset 160
+; LSX-NEXT: addi.d $sp, $sp, 160
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 144
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
index 402ddb9ad941b..5a55b253c77bb 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
@@ -6,11 +6,11 @@
define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 dereferenceable(48) %b, i64 %i) {
; CHECK-LABEL: box:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: addi.d $sp, $sp, -112
+; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: slli.d $a2, $a1, 5
; CHECK-NEXT: alsl.d $a1, $a1, $a2, 4
-; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: addi.d $a2, $sp, 16
; CHECK-NEXT: add.d $a3, $a2, $a1
; CHECK-NEXT: vldx $vr0, $a1, $a2
; CHECK-NEXT: vld $vr1, $a3, 32
@@ -18,7 +18,7 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: vst $vr1, $a0, 32
; CHECK-NEXT: vst $vr2, $a0, 16
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: addi.d $sp, $sp, 112
; CHECK-NEXT: ret
%1 = alloca [2 x %Box], align 16
%2 = getelementptr inbounds [2 x %Box], ptr %1, i64 0, i64 %i
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 789b51d9b5e5b..9528280d181a3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -6,10 +6,10 @@ declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32)
define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -80
-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.w $fa0, $a0
@@ -18,79 +18,79 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $x...
[truncated]
|
|
This patch also updates the code generation for two newly added test cases (which have not yet been optimized in release/21.x):
|
LGTM. |
…lvm#146455) This patch adds an emergency spill slot for use when the register scavenger runs out of registers. PR llvm#139201 introduced `vstelm` instructions, which have only an 8-bit immediate offset; as a result, there could be no spill slot available to store the spilled registers. (cherry picked from commit 64a0478)
|
@tangaac (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 64a0478
Requested by: @tangaac