-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[RISCV] Fix assertion failure when using -fstack-clash-protection #135248
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Paul Kirth (ilovepi) ChangesWe can't assume MBBI is still pointing at MBB if we've already expanded Co-authored-by: Craig Topper <craig.topper@sifive.com> Full diff: https://github.com/llvm/llvm-project/pull/135248.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index c7b2b781422d1..a83119957d95e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -2270,11 +2270,13 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
}
// Synthesize the probe loop.
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
+static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
Register TargetReg, bool IsRVV) {
assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
bool IsRV64 = Subtarget.is64Bit();
@@ -2363,7 +2365,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MI->getIterator();
DebugLoc DL = MBB.findDebugLoc(MBBI);
Register TargetReg = MI->getOperand(1).getReg();
- emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg,
+ emitStackProbeInline(MBBI, DL, TargetReg,
(MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
MBBI->eraseFromParent();
}
diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll
new file mode 100644
index 0000000000000..c7988cbc685fa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr135206.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-fuchsia"
+
+%"struct.fuchsia_sysmem::wire::BufferCollectionConstraints" = type { %"struct.fuchsia_sysmem::wire::BufferUsage", i32, i32, i32, i32, i32, i8, %"struct.fuchsia_sysmem::wire::BufferMemoryConstraints", i32, [4 x i8], %"struct.fidl::Array.94" }
+%"struct.fuchsia_sysmem::wire::BufferUsage" = type { i32, i32, i32, i32, i32 }
+%"struct.fuchsia_sysmem::wire::BufferMemoryConstraints" = type { i32, i32, i8, i8, i8, i8, i8, i32, %"struct.fidl::Array" }
+%"struct.fidl::Array" = type { [32 x i64] }
+%"struct.fidl::Array.94" = type { [32 x %"struct.fuchsia_sysmem::wire::ImageFormatConstraints"] }
+%"struct.fuchsia_sysmem::wire::ImageFormatConstraints" = type <{ %"struct.fuchsia_sysmem::wire::PixelFormat", i32, %"struct.fidl::Array.95", i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x i8] }>
+%"struct.fuchsia_sysmem::wire::PixelFormat" = type { i32, i8, %"struct.fuchsia_sysmem::wire::FormatModifier" }
+%"struct.fuchsia_sysmem::wire::FormatModifier" = type { i64 }
+%"struct.fidl::Array.95" = type { [32 x %"struct.fuchsia_sysmem::wire::ColorSpace"] }
+%"struct.fuchsia_sysmem::wire::ColorSpace" = type { i32 }
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0
+
+define fastcc i1 @_ZN12_GLOBAL__N_121AttachTokenSucceedsV1EbbN3fit13function_implILm16ELb0EFvRN14fuchsia_sysmem4wire27BufferCollectionConstraintsEENSt3__29allocatorISt4byteEEEESB_SB_NS1_ILm16ELb0EFvRNS3_21BufferCollectionInfo2EESA_EEj() #1 {
+; CHECK-LABEL: _ZN12_GLOBAL__N_121AttachTokenSucceedsV1EbbN3fit13function_implILm16ELb0EFvRN14fuchsia_sysmem4wire27BufferCollectionConstraintsEENSt3__29allocatorISt4byteEEEESB_SB_NS1_ILm16ELb0EFvRNS3_21BufferCollectionInfo2EESA_EEj:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -2032
+; CHECK-NEXT: .cfi_def_cfa_offset 2032
+; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: sub t1, sp, a0
+; CHECK-NEXT: .cfi_def_cfa t1, 20480
+; CHECK-NEXT: lui t2, 1
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub sp, sp, t2
+; CHECK-NEXT: sd zero, 0(sp)
+; CHECK-NEXT: bne sp, t1, .LBB0_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: .cfi_def_cfa_register sp
+; CHECK-NEXT: addi sp, sp, -848
+; CHECK-NEXT: .cfi_def_cfa_offset 21328
+; CHECK-NEXT: csrr t1, vlenb
+; CHECK-NEXT: .cfi_def_cfa t1, -8
+; CHECK-NEXT: lui t2, 1
+; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub sp, sp, t2
+; CHECK-NEXT: sd zero, 0(sp)
+; CHECK-NEXT: sub t1, t1, t2
+; CHECK-NEXT: bge t1, t2, .LBB0_3
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: .cfi_def_cfa_register sp
+; CHECK-NEXT: sub sp, sp, t1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0f, 0x72, 0x00, 0x11, 0xc0, 0xb6, 0x01, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 23360 + 1 * vlenb
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: li a0, 170
+; CHECK-NEXT: li s1, 32
+; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: lui a0, 6
+; CHECK-NEXT: addiw a0, a0, -1264
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-NEXT: ld s2, %lo(.LCPI0_0)(a1)
+; CHECK-NEXT: vse8.v v8, (s1)
+; CHECK-NEXT: li s3, 16
+; CHECK-NEXT: vse8.v v8, (s3)
+; CHECK-NEXT: vse8.v v8, (zero)
+; CHECK-NEXT: sd s2, 48(zero)
+; CHECK-NEXT: jalr s0
+; CHECK-NEXT: lui a0, 6
+; CHECK-NEXT: addiw a0, a0, -1264
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (zero)
+; CHECK-NEXT: vse8.v v8, (s3)
+; CHECK-NEXT: vse8.v v8, (s1)
+; CHECK-NEXT: sd s2, 48(zero)
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: .cfi_def_cfa sp, 2032
+; CHECK-NEXT: lui a1, 5
+; CHECK-NEXT: addiw a1, a1, 848
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: .cfi_def_cfa_offset 2032
+; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: .cfi_restore s2
+; CHECK-NEXT: .cfi_restore s3
+; CHECK-NEXT: addi sp, sp, 2032
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %1 = alloca %"struct.fuchsia_sysmem::wire::BufferCollectionConstraints", align 8
+ %2 = alloca %"struct.fuchsia_sysmem::wire::BufferCollectionConstraints", align 8
+ %3 = alloca %"struct.fuchsia_sysmem::wire::BufferCollectionConstraints", align 8
+ call void @llvm.memset.p0.i64(ptr null, i8 -86, i64 56, i1 false)
+ %4 = call ptr null()
+ call void @llvm.memset.p0.i64(ptr null, i8 -86, i64 56, i1 false)
+ ret i1 false
+}
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #1 = { "probe-stack"="inline-asm" "target-features"="+v" }
|
@topperc this is basically your proposed patch w/ a reduced test case. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
|
||
; RUN: llc < %s -o - | FileCheck %s | ||
|
||
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" | ||
target triple = "riscv64-unknown-fuchsia" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
; RUN: llc < %s -o - | FileCheck %s | |
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" | |
target triple = "riscv64-unknown-fuchsia" | |
; RUN: llc -mtriple=riscv64-unknown-fuchsia < %s | FileCheck %s |
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
%"struct.fuchsia_sysmem::wire::ColorSpace" = type { i32 } | ||
|
||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) | ||
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we drop the attributes that aren't needed (both on the arguments and in #)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
tried using a small extern function, but I think this needs to be llvm.memset, and the number of args are fixed. I was able to drop the attrs though.
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) | ||
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0 | ||
|
||
define fastcc i1 @_ZN12_GLOBAL__N_121AttachTokenSucceedsV1EbbN3fit13function_implILm16ELb0EFvRN14fuchsia_sysmem4wire27BufferCollectionConstraintsEENSt3__29allocatorISt4byteEEEESB_SB_NS1_ILm16ELb0EFvRNS3_21BufferCollectionInfo2EESA_EEj() #1 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
define fastcc i1 @_ZN12_GLOBAL__N_121AttachTokenSucceedsV1EbbN3fit13function_implILm16ELb0EFvRN14fuchsia_sysmem4wire27BufferCollectionConstraintsEENSt3__29allocatorISt4byteEEEESB_SB_NS1_ILm16ELb0EFvRNS3_21BufferCollectionInfo2EESA_EEj() #1 { | |
define fastcc i1 @foo() nounwind #1 { |
(plus same comment re #)
Also, is the fastcc important for reproducing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. fastcc
did not seem to be required.
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
%2 = alloca %"struct.fuchsia_sysmem::wire::BufferCollectionConstraints", align 8 | ||
%3 = alloca %"struct.fuchsia_sysmem::wire::BufferCollectionConstraints", align 8 | ||
call void @llvm.memset.p0.i64(ptr null, i8 -86, i64 56, i1 false) | ||
%4 = call ptr null() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Putting this kind of UB in the test seems like a bad idea, use a dummy external function instead, say?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably a good idea. This is just what came out of llvm-reduce
from our reproducer. I'll take a pass at minimizing this further.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
} | ||
|
||
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) } | ||
attributes #1 = { "probe-stack"="inline-asm" "target-features"="+v" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is V important for the reproducer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not sure. Let me see if it still works w/o it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, +v
did seem to be required. Looking at the fix again, that makes sense due to the (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)
bits. I'm guessing that if only one of the stack probes gets used, it wouldn't foul up the iterator.
8578165
to
211b8e0
Compare
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) | ||
declare ptr @bar() | ||
|
||
define i1 @foo() #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nounwind?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
ret i1 false | ||
} | ||
|
||
attributes #0 = { "probe-stack"="inline-asm" "target-features"="+v" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps inline this for clarity?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/test/CodeGen/RISCV/pr135206.ll
Outdated
; CHECK-NEXT: ret | ||
%1 = alloca %"buff", align 8 | ||
call void @llvm.memset.p0.i64(ptr %1, i8 86, i64 56, i1 false) | ||
%4 = call ptr @bar() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this actually need to return a value, or can it be call void @bar()
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, yeah it can be void. Good catch!
9ff8f04
to
233c7b1
Compare
We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes #135206 Co-authored-by: Craig Topper <craig.topper@sifive.com>
233c7b1
to
155bc7f
Compare
@jrtc27 I think I've addressed your comments. Anything else I should change in the test? |
I plan to merge this tomorrow if there isn't any more feedback. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…vm#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes llvm#135206 Co-authored-by: Craig Topper <craig.topper@sifive.com>
…vm#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes llvm#135206 Co-authored-by: Craig Topper <craig.topper@sifive.com>
…vm#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes llvm#135206 Co-authored-by: Craig Topper <craig.topper@sifive.com>
We can't assume MBBI is still pointing at MBB if we've already expanded
a probe. We need to re-query the MBB from MBBI. Fixes #135206
Co-authored-by: Craig Topper craig.topper@sifive.com