Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Call the FINI_ARRAY destructors in the correct order #71815

Merged
merged 1 commit into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,22 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
//
// extern "C" void * __init_array_start[];
// extern "C" void * __init_array_end[];
// extern "C" void * __fini_array_start[];
// extern "C" void * __fini_array_end[];
//
// using InitCallback = void();
// using FiniCallback = void(void);
//
// void call_init_array_callbacks() {
// for (auto start = __init_array_start; start != __init_array_end; ++start)
// reinterpret_cast<InitCallback *>(*start)();
// }
//
// void call_fini_array_callbacks() {
// size_t fini_array_size = __fini_array_end - __fini_array_start;
// for (size_t i = fini_array_size; i > 0; --i)
// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
// }
static void createInitOrFiniCalls(Function &F, bool IsCtor) {
Module &M = *F.getParent();
LLVMContext &C = M.getContext();
Expand Down Expand Up @@ -96,15 +105,39 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
// for now we just call them with no arguments.
auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});

IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
Constant *Start = Begin;
Constant *Stop = End;
// The destructor array must be called in reverse order. Get a constant
// expression to the end of the array and iterate backwards instead.
if (!IsCtor) {
Type *Int64Ty = IntegerType::getInt64Ty(C);
auto *Offset = ConstantExpr::getSub(
ConstantExpr::getAShr(
ConstantExpr::getSub(ConstantExpr::getPtrToInt(End, Int64Ty),
ConstantExpr::getPtrToInt(Begin, Int64Ty)),
ConstantInt::get(Int64Ty, 3)),
ConstantInt::get(Int64Ty, 1));
Start = ConstantExpr::getGetElementPtr(
ArrayType::get(IRB.getPtrTy(), 0), Begin,
ArrayRef<Constant *>({ConstantInt::get(Int64Ty, 0), Offset}),
/*InBounds=*/true);
Stop = Begin;
jhuber6 marked this conversation as resolved.
Show resolved Hide resolved
}

IRB.CreateCondBr(
IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
Stop),
LoopBB, ExitBB);
IRB.SetInsertPoint(LoopBB);
auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
CallBackPHI, "callback");
IRB.CreateCall(CallBackTy, CallBack);
auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
auto *NewCallBack =
IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
NewCallBack, Stop, "end");
CallBackPHI->addIncoming(Start, &F.getEntryBlock());
CallBackPHI->addIncoming(NewCallBack, LoopBB);
IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
IRB.SetInsertPoint(ExitBB);
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ define void @bar() addrspace(1) {
ret void
}



;.
; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }]
; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }]
Expand Down Expand Up @@ -65,13 +63,13 @@ define void @bar() addrspace(1) {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void
Expand Down
23 changes: 7 additions & 16 deletions llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,19 @@
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]





; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd

; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8

; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd

; METADATA: amdhsa.kernels:
; METADATA: .kind: init
; METADATA: .max_flat_workgroup_size: 1
Expand Down Expand Up @@ -53,13 +52,6 @@ define internal void @bar() {
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
;.
; CHECK-LABEL: define internal void @foo() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @bar() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Expand All @@ -79,18 +71,17 @@ define internal void @bar() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
;.
27 changes: 5 additions & 22 deletions llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS


; UTC_ARGS: --disable
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]

; UTC_ARGS: --disable
; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
Expand Down Expand Up @@ -36,22 +36,6 @@ define internal void @bar.5() {
ret void
}

; CHECK-LABEL: define internal void @foo() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @bar() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @foo.5() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @bar.5() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
Expand All @@ -70,14 +54,13 @@ define internal void @bar.5() {
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;