Skip to content

Commit

Permalink
[AMDGPU] Set MaxAtomicSizeInBitsSupported. (#75185)
Browse files Browse the repository at this point in the history
This will result in larger atomic operations getting expanded to
`__atomic_*` libcalls via AtomicExpandPass, which matches what Clang
already does in the frontend.

While AMDGPU currently disables the use of all libcalls, I've changed it
to instead disable all of them _except_ the atomic ones. Those are
already emitted by the Clang frontend, and enabling them in the
backend allows the same behavior there.
  • Loading branch information
jyknight authored Dec 18, 2023
1 parent 83680f8 commit 137f785
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 16 deletions.
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);

// There are no libcalls of any kind.
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
// Disable most libcalls.
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
}

setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
Expand Down Expand Up @@ -556,6 +558,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
ISD::FSUB, ISD::FNEG,
ISD::FABS, ISD::AssertZext,
ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});

setMaxAtomicSizeInBitsSupported(64);
}

bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AMDGPU/atomic-oversize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

define void @test(ptr %a) nounwind {
; CHECK-LABEL: test:
; i128 (128 bits) is wider than the 64-bit max atomic size this target
; supports, so AtomicExpandPass must lower both operations to the
; __atomic_* libcalls rather than native instructions.
; CHECK: __atomic_load_16
; CHECK: __atomic_store_16
%1 = load atomic i128, ptr %a seq_cst, align 16
store atomic i128 %1, ptr %a seq_cst, align 16
ret void
}
24 changes: 11 additions & 13 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/unaligned-atomic.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
; RUN: not --crash opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s 2>&1 | FileCheck %s
; The AtomicExpand pass cannot handle missing libcalls (yet) so reports a fatal error.
; CHECK: LLVM ERROR: expandAtomicOpToLibcall shouldn't fail for Load
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -atomic-expand %s 2>&1 | FileCheck --check-prefix=GCN %s

define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
; GCN-LABEL: @atomic_load_global_align1(
; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4
; GCN-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP3]])
; GCN-NEXT: call void @0(i64 4, ptr [[TMP2]], ptr [[TMP3]], i32 5)
; GCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; GCN-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP3]])
; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
; GCN-NEXT: call void @__atomic_load(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 5)
; GCN-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4
; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
; GCN-NEXT: ret i32 [[TMP5]]
;
%val = load atomic i32, ptr addrspace(1) %ptr seq_cst, align 1
Expand All @@ -19,11 +17,11 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) %ptr) {
define void @atomic_store_global_align1(ptr addrspace(1) %ptr, i32 %val) {
; GCN-LABEL: @atomic_store_global_align1(
; GCN-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4
; GCN-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP3]])
; GCN-NEXT: store i32 [[VAL:%.*]], ptr [[TMP3]], align 4
; GCN-NEXT: call void @1(i64 4, ptr [[TMP2]], ptr [[TMP3]], i32 0)
; GCN-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP3]])
; GCN-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5)
; GCN-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP3]])
; GCN-NEXT: store i32 [[VAL:%.*]], ptr addrspace(5) [[TMP3]], align 4
; GCN-NEXT: call void @__atomic_store(i64 4, ptr [[TMP2]], ptr addrspace(5) [[TMP3]], i32 0)
; GCN-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP3]])
; GCN-NEXT: ret void
;
store atomic i32 %val, ptr addrspace(1) %ptr monotonic, align 1
Expand Down

0 comments on commit 137f785

Please sign in to comment.