Skip to content

Commit fb5e46d

Browse files
committed
AMDGPU: Remove .v2bf16 buffer atomic fadd intrinsics
These are redundant with the unsuffixed versions, and they have a name collision that causes surprising behavior when the base intrinsic is used with v2bf16. The global and flat variants should be removed too, but those are complicated because they use v2i16 in place of the natural v2bf16. Those cases can soon be deleted completely in favor of atomicrmw. The GlobalISel codegen change is broken: it replaces the handling as bf16 with handling as f16. However, it is a bug that this passed the IRTranslator in the first place.
1 parent 405882d commit fb5e46d

12 files changed

+9
-78
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,27 +1337,9 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
13371337

13381338
// gfx908 intrinsic
13391339
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
1340+
1341+
// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+.
13401342
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
1341-
// gfx12+ intrinsic
1342-
def int_amdgcn_raw_buffer_atomic_fadd_v2bf16 : Intrinsic <
1343-
[llvm_v2bf16_ty],
1344-
[llvm_v2bf16_ty,
1345-
llvm_v4i32_ty,
1346-
llvm_i32_ty,
1347-
llvm_i32_ty,
1348-
llvm_i32_ty],
1349-
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1350-
AMDGPURsrcIntrinsic<1, 0>;
1351-
def int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic <
1352-
[llvm_v2bf16_ty],
1353-
[llvm_v2bf16_ty,
1354-
AMDGPUBufferRsrcTy,
1355-
llvm_i32_ty,
1356-
llvm_i32_ty,
1357-
llvm_i32_ty],
1358-
[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
1359-
ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1360-
AMDGPURsrcIntrinsic<1, 0>;
13611343

13621344
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
13631345
[data_ty],
@@ -1434,28 +1416,6 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
14341416
// gfx908 intrinsic
14351417
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
14361418
def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
1437-
// gfx12 intrinsic
1438-
def int_amdgcn_struct_buffer_atomic_fadd_v2bf16 : Intrinsic <
1439-
[llvm_v2bf16_ty],
1440-
[llvm_v2bf16_ty,
1441-
llvm_v4i32_ty,
1442-
llvm_i32_ty,
1443-
llvm_i32_ty,
1444-
llvm_i32_ty,
1445-
llvm_i32_ty],
1446-
[ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1447-
AMDGPURsrcIntrinsic<1, 0>;
1448-
def int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16 : Intrinsic <
1449-
[llvm_v2bf16_ty],
1450-
[llvm_v2bf16_ty,
1451-
AMDGPUBufferRsrcTy,
1452-
llvm_i32_ty,
1453-
llvm_i32_ty,
1454-
llvm_i32_ty,
1455-
llvm_i32_ty],
1456-
[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
1457-
ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1458-
AMDGPURsrcIntrinsic<1, 0>;
14591419

14601420
// gfx90a intrinsics
14611421
def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,6 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
290290
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
291291
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
292292
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;
293-
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD_BF16, SIbuffer_atomic_fadd_bf16>;
294293
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMIN, SIbuffer_atomic_fmin>;
295294
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMAX, SIbuffer_atomic_fmax>;
296295
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5564,7 +5564,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55645564
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
55655565
NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
55665566
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
5567-
NODE_NAME_CASE(BUFFER_ATOMIC_FADD_BF16)
55685567
NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
55695568
NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
55705569
NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,6 @@ enum NodeType : unsigned {
615615
BUFFER_ATOMIC_CMPSWAP,
616616
BUFFER_ATOMIC_CSUB,
617617
BUFFER_ATOMIC_FADD,
618-
BUFFER_ATOMIC_FADD_BF16,
619618
BUFFER_ATOMIC_FMIN,
620619
BUFFER_ATOMIC_FMAX,
621620
BUFFER_ATOMIC_COND_SUB_U32,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6011,11 +6011,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
60116011
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
60126012
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
60136013
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
6014-
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
6015-
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
6016-
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
6017-
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
6018-
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16;
60196014
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
60206015
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
60216016
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
@@ -7323,10 +7318,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73237318
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
73247319
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
73257320
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
7326-
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
7327-
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
7328-
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
7329-
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
73307321
return legalizeBufferAtomic(MI, B, IntrID);
73317322
case Intrinsic::amdgcn_rsq_clamp:
73327323
return legalizeRsqClampIntrinsic(MI, MRI, B);

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3079,7 +3079,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
30793079
return;
30803080
}
30813081
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3082-
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
30833082
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
30843083
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
30853084
applyDefaultMapping(OpdMapper);
@@ -4376,7 +4375,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
43764375
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
43774376
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
43784377
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4379-
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD_BF16:
43804378
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
43814379
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
43824380
// vdata_out

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
269269
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
270270
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
271271
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
272-
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd_v2bf16>;
273272
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
274273
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
275274
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
@@ -287,7 +286,6 @@ def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_xor>;
287286
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_inc>;
288287
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_dec>;
289288
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
290-
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16>;
291289
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
292290
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
293291
def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
@@ -305,7 +303,6 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
305303
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
306304
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
307305
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
308-
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd_v2bf16>;
309306
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
310307
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
311308
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
@@ -323,7 +320,6 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_xor>;
323320
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_inc>;
324321
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_dec>;
325322
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
326-
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16>;
327323
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
328324
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
329325
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,7 +1751,7 @@ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
17511751
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
17521752

17531753
let SubtargetPredicate = isGFX12Plus in {
1754-
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd_bf16", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
1754+
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
17551755
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
17561756

17571757
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8833,17 +8833,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
88338833
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
88348834
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
88358835
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
8836-
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd_v2bf16:
8837-
case Intrinsic::amdgcn_raw_buffer_atomic_fadd_v2bf16:
8838-
return lowerRawBufferAtomicIntrin(Op, DAG,
8839-
AMDGPUISD::BUFFER_ATOMIC_FADD_BF16);
88408836
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
88418837
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
88428838
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
8843-
case Intrinsic::amdgcn_struct_buffer_atomic_fadd_v2bf16:
8844-
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd_v2bf16:
8845-
return lowerStructBufferAtomicIntrin(Op, DAG,
8846-
AMDGPUISD::BUFFER_ATOMIC_FADD_BF16);
88478839
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
88488840
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
88498841
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
@@ -15841,7 +15833,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
1584115833
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
1584215834
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
1584315835
case AMDGPUISD::BUFFER_ATOMIC_FADD:
15844-
case AMDGPUISD::BUFFER_ATOMIC_FADD_BF16:
1584515836
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
1584615837
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
1584715838
// Target-specific read-modify-write atomics are sources of divergence.

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
222222
defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
223223
defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
224224
defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
225-
defm SIbuffer_atomic_fadd_bf16 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">;
226225
defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
227226
defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
228227
defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;

0 commit comments

Comments
 (0)