Skip to content

Commit a440a96

Browse files
authored
AMDGPU: Start selecting flat/global atomicrmw fmin/fmax. (#95592)
Define subtarget features for atomic fmin/fmax support. The flat/global support is a real messe. We had float/double support at the beginning in gfx6 and gfx7. gfx8 removed these. gfx10 reintroduced them. gfx11 removed the f64 versions again. gfx9 partially reintroduced them, in gfx90a and gfx940 but only for f64.
1 parent 3f33d2f commit a440a96

22 files changed

+2982
-7337
lines changed

clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ __global__ void ffp2(double *p) {
4949
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
5050
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
5151
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
52-
// SAFE: _Z4ffp2Pd
52+
// SAFE-LABEL: @_Z4ffp2Pd
5353
// SAFE: global_atomic_cmpswap_b64
5454
// SAFE: global_atomic_cmpswap_b64
5555
// SAFE: global_atomic_cmpswap_b64
5656
// SAFE: global_atomic_cmpswap_b64
5757
// SAFE: global_atomic_cmpswap_b64
58-
// UNSAFE: _Z4ffp2Pd
59-
// UNSAFE: global_atomic_cmpswap_x2
60-
// UNSAFE: global_atomic_cmpswap_x2
58+
// UNSAFE-LABEL: @_Z4ffp2Pd
6159
// UNSAFE: global_atomic_cmpswap_x2
6260
// UNSAFE: global_atomic_cmpswap_x2
6361
// UNSAFE: global_atomic_cmpswap_x2
62+
// UNSAFE: global_atomic_max_f64
63+
// UNSAFE: global_atomic_min_f64
6464
__atomic_fetch_sub(p, 1.0, memory_order_relaxed);
6565
__atomic_fetch_max(p, 1.0, memory_order_relaxed);
6666
__atomic_fetch_min(p, 1.0, memory_order_relaxed);
@@ -76,18 +76,18 @@ __global__ void ffp3(long double *p) {
7676
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
7777
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
7878
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
79-
// SAFE: _Z4ffp3Pe
79+
// SAFE-LABEL: @_Z4ffp3Pe
8080
// SAFE: global_atomic_cmpswap_b64
8181
// SAFE: global_atomic_cmpswap_b64
8282
// SAFE: global_atomic_cmpswap_b64
8383
// SAFE: global_atomic_cmpswap_b64
8484
// SAFE: global_atomic_cmpswap_b64
85-
// UNSAFE: _Z4ffp3Pe
86-
// UNSAFE: global_atomic_cmpswap_x2
87-
// UNSAFE: global_atomic_cmpswap_x2
85+
// UNSAFE-LABEL: @_Z4ffp3Pe
8886
// UNSAFE: global_atomic_cmpswap_x2
8987
// UNSAFE: global_atomic_cmpswap_x2
9088
// UNSAFE: global_atomic_cmpswap_x2
89+
// UNSAFE: global_atomic_max_f64
90+
// UNSAFE: global_atomic_min_f64
9191
__atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
9292
__atomic_fetch_max(p, 1.0L, memory_order_relaxed);
9393
__atomic_fetch_min(p, 1.0L, memory_order_relaxed);

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
351351
"GFX90AInsts",
352352
"true",
353353
"Additional instructions for GFX90A+"
354+
// [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
354355
>;
355356

356357
def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
@@ -711,6 +712,30 @@ def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
711712
[FeatureFlatGlobalInsts]
712713
>;
713714

715+
def FeatureAtomicFMinFMaxF32GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f32",
716+
"HasAtomicFMinFMaxF32GlobalInsts",
717+
"true",
718+
"Has global/buffer instructions for atomicrmw fmin/fmax for float"
719+
>;
720+
721+
def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f64",
722+
"HasAtomicFMinFMaxF64GlobalInsts",
723+
"true",
724+
"Has global/buffer instructions for atomicrmw fmin/fmax for float"
725+
>;
726+
727+
def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
728+
"HasAtomicFMinFMaxF32FlatInsts",
729+
"true",
730+
"Has flat memory instructions for atomicrmw fmin/fmax for float"
731+
>;
732+
733+
def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
734+
"HasAtomicFMinFMaxF64FlatInsts",
735+
"true",
736+
"Has flat memory instructions for atomicrmw fmin/fmax for double"
737+
>;
738+
714739
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
715740
"HasAtomicFaddNoRtnInsts",
716741
"true",
@@ -1061,7 +1086,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
10611086
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10621087
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
10631088
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
1064-
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1089+
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1090+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts
10651091
]
10661092
>;
10671093

@@ -1072,7 +1098,9 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
10721098
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
10731099
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10741100
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
1075-
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
1101+
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1102+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1103+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
10761104
]
10771105
>;
10781106

@@ -1127,7 +1155,9 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
11271155
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
11281156
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
11291157
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
1130-
FeatureMaxHardClauseLength63
1158+
FeatureMaxHardClauseLength63,
1159+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1160+
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
11311161
]
11321162
>;
11331163

@@ -1148,7 +1178,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
11481178
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11491179
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
11501180
FeatureGWS, FeatureDefaultComponentZero,
1151-
FeatureMaxHardClauseLength32
1181+
FeatureMaxHardClauseLength32,
1182+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11521183
]
11531184
>;
11541185

@@ -1168,7 +1199,9 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
11681199
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
11691200
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11701201
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
1171-
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast
1202+
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
1203+
FeatureMaxHardClauseLength32,
1204+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
11721205
]
11731206
>;
11741207

@@ -1331,7 +1364,10 @@ def FeatureISAVersion9_0_A : FeatureSet<
13311364
FeaturePackedTID,
13321365
FullRate64Ops,
13331366
FeatureBackOffBarrier,
1334-
FeatureKernargPreload])>;
1367+
FeatureKernargPreload,
1368+
FeatureAtomicFMinFMaxF64GlobalInsts,
1369+
FeatureAtomicFMinFMaxF64FlatInsts
1370+
])>;
13351371

13361372
def FeatureISAVersion9_0_C : FeatureSet<
13371373
!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
@@ -1371,7 +1407,10 @@ def FeatureISAVersion9_4_Common : FeatureSet<
13711407
FeatureArchitectedFlatScratch,
13721408
FullRate64Ops,
13731409
FeatureBackOffBarrier,
1374-
FeatureKernargPreload]>;
1410+
FeatureKernargPreload,
1411+
FeatureAtomicFMinFMaxF64GlobalInsts,
1412+
FeatureAtomicFMinFMaxF64FlatInsts
1413+
]>;
13751414

13761415
def FeatureISAVersion9_4_0 : FeatureSet<
13771416
!listconcat(FeatureISAVersion9_4_Common.Features,
@@ -1862,11 +1901,28 @@ def isGFX12Plus :
18621901
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
18631902
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
18641903

1865-
def HasBufferFlatGlobalAtomicsF64 :
1904+
1905+
def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
18661906
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
18671907
// FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
18681908
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
18691909

1910+
def HasAtomicFMinFMaxF32GlobalInsts :
1911+
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
1912+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>;
1913+
1914+
def HasAtomicFMinFMaxF64GlobalInsts :
1915+
Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">,
1916+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>;
1917+
1918+
def HasAtomicFMinFMaxF32FlatInsts :
1919+
Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">,
1920+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>;
1921+
1922+
def HasAtomicFMinFMaxF64FlatInsts :
1923+
Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">,
1924+
AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>;
1925+
18701926
def HasLdsAtomicAddF64 :
18711927
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
18721928
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,21 +1149,21 @@ let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
11491149
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
11501150
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
11511151
>;
1152+
}
1153+
1154+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
11521155
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
11531156
"buffer_atomic_fmin", VGPR_32, f32, null_frag
11541157
>;
11551158
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
11561159
"buffer_atomic_fmax", VGPR_32, f32, null_frag
11571160
>;
1158-
11591161
}
11601162

11611163
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1162-
11631164
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
11641165
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
11651166
>;
1166-
11671167
}
11681168

11691169
let SubtargetPredicate = HasD16LoadStore in {
@@ -1645,6 +1645,16 @@ defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" #
16451645

16461646
} // end foreach Ty
16471647

1648+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
1649+
defm : BufferAtomicPat<"atomic_load_fmin_global", f32, "BUFFER_ATOMIC_FMIN">;
1650+
defm : BufferAtomicPat<"atomic_load_fmax_global", f32, "BUFFER_ATOMIC_FMAX">;
1651+
}
1652+
1653+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
1654+
defm : BufferAtomicPat<"atomic_load_fmin_global", f64, "BUFFER_ATOMIC_MIN_F64">;
1655+
defm : BufferAtomicPat<"atomic_load_fmax_global", f64, "BUFFER_ATOMIC_MAX_F64">;
1656+
}
1657+
16481658
defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
16491659
defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
16501660

@@ -1746,11 +1756,12 @@ let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
17461756
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
17471757
}
17481758

1749-
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
1759+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
17501760
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
17511761
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17521762
}
1753-
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1763+
1764+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
17541765
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
17551766
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
17561767
}
@@ -1822,9 +1833,12 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
18221833

18231834
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
18241835
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
1836+
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1837+
1838+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
18251839
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
18261840
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
1827-
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
1841+
} //End let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
18281842

18291843
multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string Inst> {
18301844
foreach RtnMode = ["ret", "noret"] in {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -752,19 +752,29 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
752752

753753
// GFX7-, GFX10-only flat instructions.
754754
let SubtargetPredicate = isGFX7GFX10 in {
755-
756755
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
757756
VReg_64, f64, v2f64, VReg_128>;
758-
759757
} // End SubtargetPredicate = isGFX7GFX10
760758

759+
760+
// The names may be flat_atomic_fmin_x2 on some subtargets, but we
761+
// choose this as the canonical name.
762+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
763+
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo <"flat_atomic_min_f64",
764+
VReg_64, f64>;
765+
766+
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo <"flat_atomic_max_f64",
767+
VReg_64, f64>;
768+
}
769+
770+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
771+
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
772+
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
773+
}
774+
761775
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
762776
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
763-
defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
764-
defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
765777
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
766-
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
767-
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
768778
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
769779

770780
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
@@ -1415,6 +1425,17 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
14151425
defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
14161426
defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
14171427
defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1428+
1429+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1430+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>;
1431+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>;
1432+
}
1433+
1434+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
1435+
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>;
1436+
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
1437+
}
1438+
14181439
} // end foreach as
14191440

14201441
let SubtargetPredicate = isGFX12Plus in {
@@ -1576,33 +1597,22 @@ let OtherPredicates = [isGFX12Plus] in {
15761597
}
15771598
}
15781599

1579-
let OtherPredicates = [isGFX10Plus] in {
1600+
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
15801601
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
15811602
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1582-
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1583-
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1584-
}
1585-
1586-
let OtherPredicates = [isGFX10GFX11] in {
15871603
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
15881604
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1605+
}
15891606

1607+
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
1608+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1609+
defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
15901610
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
15911611
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
15921612
}
15931613

1594-
let OtherPredicates = [isGFX10Only] in {
1595-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1596-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1597-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1598-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1599-
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1600-
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1601-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1602-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1603-
}
1604-
16051614
let OtherPredicates = [isGFX12Only] in {
1615+
// FIXME: Remove these intrinsics
16061616
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
16071617
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
16081618
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
@@ -1632,22 +1642,26 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_am
16321642
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
16331643
}
16341644

1635-
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1636-
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1645+
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
16371646
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
16381647
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1639-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1640-
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
16411648
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
16421649
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1643-
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1644-
defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1645-
defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1646-
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1650+
}
1651+
1652+
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
16471653
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
16481654
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
16491655
}
16501656

1657+
let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
1658+
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1659+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1660+
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
1661+
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1662+
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
1663+
}
1664+
16511665
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
16521666
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
16531667
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;

0 commit comments

Comments
 (0)