Skip to content

Commit 203cd18

Browse files
VigneshwarJ authored and tstellar committed
AMDGPU: Handle gfx950 XDL-write-VGPR-Overlap-Src-AB wait state (llvm#126732)
gfx950 needs additional wait states compared to gfx940. (cherry picked from commit c837f57)
1 parent cb51906 commit 203cd18

File tree

3 files changed

+51
-29
lines changed

3 files changed

+51
-29
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2297,12 +2297,14 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
22972297
return NumPasses + 2;
22982298
}
22992299

2300-
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
2301-
// 2 pass -> 5
2302-
// 4 pass -> 7
2303-
// 8 pass -> 11
2304-
// 16 pass -> 19
2305-
return NumPasses + 3;
2300+
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
2301+
bool IsGFX950) {
2302+
// xdl def cycles | gfx940 | gfx950
2303+
// 2 pass | 5 5
2304+
// 4 pass | 7 8
2305+
// 8 pass | 11 12
2306+
// 16 pass | 19 20
2307+
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
23062308
}
23072309

23082310
int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
@@ -2471,7 +2473,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
24712473
NeedWaitStates =
24722474
isXDL(ST, *MI1)
24732475
? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
2474-
NumPasses)
2476+
NumPasses, ST.hasGFX950Insts())
24752477
: GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
24762478
NumPasses);
24772479
break;

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 40 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -417,7 +417,8 @@ body: |
417417
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
418418
# GCN: V_MFMA
419419
# GCN-NEXT: S_NOP 7
420-
# GCN-NEXT: S_NOP 2
420+
# GFX940-NEXT: S_NOP 2
421+
# GFX950-NEXT: S_NOP 3
421422
# GCN-NEXT: V_MFMA
422423
name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
423424
body: |
@@ -439,7 +440,8 @@ body: |
439440
# GCN-LABEL: name: smfmac32x32_write_agpr_mfma_srca_read_overlap
440441
# GCN: V_SMFMAC
441442
# GCN-NEXT: S_NOP 7
442-
# GCN-NEXT: S_NOP 2
443+
# GFX940-NEXT: S_NOP 2
444+
# GFX950-NEXT: S_NOP 3
443445
# GCN-NEXT: V_MFMA
444446
name: smfmac32x32_write_agpr_mfma_srca_read_overlap
445447
body: |
@@ -450,7 +452,8 @@ body: |
450452
# GCN-LABEL: name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
451453
# GCN: V_SMFMAC
452454
# GCN-NEXT: S_NOP 7
453-
# GCN-NEXT: S_NOP 2
455+
# GFX940-NEXT: S_NOP 2
456+
# GFX950-NEXT: S_NOP 3
454457
# GCN-NEXT: V_SMFMAC
455458
name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
456459
body: |
@@ -462,7 +465,8 @@ body: |
462465
# GCN: V_MFMA
463466
# GCN-NEXT: S_NOP 7
464467
# GCN-NEXT: S_NOP 7
465-
# GCN-NEXT: S_NOP 2
468+
# GFX940-NEXT: S_NOP 2
469+
# GFX950-NEXT: S_NOP 3
466470
# GCN-NEXT: V_MFMA
467471
name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
468472
body: |
@@ -1715,7 +1719,8 @@ body: |
17151719
...
17161720
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
17171721
# GCN: V_MFMA
1718-
# GCN-NEXT: S_NOP 6
1722+
# GFX940-NEXT: S_NOP 6
1723+
# GFX950-NEXT: S_NOP 7
17191724
# GCN-NEXT: V_MFMA
17201725
name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
17211726
body: |
@@ -1725,7 +1730,8 @@ body: |
17251730
...
17261731
# GCN-LABEL: name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
17271732
# GCN: V_MFMA
1728-
# GCN-NEXT: S_NOP 6
1733+
# GFX940-NEXT: S_NOP 6
1734+
# GFX950-NEXT: S_NOP 7
17291735
# GCN-NEXT: V_MFMA
17301736
name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
17311737
body: |
@@ -1735,7 +1741,8 @@ body: |
17351741
...
17361742
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
17371743
# GCN: V_MFMA
1738-
# GCN-NEXT: S_NOP 6
1744+
# GFX940-NEXT: S_NOP 6
1745+
# GFX950-NEXT: S_NOP 7
17391746
# GCN-NEXT: V_MFMA
17401747
name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
17411748
body: |
@@ -1826,7 +1833,8 @@ body: |
18261833
...
18271834
# GCN-LABEL: name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
18281835
# GCN: V_SMFMAC
1829-
# GCN-NEXT: S_NOP 6
1836+
# GFX940-NEXT: S_NOP 6
1837+
# GFX950-NEXT: S_NOP 7
18301838
# GCN-NEXT: V_SMFMAC
18311839
name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
18321840
body: |
@@ -2188,7 +2196,8 @@ body: |
21882196
# 4 pass source
21892197
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
21902198
# GCN: V_MFMA
2191-
# GCN-NEXT: S_NOP 6
2199+
# GFX940-NEXT: S_NOP 6
2200+
# GFX950-NEXT: S_NOP 7
21922201
# GCN-NEXT: V_MFMA
21932202
name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
21942203
body: |
@@ -2202,7 +2211,8 @@ body: |
22022211
# 4 pass source
22032212
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
22042213
# GCN: V_MFMA
2205-
# GCN-NEXT: S_NOP 6
2214+
# GFX940-NEXT: S_NOP 6
2215+
# GFX950-NEXT: S_NOP 7
22062216
# GCN-NEXT: V_MFMA
22072217
name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
22082218
body: |
@@ -2276,7 +2286,8 @@ body: |
22762286
# 4 pass source
22772287
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
22782288
# GCN: V_MFMA
2279-
# GCN-NEXT: S_NOP 6
2289+
# GFX940-NEXT: S_NOP 6
2290+
# GFX950-NEXT: S_NOP 7
22802291
# GCN-NEXT: V_MFMA
22812292
name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
22822293
body: |
@@ -2290,7 +2301,8 @@ body: |
22902301
# 4 pass source
22912302
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
22922303
# GCN: V_MFMA
2293-
# GCN-NEXT: S_NOP 6
2304+
# GFX940-NEXT: S_NOP 6
2305+
# GFX950-NEXT: S_NOP 7
22942306
# GCN-NEXT: V_MFMA
22952307
name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
22962308
body: |
@@ -2321,7 +2333,8 @@ body: |
23212333
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
23222334
# GCN: V_MFMA
23232335
# GCN-NEXT: S_NOP 7
2324-
# GCN-NEXT: S_NOP 2
2336+
# GFX940-NEXT: S_NOP 2
2337+
# GFX950-NEXT: S_NOP 3
23252338
# GCN-NEXT: V_MFMA
23262339
name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
23272340
body: |
@@ -2336,7 +2349,8 @@ body: |
23362349
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
23372350
# GCN: V_MFMA
23382351
# GCN-NEXT: S_NOP 7
2339-
# GCN-NEXT: S_NOP 2
2352+
# GFX940-NEXT: S_NOP 2
2353+
# GFX950-NEXT: S_NOP 3
23402354
# GCN-NEXT: V_MFMA
23412355
name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
23422356
body: |
@@ -2370,7 +2384,8 @@ body: |
23702384
# GCN: V_MFMA
23712385
# GCN-NEXT: S_NOP 7
23722386
# GCN-NEXT: S_NOP 7
2373-
# GCN-NEXT: S_NOP 2
2387+
# GFX940-NEXT: S_NOP 2
2388+
# GFX950-NEXT: S_NOP 3
23742389
# GCN-NEXT: V_MFMA
23752390
name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
23762391
body: |
@@ -2386,7 +2401,8 @@ body: |
23862401
# GCN: V_MFMA
23872402
# GCN-NEXT: S_NOP 7
23882403
# GCN-NEXT: S_NOP 7
2389-
# GCN-NEXT: S_NOP 2
2404+
# GFX940-NEXT: S_NOP 2
2405+
# GFX950-NEXT: S_NOP 3
23902406
# GCN-NEXT: V_MFMA
23912407
name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
23922408
body: |
@@ -2456,7 +2472,8 @@ body: |
24562472
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
24572473
# GCN: V_MFMA
24582474
# GCN-NEXT: S_NOP 7
2459-
# GCN-NEXT: S_NOP 2
2475+
# GFX940-NEXT: S_NOP 2
2476+
# GFX950-NEXT: S_NOP 3
24602477
# GCN-NEXT: V_MFMA
24612478
name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
24622479
body: |
@@ -2470,7 +2487,8 @@ body: |
24702487
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
24712488
# GCN: V_MFMA
24722489
# GCN-NEXT: S_NOP 7
2473-
# GCN-NEXT: S_NOP 2
2490+
# GFX940-NEXT: S_NOP 2
2491+
# GFX950-NEXT: S_NOP 3
24742492
# GCN-NEXT: V_MFMA
24752493
name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
24762494
body: |
@@ -2502,7 +2520,8 @@ body: |
25022520
# GCN: V_MFMA
25032521
# GCN-NEXT: S_NOP 7
25042522
# GCN-NEXT: S_NOP 7
2505-
# GCN-NEXT: S_NOP 2
2523+
# GFX940-NEXT: S_NOP 2
2524+
# GFX950-NEXT: S_NOP 3
25062525
# GCN-NEXT: V_MFMA
25072526
name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca
25082527
body: |
@@ -2519,7 +2538,8 @@ body: |
25192538
# GCN: V_MFMA
25202539
# GCN-NEXT: S_NOP 7
25212540
# GCN-NEXT: S_NOP 7
2522-
# GCN-NEXT: S_NOP 2
2541+
# GFX940-NEXT: S_NOP 2
2542+
# GFX950-NEXT: S_NOP 3
25232543
# GCN-NEXT: V_MFMA
25242544
name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb
25252545
body: |

llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ body: |
254254
; GCN-NEXT: {{ $}}
255255
; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec
256256
; GCN-NEXT: S_NOP 7
257-
; GCN-NEXT: S_NOP 2
257+
; GCN-NEXT: S_NOP 3
258258
; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec
259259
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
260260
renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec
@@ -275,7 +275,7 @@ body: |
275275
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4
276276
; GCN-NEXT: {{ $}}
277277
; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec
278-
; GCN-NEXT: S_NOP 6
278+
; GCN-NEXT: S_NOP 7
279279
; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec
280280
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
281281
renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec

0 commit comments

Comments
 (0)