Skip to content

Commit f548c4d

Browse files
committed
AMDGPU: Add mode register use to s_getreg_b32
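This should fix reading the wrong mode after setting the mode: without a declared use of the MODE register, s_getreg_b32 could be scheduled after an s_setreg that writes MODE, and would then read back the newly written value instead of the original one. Ideally we would have separate pseudos for the case where the instruction is known not to read MODE.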
1 parent 9eb91f4 commit f548c4d

File tree

3 files changed

+152
-24
lines changed

3 files changed

+152
-24
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,14 +1110,15 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
11101110

11111111
// This is hasSideEffects to allow its use in readcyclecounter selection.
11121112
// FIXME: Need to truncate immediate to 16-bits.
1113-
// FIXME: Missing mode register use. Should have separate pseudos for
1114-
// known may read MODE and only read MODE.
1113+
// FIXME: Should have separate pseudos for known may read MODE and
1114+
// only read MODE.
11151115
def S_GETREG_B32 : SOPK_Pseudo <
11161116
"s_getreg_b32",
11171117
(outs SReg_32:$sdst), (ins hwreg:$simm16),
11181118
"$sdst, $simm16",
11191119
[(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> {
11201120
let hasSideEffects = 1;
1121+
let Uses = [MODE];
11211122
}
11221123

11231124
let Defs = [MODE], Uses = [MODE] in {

llvm/test/CodeGen/AMDGPU/fdiv.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2417,12 +2417,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
24172417
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
24182418
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
24192419
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2420+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
24202421
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
24212422
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
24222423
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
24232424
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
24242425
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
2425-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
24262426
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
24272427
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
24282428
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2455,12 +2455,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
24552455
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
24562456
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
24572457
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2458+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
24582459
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
24592460
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
24602461
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
24612462
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
24622463
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
2463-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
24642464
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
24652465
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
24662466
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2727,12 +2727,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
27272727
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
27282728
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
27292729
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2730+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
27302731
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
27312732
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
27322733
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
27332734
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
27342735
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
2735-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
27362736
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
27372737
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
27382738
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2765,12 +2765,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
27652765
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
27662766
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
27672767
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2768+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
27682769
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
27692770
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
27702771
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
27712772
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
27722773
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
2773-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
27742774
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
27752775
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
27762776
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3294,12 +3294,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
32943294
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
32953295
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
32963296
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
3297+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
32973298
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
32983299
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
32993300
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
33003301
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
33013302
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
3302-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
33033303
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
33043304
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
33053305
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3334,12 +3334,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
33343334
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
33353335
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
33363336
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
3337+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
33373338
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
33383339
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
33393340
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
33403341
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
33413342
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
3342-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
33433343
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
33443344
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
33453345
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3868,12 +3868,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
38683868
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
38693869
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
38703870
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
3871+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
38713872
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
38723873
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
38733874
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
38743875
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
38753876
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
3876-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
38773877
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
38783878
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
38793879
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3906,12 +3906,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
39063906
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
39073907
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
39083908
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
3909+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
39093910
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
39103911
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
39113912
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
39123913
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
39133914
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
3914-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
39153915
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
39163916
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
39173917
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -4434,12 +4434,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
44344434
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
44354435
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
44364436
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
4437+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
44374438
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
44384439
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
44394440
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
44404441
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
44414442
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
4442-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
44434443
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
44444444
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
44454445
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -4474,12 +4474,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
44744474
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
44754475
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
44764476
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
4477+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
44774478
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
44784479
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
44794480
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
44804481
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
44814482
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
4482-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
44834483
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
44844484
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
44854485
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5010,12 +5010,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
50105010
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
50115011
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
50125012
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
5013+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
50135014
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
50145015
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
50155016
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
50165017
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
50175018
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
5018-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
50195019
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
50205020
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
50215021
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5048,12 +5048,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
50485048
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
50495049
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
50505050
; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
5051+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
50515052
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
50525053
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
50535054
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
50545055
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
50555056
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
5056-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
50575057
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
50585058
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
50595059
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5569,12 +5569,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
55695569
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
55705570
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
55715571
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
5572+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
55725573
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
55735574
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
55745575
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
55755576
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
55765577
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
5577-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
55785578
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
55795579
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
55805580
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5607,12 +5607,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
56075607
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
56085608
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
56095609
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
5610+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
56105611
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
56115612
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
56125613
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
56135614
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
56145615
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
5615-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
56165616
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
56175617
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
56185618
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6113,12 +6113,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
61136113
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
61146114
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
61156115
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
6116+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
61166117
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
61176118
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
61186119
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
61196120
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
61206121
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
6121-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
61226122
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
61236123
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
61246124
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6153,12 +6153,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
61536153
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
61546154
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
61556155
; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
6156+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
61566157
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
61576158
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
61586159
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
61596160
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
61606161
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
6161-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
61626162
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
61636163
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
61646164
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6619,12 +6619,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
66196619
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
66206620
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
66216621
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
6622+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
66226623
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
66236624
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
66246625
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
66256626
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
66266627
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
6627-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
66286628
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
66296629
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
66306630
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6659,12 +6659,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
66596659
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
66606660
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
66616661
; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
6662+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
66626663
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
66636664
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
66646665
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
66656666
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
66666667
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
6667-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
66686668
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
66696669
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
66706670
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7168,12 +7168,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
71687168
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
71697169
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
71707170
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7171+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
71717172
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
71727173
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
71737174
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
71747175
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
71757176
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
7176-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
71777177
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
71787178
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
71797179
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7206,12 +7206,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
72067206
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
72077207
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
72087208
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7209+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
72097210
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
72107211
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
72117212
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
72127213
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
72137214
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
7214-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
72157215
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
72167216
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
72177217
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7721,12 +7721,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
77217721
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
77227722
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
77237723
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7724+
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
77247725
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
77257726
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
77267727
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
77277728
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
77287729
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
7729-
; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
77307730
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
77317731
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
77327732
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7759,12 +7759,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
77597759
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
77607760
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
77617761
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7762+
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
77627763
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
77637764
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
77647765
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
77657766
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
77667767
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
7767-
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
77687768
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
77697769
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
77707770
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4

0 commit comments

Comments
 (0)