@@ -2417,12 +2417,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
2417
2417
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
2418
2418
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
2419
2419
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2420
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2420
2421
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2421
2422
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
2422
2423
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
2423
2424
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
2424
2425
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
2425
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2426
2426
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
2427
2427
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
2428
2428
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2455,12 +2455,12 @@ define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
2455
2455
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
2456
2456
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
2457
2457
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2458
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2458
2459
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2459
2460
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
2460
2461
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
2461
2462
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
2462
2463
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
2463
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2464
2464
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
2465
2465
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
2466
2466
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2727,12 +2727,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
2727
2727
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
2728
2728
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
2729
2729
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2730
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2730
2731
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2731
2732
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
2732
2733
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
2733
2734
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
2734
2735
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
2735
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2736
2736
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
2737
2737
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
2738
2738
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -2765,12 +2765,12 @@ define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
2765
2765
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
2766
2766
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
2767
2767
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
2768
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2768
2769
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2769
2770
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
2770
2771
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
2771
2772
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
2772
2773
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
2773
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2774
2774
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
2775
2775
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
2776
2776
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3294,12 +3294,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
3294
3294
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
3295
3295
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
3296
3296
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
3297
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3297
3298
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3298
3299
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
3299
3300
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
3300
3301
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
3301
3302
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
3302
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3303
3303
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
3304
3304
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
3305
3305
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3334,12 +3334,12 @@ define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z)
3334
3334
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
3335
3335
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
3336
3336
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
3337
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3337
3338
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3338
3339
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
3339
3340
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
3340
3341
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
3341
3342
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
3342
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3343
3343
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
3344
3344
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
3345
3345
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3868,12 +3868,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
3868
3868
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
3869
3869
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
3870
3870
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
3871
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3871
3872
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3872
3873
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
3873
3874
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
3874
3875
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
3875
3876
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
3876
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3877
3877
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
3878
3878
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
3879
3879
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -3906,12 +3906,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
3906
3906
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
3907
3907
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
3908
3908
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
3909
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3909
3910
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3910
3911
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
3911
3912
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
3912
3913
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
3913
3914
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
3914
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3915
3915
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
3916
3916
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
3917
3917
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -4434,12 +4434,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
4434
4434
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
4435
4435
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3
4436
4436
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
4437
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4437
4438
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4438
4439
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0
4439
4440
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4
4440
4441
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4
4441
4442
; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5
4442
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4443
4443
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6
4444
4444
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5
4445
4445
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -4474,12 +4474,12 @@ define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y
4474
4474
; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
4475
4475
; GFX7-NEXT: v_rcp_f32_e32 v4, v3
4476
4476
; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
4477
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4477
4478
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4478
4479
; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0
4479
4480
; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4
4480
4481
; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4
4481
4482
; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5
4482
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4483
4483
; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6
4484
4484
; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5
4485
4485
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5010,12 +5010,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
5010
5010
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5011
5011
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
5012
5012
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
5013
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5013
5014
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5014
5015
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
5015
5016
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
5016
5017
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
5017
5018
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
5018
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5019
5019
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
5020
5020
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
5021
5021
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5048,12 +5048,12 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
5048
5048
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5049
5049
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
5050
5050
; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0
5051
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5051
5052
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5052
5053
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
5053
5054
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
5054
5055
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
5055
5056
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
5056
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5057
5057
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
5058
5058
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
5059
5059
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5569,12 +5569,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
5569
5569
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5570
5570
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
5571
5571
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
5572
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5572
5573
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5573
5574
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
5574
5575
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
5575
5576
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
5576
5577
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
5577
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5578
5578
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
5579
5579
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
5580
5580
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -5607,12 +5607,12 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
5607
5607
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5608
5608
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
5609
5609
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0
5610
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5610
5611
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5611
5612
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
5612
5613
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
5613
5614
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
5614
5615
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
5615
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5616
5616
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
5617
5617
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
5618
5618
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6113,12 +6113,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
6113
6113
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
6114
6114
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
6115
6115
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
6116
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6116
6117
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6117
6118
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
6118
6119
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
6119
6120
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
6120
6121
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
6121
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6122
6122
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
6123
6123
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
6124
6124
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6153,12 +6153,12 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
6153
6153
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0
6154
6154
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
6155
6155
; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0
6156
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6156
6157
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6157
6158
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
6158
6159
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
6159
6160
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
6160
6161
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
6161
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6162
6162
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
6163
6163
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
6164
6164
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6619,12 +6619,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
6619
6619
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
6620
6620
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1
6621
6621
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
6622
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6622
6623
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6623
6624
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0
6624
6625
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2
6625
6626
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2
6626
6627
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3
6627
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6628
6628
; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4
6629
6629
; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3
6630
6630
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -6659,12 +6659,12 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
6659
6659
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
6660
6660
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
6661
6661
; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6
6662
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6662
6663
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6663
6664
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
6664
6665
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
6665
6666
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
6666
6667
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
6667
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6668
6668
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
6669
6669
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
6670
6670
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7168,12 +7168,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
7168
7168
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
7169
7169
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
7170
7170
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7171
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7171
7172
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7172
7173
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
7173
7174
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
7174
7175
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
7175
7176
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
7176
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7177
7177
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
7178
7178
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
7179
7179
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7206,12 +7206,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
7206
7206
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
7207
7207
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
7208
7208
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7209
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7209
7210
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7210
7211
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
7211
7212
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
7212
7213
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
7213
7214
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
7214
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7215
7215
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
7216
7216
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
7217
7217
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7721,12 +7721,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
7721
7721
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
7722
7722
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
7723
7723
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7724
+ ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7724
7725
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7725
7726
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
7726
7727
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
7727
7728
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
7728
7729
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
7729
- ; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7730
7730
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
7731
7731
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
7732
7732
; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
@@ -7759,12 +7759,12 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
7759
7759
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
7760
7760
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
7761
7761
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
7762
+ ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7762
7763
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7763
7764
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
7764
7765
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
7765
7766
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
7766
7767
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
7767
- ; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7768
7768
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
7769
7769
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
7770
7770
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
0 commit comments