@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
80
80
; GFX8-NEXT: s_min_i32 s2, s2, 0
81
81
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
82
82
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
83
+ ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
83
84
; GFX8-NEXT: s_sext_i32_i16 s2, s2
84
85
; GFX8-NEXT: s_sext_i32_i16 s1, s1
85
- ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
86
86
; GFX8-NEXT: s_max_i32 s1, s2, s1
87
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
88
87
; GFX8-NEXT: s_sext_i32_i16 s2, s3
89
88
; GFX8-NEXT: s_min_i32 s1, s1, s2
90
89
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
189
188
; GFX8-NEXT: s_min_i32 s2, s2, 0
190
189
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
191
190
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
191
+ ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
192
192
; GFX8-NEXT: s_sext_i32_i16 s2, s2
193
193
; GFX8-NEXT: s_sext_i32_i16 s1, s1
194
- ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
195
194
; GFX8-NEXT: s_max_i32 s1, s2, s1
196
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
197
195
; GFX8-NEXT: s_sext_i32_i16 s2, s3
198
196
; GFX8-NEXT: s_min_i32 s1, s1, s2
199
197
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
386
384
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
387
385
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
388
386
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
387
+ ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
389
388
; GFX8-NEXT: s_sext_i32_i16 s4, s4
390
389
; GFX8-NEXT: s_sext_i32_i16 s1, s1
391
- ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
392
390
; GFX8-NEXT: s_max_i32 s1, s4, s1
393
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
394
391
; GFX8-NEXT: s_sext_i32_i16 s4, s5
395
392
; GFX8-NEXT: s_min_i32 s1, s1, s4
396
393
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
400
397
; GFX8-NEXT: s_max_i32 s4, s3, 0
401
398
; GFX8-NEXT: s_min_i32 s3, s3, 0
402
399
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
400
+ ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
403
401
; GFX8-NEXT: s_sext_i32_i16 s3, s3
404
402
; GFX8-NEXT: s_sext_i32_i16 s2, s2
405
- ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
406
403
; GFX8-NEXT: s_max_i32 s2, s3, s2
407
- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
408
404
; GFX8-NEXT: s_sext_i32_i16 s3, s4
409
405
; GFX8-NEXT: s_min_i32 s2, s2, s3
410
406
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
787
783
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
788
784
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
789
785
; GFX8-NEXT: s_sub_i32 s8, 0x8000, s8
786
+ ; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
790
787
; GFX8-NEXT: s_sext_i32_i16 s8, s8
791
788
; GFX8-NEXT: s_sext_i32_i16 s1, s1
792
- ; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
793
789
; GFX8-NEXT: s_max_i32 s1, s8, s1
794
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
795
790
; GFX8-NEXT: s_sext_i32_i16 s8, s9
796
791
; GFX8-NEXT: s_min_i32 s1, s1, s8
797
792
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
801
796
; GFX8-NEXT: s_max_i32 s8, s5, 0
802
797
; GFX8-NEXT: s_min_i32 s5, s5, 0
803
798
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
799
+ ; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
804
800
; GFX8-NEXT: s_sext_i32_i16 s5, s5
805
801
; GFX8-NEXT: s_sext_i32_i16 s2, s2
806
- ; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
807
802
; GFX8-NEXT: s_max_i32 s2, s5, s2
808
- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
809
803
; GFX8-NEXT: s_sext_i32_i16 s5, s8
810
804
; GFX8-NEXT: s_min_i32 s2, s2, s5
811
805
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
815
809
; GFX8-NEXT: s_max_i32 s6, s5, 0
816
810
; GFX8-NEXT: s_min_i32 s5, s5, 0
817
811
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
812
+ ; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
818
813
; GFX8-NEXT: s_sext_i32_i16 s5, s5
819
814
; GFX8-NEXT: s_sext_i32_i16 s3, s3
820
- ; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
821
815
; GFX8-NEXT: s_max_i32 s3, s5, s3
822
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
823
816
; GFX8-NEXT: s_sext_i32_i16 s5, s6
824
817
; GFX8-NEXT: s_min_i32 s3, s3, s5
825
818
; GFX8-NEXT: s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
829
822
; GFX8-NEXT: s_min_i32 s5, s5, 0
830
823
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
831
824
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
832
- ; GFX8-NEXT: s_sext_i32_i16 s5, s5
833
- ; GFX8-NEXT: s_sext_i32_i16 s4, s4
834
825
; GFX8-NEXT: s_sext_i32_i16 s1, s1
835
826
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
836
- ; GFX8-NEXT: s_max_i32 s4, s5, s4
827
+ ; GFX8-NEXT: s_sext_i32_i16 s5, s5
828
+ ; GFX8-NEXT: s_sext_i32_i16 s4, s4
837
829
; GFX8-NEXT: s_sext_i32_i16 s0, s0
838
830
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
839
- ; GFX8-NEXT: s_sext_i32_i16 s4, s4
831
+ ; GFX8-NEXT: s_max_i32 s4, s5, s4
840
832
; GFX8-NEXT: s_sext_i32_i16 s5, s6
841
833
; GFX8-NEXT: s_ashr_i32 s0, s0, 8
842
834
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
2631
2623
; GFX8-NEXT: s_max_i32 s3, s2, 0
2632
2624
; GFX8-NEXT: s_min_i32 s2, s2, 0
2633
2625
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
2626
+ ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
2634
2627
; GFX8-NEXT: s_sext_i32_i16 s2, s2
2635
2628
; GFX8-NEXT: s_sext_i32_i16 s1, s1
2636
- ; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
2637
2629
; GFX8-NEXT: s_max_i32 s1, s2, s1
2638
- ; GFX8-NEXT: s_sext_i32_i16 s1, s1
2639
2630
; GFX8-NEXT: s_sext_i32_i16 s2, s3
2640
2631
; GFX8-NEXT: s_min_i32 s1, s1, s2
2641
2632
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
2835
2826
; GFX8-NEXT: s_max_i32 s4, s3, 0
2836
2827
; GFX8-NEXT: s_min_i32 s3, s3, 0
2837
2828
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
2829
+ ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
2838
2830
; GFX8-NEXT: s_sext_i32_i16 s3, s3
2839
2831
; GFX8-NEXT: s_sext_i32_i16 s5, s1
2840
- ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
2841
2832
; GFX8-NEXT: s_max_i32 s3, s3, s5
2842
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
2843
2833
; GFX8-NEXT: s_sext_i32_i16 s4, s4
2844
2834
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
2845
2835
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
3190
3180
; GFX8-NEXT: s_max_i32 s7, s6, 0
3191
3181
; GFX8-NEXT: s_min_i32 s6, s6, 0
3192
3182
; GFX8-NEXT: s_sub_i32 s6, 0x8000, s6
3183
+ ; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
3193
3184
; GFX8-NEXT: s_sext_i32_i16 s6, s6
3194
3185
; GFX8-NEXT: s_sext_i32_i16 s8, s2
3195
- ; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
3196
3186
; GFX8-NEXT: s_max_i32 s6, s6, s8
3197
- ; GFX8-NEXT: s_sext_i32_i16 s6, s6
3198
3187
; GFX8-NEXT: s_sext_i32_i16 s7, s7
3199
3188
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
3200
3189
; GFX8-NEXT: s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
3215
3204
; GFX8-NEXT: s_max_i32 s6, s2, 0
3216
3205
; GFX8-NEXT: s_min_i32 s2, s2, 0
3217
3206
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
3207
+ ; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
3218
3208
; GFX8-NEXT: s_sext_i32_i16 s2, s2
3219
3209
; GFX8-NEXT: s_sext_i32_i16 s7, s3
3220
- ; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
3221
3210
; GFX8-NEXT: s_max_i32 s2, s2, s7
3222
- ; GFX8-NEXT: s_sext_i32_i16 s2, s2
3223
3211
; GFX8-NEXT: s_sext_i32_i16 s6, s6
3224
3212
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
3225
3213
; GFX8-NEXT: s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
3513
3501
; GFX8-NEXT: s_max_i32 s10, s9, 0
3514
3502
; GFX8-NEXT: s_min_i32 s9, s9, 0
3515
3503
; GFX8-NEXT: s_sub_i32 s9, 0x8000, s9
3504
+ ; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
3516
3505
; GFX8-NEXT: s_sext_i32_i16 s9, s9
3517
3506
; GFX8-NEXT: s_sext_i32_i16 s11, s3
3518
- ; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
3519
3507
; GFX8-NEXT: s_max_i32 s9, s9, s11
3520
- ; GFX8-NEXT: s_sext_i32_i16 s9, s9
3521
3508
; GFX8-NEXT: s_sext_i32_i16 s10, s10
3522
3509
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
3523
3510
; GFX8-NEXT: s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
3538
3525
; GFX8-NEXT: s_max_i32 s9, s3, 0
3539
3526
; GFX8-NEXT: s_min_i32 s3, s3, 0
3540
3527
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
3528
+ ; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
3541
3529
; GFX8-NEXT: s_sext_i32_i16 s3, s3
3542
3530
; GFX8-NEXT: s_sext_i32_i16 s10, s4
3543
- ; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
3544
3531
; GFX8-NEXT: s_max_i32 s3, s3, s10
3545
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
3546
3532
; GFX8-NEXT: s_sext_i32_i16 s9, s9
3547
3533
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
3548
3534
; GFX8-NEXT: s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
3563
3549
; GFX8-NEXT: s_max_i32 s4, s3, 0
3564
3550
; GFX8-NEXT: s_min_i32 s3, s3, 0
3565
3551
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
3552
+ ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
3566
3553
; GFX8-NEXT: s_sext_i32_i16 s3, s3
3567
3554
; GFX8-NEXT: s_sext_i32_i16 s9, s5
3568
- ; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
3569
3555
; GFX8-NEXT: s_max_i32 s3, s3, s9
3570
- ; GFX8-NEXT: s_sext_i32_i16 s3, s3
3571
3556
; GFX8-NEXT: s_sext_i32_i16 s4, s4
3572
3557
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
3573
3558
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
3924
3909
; GFX8-NEXT: s_max_i32 s13, s12, 0
3925
3910
; GFX8-NEXT: s_min_i32 s12, s12, 0
3926
3911
; GFX8-NEXT: s_sub_i32 s12, 0x8000, s12
3912
+ ; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
3927
3913
; GFX8-NEXT: s_sext_i32_i16 s12, s12
3928
3914
; GFX8-NEXT: s_sext_i32_i16 s14, s4
3929
- ; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
3930
3915
; GFX8-NEXT: s_max_i32 s12, s12, s14
3931
- ; GFX8-NEXT: s_sext_i32_i16 s12, s12
3932
3916
; GFX8-NEXT: s_sext_i32_i16 s13, s13
3933
3917
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
3934
3918
; GFX8-NEXT: s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
3949
3933
; GFX8-NEXT: s_max_i32 s12, s4, 0
3950
3934
; GFX8-NEXT: s_min_i32 s4, s4, 0
3951
3935
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3936
+ ; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
3952
3937
; GFX8-NEXT: s_sext_i32_i16 s4, s4
3953
3938
; GFX8-NEXT: s_sext_i32_i16 s13, s5
3954
- ; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
3955
3939
; GFX8-NEXT: s_max_i32 s4, s4, s13
3956
- ; GFX8-NEXT: s_sext_i32_i16 s4, s4
3957
3940
; GFX8-NEXT: s_sext_i32_i16 s12, s12
3958
3941
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
3959
3942
; GFX8-NEXT: s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
3974
3957
; GFX8-NEXT: s_max_i32 s5, s4, 0
3975
3958
; GFX8-NEXT: s_min_i32 s4, s4, 0
3976
3959
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3960
+ ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
3977
3961
; GFX8-NEXT: s_sext_i32_i16 s4, s4
3978
3962
; GFX8-NEXT: s_sext_i32_i16 s12, s6
3979
- ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
3980
3963
; GFX8-NEXT: s_max_i32 s4, s4, s12
3981
- ; GFX8-NEXT: s_sext_i32_i16 s4, s4
3982
3964
; GFX8-NEXT: s_sext_i32_i16 s5, s5
3983
3965
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
3984
3966
; GFX8-NEXT: s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
3999
3981
; GFX8-NEXT: s_max_i32 s5, s4, 0
4000
3982
; GFX8-NEXT: s_min_i32 s4, s4, 0
4001
3983
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
3984
+ ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
4002
3985
; GFX8-NEXT: s_sext_i32_i16 s4, s4
4003
3986
; GFX8-NEXT: s_sext_i32_i16 s6, s7
4004
- ; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
4005
3987
; GFX8-NEXT: s_max_i32 s4, s4, s6
4006
- ; GFX8-NEXT: s_sext_i32_i16 s4, s4
4007
3988
; GFX8-NEXT: s_sext_i32_i16 s5, s5
4008
3989
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
4009
3990
; GFX8-NEXT: s_min_i32 s4, s4, s5
0 commit comments